From b1cb537b4e89b82048c73a42c750b4c6f4ae1990 Mon Sep 17 00:00:00 2001 From: Joosep Pata Date: Wed, 24 Aug 2022 18:41:25 +0300 Subject: [PATCH] Faster test, pre-commit formatting, general cleanup (#129) * add pre commit hooks * add reqs * fix configs * cleanup * install from reqs file --- .github/workflows/pre-commit.yml | 27 + .github/workflows/test.yml | 22 +- .gitignore | 4 + .pre-commit-config.yaml | 40 ++ mlpf/pipeline.py | 464 +++++++------- mlpf/tfmodel/data.py | 7 - mlpf/tfmodel/datasets/BaseDatasetFactory.py | 56 +- mlpf/tfmodel/datasets/CMSDatasetFactory.py | 6 +- .../tfmodel/datasets/DelphesDatasetFactory.py | 6 +- mlpf/tfmodel/delphes_data.py | 179 ------ mlpf/tfmodel/fast_attention.py | 486 --------------- mlpf/tfmodel/fast_attention_util.py | 195 ------ mlpf/tfmodel/model.py | 590 ++++++++++-------- mlpf/tfmodel/model_setup.py | 350 +++++------ mlpf/tfmodel/mpnn.py | 291 --------- mlpf/tfmodel/opt.py | 91 --- mlpf/tfmodel/pred_tf_model.py | 156 ----- mlpf/tfmodel/tf_data.py | 128 ---- mlpf/tfmodel/utils.py | 292 ++++----- parameters/cms-gen.yaml | 8 +- parameters/cms.yaml | 8 +- parameters/delphes.yaml | 13 +- requirements.txt | 35 ++ scripts/local_test_cms_pipeline.sh | 2 +- scripts/local_test_delphes_pipeline.sh | 2 +- 25 files changed, 964 insertions(+), 2494 deletions(-) create mode 100644 .github/workflows/pre-commit.yml create mode 100644 .pre-commit-config.yaml delete mode 100644 mlpf/tfmodel/data.py delete mode 100644 mlpf/tfmodel/delphes_data.py delete mode 100644 mlpf/tfmodel/fast_attention.py delete mode 100644 mlpf/tfmodel/fast_attention_util.py delete mode 100644 mlpf/tfmodel/mpnn.py delete mode 100644 mlpf/tfmodel/opt.py delete mode 100644 mlpf/tfmodel/pred_tf_model.py delete mode 100644 mlpf/tfmodel/tf_data.py create mode 100644 requirements.txt diff --git a/.github/workflows/pre-commit.yml b/.github/workflows/pre-commit.yml new file mode 100644 index 000000000..b10f1c781 --- /dev/null +++ b/.github/workflows/pre-commit.yml @@ -0,0 +1,27 @@ +name: Run pre-commit + +on: + pull_request: + branches: [ main ] + push: + branches: [ main ] + +jobs: + lint: + name: Lint PR or Push to main + runs-on: ubuntu-latest + strategy: + matrix: + python-version: [3.9] + + steps: + - name: Checkout + uses: actions/checkout@v2 + + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v2 + with: + python-version: ${{ matrix.python-version }} + + - name: Run Lint + uses: pre-commit/action@v2.0.0 diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 96b3d24c5..65210ca9d 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -20,15 +20,8 @@ jobs: python-version: '3.9' - name: Install python deps run: | - pip install tensorflow==2.9 setGPU \ - sklearn matplotlib boost_histogram mplhep pandas scipy uproot \ - awkward vector pyarrow fastjet keras-tuner networkx \ - tensorflow-probability tensorflow-addons \ - tqdm click tensorflow-datasets 'ray[default]'==1.6.0 'ray[tune]==1.6.0' \ - tf-models-official tensorflow-text \ - tf2onnx onnxruntime zenodo_get seaborn scikit-optimize nevergrad \ - tensorflow-estimator keras \ - notebook papermill ./hep_tfds + pip install -r requirements.txt + pip install ./hep_tfds HOROVOD_WITH_TENSORFLOW=1 pip install horovod[tensorflow,keras] - name: Run delphes TF model run: ./scripts/local_test_delphes_pipeline.sh @@ -44,15 +37,8 @@ jobs: python-version: '3.9' - name: Install python deps run: | - pip install tensorflow==2.9 setGPU \ - sklearn matplotlib boost_histogram mplhep pandas scipy 
uproot \ - awkward vector pyarrow fastjet keras-tuner networkx \ - tensorflow-probability tensorflow-addons \ - tqdm click tensorflow-datasets 'ray[default]'==1.6.0 'ray[tune]==1.6.0' \ - tf-models-official tensorflow-text \ - tf2onnx onnxruntime zenodo_get seaborn scikit-optimize nevergrad \ - tensorflow-estimator keras \ - notebook papermill ./hep_tfds + pip install -r requirements.txt + pip install ./hep_tfds HOROVOD_WITH_TENSORFLOW=1 pip install horovod[tensorflow,keras] - name: Run CMS TF model using the pipeline run: ./scripts/local_test_cms_pipeline.sh diff --git a/.gitignore b/.gitignore index a378a4529..6f720c1de 100644 --- a/.gitignore +++ b/.gitignore @@ -3,6 +3,7 @@ *.npz *.pt *.pdf +*.png data/* experiments/* prp/* @@ -19,3 +20,6 @@ test/__pycache__/ *playground.py nohup.out + +*.pkl +*.pkl.bz2 diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml new file mode 100644 index 000000000..f989e4e10 --- /dev/null +++ b/.pre-commit-config.yaml @@ -0,0 +1,40 @@ +default_language_version: + python: python3.9 + +repos: +- repo: https://github.com/pre-commit/pre-commit-hooks + rev: v4.3.0 + hooks: + - id: trailing-whitespace + - id: check-added-large-files + - id: check-ast + - id: check-json + - id: check-merge-conflict + - id: check-xml + - id: check-yaml + - id: debug-statements + - id: end-of-file-fixer + - id: requirements-txt-fixer + - id: mixed-line-ending + args: ['--fix=no'] + +- repo: https://github.com/PyCQA/isort + rev: 5.10.1 + hooks: + - id: isort + args: ['--profile', 'black', '--filter-files'] + +- repo: https://github.com/psf/black + rev: 22.6.0 + hooks: + - id: black-jupyter + language_version: python3 + args: [--line-length=125] + +- repo: https://gitlab.com/pycqa/flake8 + rev: 3.9.2 + hooks: + - id: flake8 + # black-compatible flake-8 config + args: ['--max-line-length=125', # github viewer width + '--extend-ignore=E203,W605'] # E203 is not PEP8 compliant diff --git a/mlpf/pipeline.py b/mlpf/pipeline.py index fe75e32e8..5c202b4d4 100644 --- a/mlpf/pipeline.py +++ b/mlpf/pipeline.py @@ -1,87 +1,60 @@ -try: - import comet_ml -except ModuleNotFoundError as e: - print("comet_ml not found, ignoring") - try: import horovod.tensorflow.keras as hvd except ModuleNotFoundError: print("hvd not enabled, ignoring") -import sys -import os -import yaml import json -from datetime import datetime -import glob -import random +import logging +import os +import pickle import platform -import numpy as np -from pathlib import Path -import click -from tqdm import tqdm +import random import shutil +from datetime import datetime from functools import partial -import shlex -import subprocess -import matplotlib.pyplot as plt -import logging -import pickle +from pathlib import Path +import click +import numpy as np import tensorflow as tf from tensorflow.keras import mixed_precision -import tensorflow_addons as tfa -import keras - - -from tfmodel.data import Dataset -from tfmodel.datasets import CMSDatasetFactory, DelphesDatasetFactory +from tfmodel import hypertuning +from tfmodel.lr_finder import LRFinder from tfmodel.model_setup import ( - make_model, - configure_model_weights, - LearningRateLoggingCallback, - prepare_callbacks, FlattenedCategoricalAccuracy, SingleClassRecall, + configure_model_weights, eval_model, freeze_model, + make_model, + prepare_callbacks, ) - from tfmodel.utils import ( + create_experiment_dir, + delete_all_but_best_checkpoint, + get_best_checkpoint, + get_datasets, + get_heptfds_dataset, + get_loss_dict, get_lr_schedule, get_optimizer, - 
create_experiment_dir, get_strategy, - make_weight_function, + get_tuner, load_config, - compute_weights_invsqrt, - compute_weights_none, - get_train_val_datasets, - get_dataset_def, - set_config_loss, - get_loss_dict, parse_config, - get_best_checkpoint, - delete_all_but_best_checkpoint, - get_tuner, - get_heptfds_dataset, - get_datasets, + set_config_loss, ) - -from tfmodel.lr_finder import LRFinder -from tfmodel import hypertuning from tfmodel.utils_analysis import ( - plot_ray_analysis, analyze_ray_experiment, - topk_summary_plot_v2, - summarize_top_k, count_skipped_configurations, + plot_ray_analysis, + summarize_top_k, + topk_summary_plot_v2, ) - def customize_pipeline_test(config): - #for cms.yaml, keep only ttbar + # for cms.yaml, keep only ttbar if "physical" in config["train_test_datasets"]: config["train_test_datasets"]["physical"]["datasets"] = ["cms_pf_ttbar"] config["train_test_datasets"] = {"physical": config["train_test_datasets"]["physical"]} @@ -90,15 +63,16 @@ def customize_pipeline_test(config): return config -customization_functions = { - "pipeline_test": customize_pipeline_test -} + +customization_functions = {"pipeline_test": customize_pipeline_test} + @click.group() @click.help_option("-h", "--help") def main(): pass + @main.command() @click.help_option("-h", "--help") @click.option("-c", "--config", help="configuration file", type=click.Path()) @@ -113,25 +87,35 @@ def main(): @click.option("--comet-offline", help="log comet-ml experiment locally", is_flag=True) def train(config, weights, ntrain, ntest, nepochs, recreate, prefix, plot_freq, customize, comet_offline): - #tf.debugging.enable_check_numerics() + # tf.debugging.enable_check_numerics() """Train a model defined by config""" config_file_path = config - config, config_file_stem = parse_config( - config, nepochs=nepochs, weights=weights - ) + config, config_file_stem = parse_config(config, nepochs=nepochs, weights=weights) if plot_freq: config["callbacks"]["plot_freq"] = plot_freq if customize: config = customization_functions[customize](config) - + + # Decide tf.distribute.strategy depending on number of available GPUs + horovod_enabled = config["setup"]["horovod_enabled"] + if horovod_enabled: + num_gpus = initialize_horovod() + else: + strategy, num_gpus = get_strategy() + + outdir = "" + if not horovod_enabled or hvd.rank() == 0: + outdir = create_experiment_dir(prefix=prefix + config_file_stem + "_", suffix=platform.node()) + shutil.copy(config_file_path, outdir + "/config.yaml") # Copy the config file to the train dir for later reference try: if comet_offline: print("Using comet-ml OfflineExperiment, saving logs locally.") from comet_ml import OfflineExperiment + experiment = OfflineExperiment( project_name="particleflow-tf", auto_metric_logging=True, @@ -144,7 +128,7 @@ def train(config, weights, ntrain, ntest, nepochs, recreate, prefix, plot_freq, else: print("Using comet-ml Experiment, streaming logs to www.comet.ml.") from comet_ml import Experiment - offline_dir = None + experiment = Experiment( project_name="particleflow-tf", auto_metric_logging=True, @@ -154,31 +138,26 @@ def train(config, weights, ntrain, ntest, nepochs, recreate, prefix, plot_freq, auto_histogram_activation_logging=False, ) except Exception as e: - print("Failed to initialize comet-ml dashboard") + print("Failed to initialize comet-ml dashboard: {}".format(e)) experiment = None - - # Decide tf.distribute.strategy depending on number of available GPUs - horovod_enabled = config["setup"]["horovod_enabled"] - if horovod_enabled: 
- num_gpus = initialize_horovod() - else: - strategy, num_gpus = get_strategy() - outdir = '' - if not horovod_enabled or hvd.rank() == 0: - outdir = create_experiment_dir(prefix=prefix + config_file_stem + "_", suffix=platform.node()) - if experiment: - experiment.set_name(outdir) - experiment.log_code("mlpf/tfmodel/model.py") - experiment.log_code("mlpf/tfmodel/utils.py") - experiment.log_code(config_file_path) - - shutil.copy(config_file_path, outdir + "/config.yaml") # Copy the config file to the train dir for later reference + if experiment: + experiment.set_name(outdir) + experiment.log_code("mlpf/tfmodel/model.py") + experiment.log_code("mlpf/tfmodel/utils.py") + experiment.log_code(config_file_path) ds_train, num_train_steps = get_datasets(config["train_test_datasets"], config, num_gpus, "train") ds_test, num_test_steps = get_datasets(config["train_test_datasets"], config, num_gpus, "test") - ds_val, ds_info = get_heptfds_dataset(config["validation_datasets"][0], config, num_gpus, "test", config["setup"]["num_events_validation"], supervised=False) + ds_val, ds_info = get_heptfds_dataset( + config["validation_datasets"][0], + config, + num_gpus, + "test", + config["setup"]["num_events_validation"], + supervised=False, + ) ds_val = ds_val.batch(5) - + if ntrain: ds_train = ds_train.take(ntrain) num_train_steps = ntrain @@ -191,23 +170,18 @@ def train(config, weights, ntrain, ntest, nepochs, recreate, prefix, plot_freq, total_steps = num_train_steps * config["setup"]["num_epochs"] print("total_steps", total_steps) - - if horovod_enabled : - model,optim_callbacks,initial_epoch = model_scope(config, total_steps, weights, horovod_enabled) + if horovod_enabled: + model, optim_callbacks, initial_epoch = model_scope(config, total_steps, weights, horovod_enabled) else: with strategy.scope(): - model,optim_callbacks,initial_epoch = model_scope(config, total_steps, weights) + model, optim_callbacks, initial_epoch = model_scope(config, total_steps, weights) callbacks = prepare_callbacks( - config, - outdir, - ds_val, - comet_experiment=experiment, - horovod_enabled=config["setup"]["horovod_enabled"] + config, outdir, ds_val, comet_experiment=experiment, horovod_enabled=config["setup"]["horovod_enabled"] ) verbose = 1 - if horovod_enabled: + if horovod_enabled: callbacks.append(hvd.callbacks.BroadcastGlobalVariablesCallback(0)) callbacks.append(hvd.callbacks.MetricAverageCallback()) verbose = 1 if hvd.rank() == 0 else 0 @@ -215,11 +189,9 @@ def train(config, weights, ntrain, ntest, nepochs, recreate, prefix, plot_freq, num_train_steps /= hvd.size() num_test_steps /= hvd.size() - callbacks.append(optim_callbacks) - - fit_result = model.fit( + model.fit( ds_train.repeat(), validation_data=ds_test.repeat(), epochs=initial_epoch + config["setup"]["num_epochs"], @@ -227,12 +199,13 @@ def train(config, weights, ntrain, ntest, nepochs, recreate, prefix, plot_freq, steps_per_epoch=num_train_steps, validation_steps=num_test_steps, initial_epoch=initial_epoch, - verbose=verbose + verbose=verbose, ) # if not horovod_enabled or hvd.rank()==0: # model_save(outdir, fit_result, model, weights) + def model_save(outdir, fit_result, model, weights): history_path = Path(outdir) / "history" history_path = str(history_path) @@ -243,13 +216,13 @@ def model_save(outdir, fit_result, model, weights): print("Loading best weights that could be found from {}".format(weights)) model.load_weights(weights, by_name=True) - #model.save(outdir + "/model_full", save_format="tf") + # model.save(outdir + "/model_full", 
save_format="tf") print("Training done.") + def model_scope(config, total_steps, weights, horovod_enabled=False): lr_schedule, optim_callbacks, lr = get_lr_schedule(config, steps=total_steps) opt = get_optimizer(config, lr_schedule) - if config["setup"]["dtype"] == "float16": model_dtype = tf.dtypes.float16 @@ -266,7 +239,7 @@ def model_scope(config, total_steps, weights, horovod_enabled=False): initial_epoch = 0 loaded_opt = None - + if weights: if lr_schedule: raise Exception("Restoring the optimizer state with a learning rate schedule is currently not supported") @@ -301,19 +274,19 @@ def model_scope(config, total_steps, weights, horovod_enabled=False): "cls": [ FlattenedCategoricalAccuracy(name="acc_unweighted", dtype=tf.float64), FlattenedCategoricalAccuracy(use_weights=True, name="acc_weighted", dtype=tf.float64), - ] + [ - SingleClassRecall( - icls, - name="rec_cls{}".format(icls), - dtype=tf.float64) for icls in range(config["dataset"]["num_output_classes"]) + ] + + [ + SingleClassRecall(icls, name="rec_cls{}".format(icls), dtype=tf.float64) + for icls in range(config["dataset"]["num_output_classes"]) ] }, ) model.summary() - #Set the optimizer weights + # Set the optimizer weights if loaded_opt: + def model_weight_setting(): grad_vars = model.trainable_weights zero_grads = [tf.zeros_like(w) for w in grad_vars] @@ -322,26 +295,25 @@ def model_weight_setting(): model.optimizer.optimizer.optimizer.set_weights(loaded_opt["weights"]) else: model.optimizer.set_weights(loaded_opt["weights"]) - try: - strategy.run(model_weight_setting) - except Exception as e: - print(e) - return model,optim_callbacks,initial_epoch + # FIXME: check that this still works with multiple GPUs + strategy = tf.distribute.get_strategy() + strategy.run(model_weight_setting) + + return model, optim_callbacks, initial_epoch + def initialize_horovod(): hvd.init() - gpus = tf.config.experimental.list_physical_devices('GPU') + gpus = tf.config.experimental.list_physical_devices("GPU") for gpu in gpus: tf.config.experimental.set_memory_growth(gpu, True) if gpus: - tf.config.experimental.set_visible_devices(gpus[hvd.local_rank()], 'GPU') + tf.config.experimental.set_visible_devices(gpus[hvd.local_rank()], "GPU") return hvd.size() - - @main.command() @click.help_option("-h", "--help") @click.option("-t", "--train_dir", required=True, help="directory containing a completed training", type=click.Path()) @@ -358,7 +330,6 @@ def compute_validation_loss(config, train_dir, weights): model_dtype = tf.dtypes.float16 policy = mixed_precision.Policy("mixed_float16") mixed_precision.set_global_policy(policy) - opt = mixed_precision.LossScaleOptimizer(opt) else: model_dtype = tf.dtypes.float32 @@ -387,11 +358,10 @@ def compute_validation_loss(config, train_dir, weights): "cls": [ FlattenedCategoricalAccuracy(name="acc_unweighted", dtype=tf.float64), FlattenedCategoricalAccuracy(use_weights=True, name="acc_weighted", dtype=tf.float64), - ] + [ - SingleClassRecall( - icls, - name="rec_cls{}".format(icls), - dtype=tf.float64) for icls in range(config["dataset"]["num_output_classes"]) + ] + + [ + SingleClassRecall(icls, name="rec_cls{}".format(icls), dtype=tf.float64) + for icls in range(config["dataset"]["num_output_classes"]) ] }, ) @@ -404,6 +374,7 @@ def compute_validation_loss(config, train_dir, weights): with open("{}/losses.txt".format(train_dir), "w") as loss_file: loss_file.write(json.dumps(losses) + "\n") + @main.command() @click.help_option("-h", "--help") @click.option("-t", "--train_dir", required=True, help="directory 
containing a completed training", type=click.Path()) @@ -417,7 +388,7 @@ def evaluate(config, train_dir, weights, customize, nevents): config = Path(train_dir) / "config.yaml" assert config.exists(), "Could not find config file in train_dir, please provide one with -c " config, _ = parse_config(config, weights=weights) - + if customize: config = customization_functions[customize](config) @@ -425,18 +396,17 @@ def evaluate(config, train_dir, weights, customize, nevents): model_dtype = tf.dtypes.float16 policy = mixed_precision.Policy("mixed_float16") mixed_precision.set_global_policy(policy) - opt = mixed_precision.LossScaleOptimizer(opt) else: model_dtype = tf.dtypes.float32 strategy, num_gpus = get_strategy() - #physical_devices = tf.config.list_physical_devices('GPU') - #for dev in physical_devices: + # physical_devices = tf.config.list_physical_devices('GPU') + # for dev in physical_devices: # tf.config.experimental.set_memory_growth(dev, True) model = make_model(config, model_dtype) model.build((1, config["dataset"]["padded_num_elem_size"], config["dataset"]["num_input_features"])) - + # need to load the weights in the same trainable configuration as the model was set up configure_model_weights(model, config["setup"].get("weights_config", "all")) if weights: @@ -459,6 +429,7 @@ def evaluate(config, train_dir, weights, customize, nevents): freeze_model(model, config, train_dir) + @main.command() @click.help_option("-h", "--help") @click.option("-c", "--config", help="configuration file", type=click.Path()) @@ -547,10 +518,19 @@ def hypertune(config, outdir, ntrain, ntest, recreate): config["setup"]["num_epochs"] = config["hypertune"]["hyperband"]["max_epochs"] strategy, num_gpus = get_strategy() - - ds_train, ds_info = get_heptfds_dataset(config["training_dataset"], config, num_gpus, "train", config["setup"]["num_events_train"]) + + ds_train, ds_info = get_heptfds_dataset( + config["training_dataset"], config, num_gpus, "train", config["setup"]["num_events_train"] + ) ds_test, _ = get_heptfds_dataset(config["testing_dataset"], config, num_gpus, "test", config["setup"]["num_events_test"]) - ds_val, _ = get_heptfds_dataset(config["validation_datasets"][0], config, num_gpus, "test", config["setup"]["num_events_validation"], supervised=False) + ds_val, _ = get_heptfds_dataset( + config["validation_datasets"][0], + config, + num_gpus, + "test", + config["setup"]["num_events_validation"], + supervised=False, + ) ds_val = ds_val.batch(5) num_train_steps = 0 @@ -569,7 +549,7 @@ def hypertune(config, outdir, ntrain, ntest, recreate): ) callbacks.append(optim_callbacks) - callbacks.append(tf.keras.callbacks.EarlyStopping(patience=20, monitor='val_loss')) + callbacks.append(tf.keras.callbacks.EarlyStopping(patience=20, monitor="val_loss")) tuner = get_tuner(config["hypertune"], model_builder, outdir, recreate, strategy) tuner.search_space_summary() @@ -591,80 +571,89 @@ def hypertune(config, outdir, ntrain, ntest, recreate): def build_model_and_train(config, checkpoint_dir=None, full_config=None, ntrain=None, ntest=None, name=None, seeds=False): - from ray import tune - from raytune.search_space import set_raytune_search_parameters - from ray.tune.integration.keras import TuneReportCheckpointCallback - if seeds: - # Set seeds for reproducibility - random.seed(1234) - np.random.seed(1234) - tf.random.set_seed(1234) + from ray import tune + from ray.tune.integration.keras import TuneReportCheckpointCallback + from raytune.search_space import set_raytune_search_parameters - full_config, 
config_file_stem = parse_config(full_config) + if seeds: + # Set seeds for reproducibility + random.seed(1234) + np.random.seed(1234) + tf.random.set_seed(1234) - if config is not None: - full_config = set_raytune_search_parameters(search_space=config, config=full_config) + full_config, config_file_stem = parse_config(full_config) - strategy, num_gpus = get_strategy() + if config is not None: + full_config = set_raytune_search_parameters(search_space=config, config=full_config) - ds_train, num_train_steps = get_datasets(full_config["train_test_datasets"], full_config, num_gpus, "train") - ds_test, num_test_steps = get_datasets(full_config["train_test_datasets"], full_config, num_gpus, "test") - ds_val, ds_info = get_heptfds_dataset(full_config["validation_datasets"][0], full_config, num_gpus, "test", full_config["setup"]["num_events_validation"], supervised=False) - ds_val = ds_val.batch(5) - - if ntrain: - ds_train = ds_train.take(ntrain) - num_train_steps = ntrain - if ntest: - ds_test = ds_test.take(ntest) - num_test_steps = ntest - - print("num_train_steps", num_train_steps) - print("num_test_steps", num_test_steps) - total_steps = num_train_steps * full_config["setup"]["num_epochs"] - print("total_steps", total_steps) - - callbacks = prepare_callbacks( - full_config, - tune.get_trial_dir(), - ds_val, - ) + strategy, num_gpus = get_strategy() - callbacks = callbacks[:-1] # remove the CustomCallback at the end of the list + ds_train, num_train_steps = get_datasets(full_config["train_test_datasets"], full_config, num_gpus, "train") + ds_test, num_test_steps = get_datasets(full_config["train_test_datasets"], full_config, num_gpus, "test") + ds_val, ds_info = get_heptfds_dataset( + full_config["validation_datasets"][0], + full_config, + num_gpus, + "test", + full_config["setup"]["num_events_validation"], + supervised=False, + ) + ds_val = ds_val.batch(5) - with strategy.scope(): - lr_schedule, optim_callbacks = get_lr_schedule(full_config, steps=total_steps) - callbacks.append(optim_callbacks) - opt = get_optimizer(full_config, lr_schedule) - - model = make_model(full_config, dtype=tf.dtypes.float32) - - # Run model once to build the layers - model.build((1, full_config["dataset"]["padded_num_elem_size"], full_config["dataset"]["num_input_features"])) - - full_config = set_config_loss(full_config, full_config["setup"]["trainable"]) - configure_model_weights(model, full_config["setup"]["trainable"]) - model.build((1, full_config["dataset"]["padded_num_elem_size"], full_config["dataset"]["num_input_features"])) - - loss_dict, loss_weights = get_loss_dict(full_config) - model.compile( - loss=loss_dict, - optimizer=opt, - sample_weight_mode="temporal", - loss_weights=loss_weights, - metrics={ - "cls": [ - FlattenedCategoricalAccuracy(name="acc_unweighted", dtype=tf.float64), - FlattenedCategoricalAccuracy(use_weights=True, name="acc_weighted", dtype=tf.float64), - ] - }, - ) - model.summary() + if ntrain: + ds_train = ds_train.take(ntrain) + num_train_steps = ntrain + if ntest: + ds_test = ds_test.take(ntest) + num_test_steps = ntest + + print("num_train_steps", num_train_steps) + print("num_test_steps", num_test_steps) + total_steps = num_train_steps * full_config["setup"]["num_epochs"] + print("total_steps", total_steps) + + callbacks = prepare_callbacks( + full_config, + tune.get_trial_dir(), + ds_val, + ) + + callbacks = callbacks[:-1] # remove the CustomCallback at the end of the list + + with strategy.scope(): + lr_schedule, optim_callbacks = get_lr_schedule(full_config, 
steps=total_steps) + callbacks.append(optim_callbacks) + opt = get_optimizer(full_config, lr_schedule) + + model = make_model(full_config, dtype=tf.dtypes.float32) + + # Run model once to build the layers + model.build((1, full_config["dataset"]["padded_num_elem_size"], full_config["dataset"]["num_input_features"])) + + full_config = set_config_loss(full_config, full_config["setup"]["trainable"]) + configure_model_weights(model, full_config["setup"]["trainable"]) + model.build((1, full_config["dataset"]["padded_num_elem_size"], full_config["dataset"]["num_input_features"])) - callbacks.append(TuneReportCheckpointCallback( + loss_dict, loss_weights = get_loss_dict(full_config) + model.compile( + loss=loss_dict, + optimizer=opt, + sample_weight_mode="temporal", + loss_weights=loss_weights, + metrics={ + "cls": [ + FlattenedCategoricalAccuracy(name="acc_unweighted", dtype=tf.float64), + FlattenedCategoricalAccuracy(use_weights=True, name="acc_weighted", dtype=tf.float64), + ] + }, + ) + model.summary() + + callbacks.append( + TuneReportCheckpointCallback( metrics=[ "adam_beta_1", - 'charge_loss', + "charge_loss", "cls_acc_unweighted", "cls_loss", "cos_phi_loss", @@ -684,30 +673,30 @@ def build_model_and_train(config, checkpoint_dir=None, full_config=None, ntrain= "val_loss", "val_pt_loss", "val_sin_phi_loss", - ], - ), + ], + ), + ) + + try: + model.fit( + ds_train.repeat(), + validation_data=ds_test.repeat(), + epochs=full_config["setup"]["num_epochs"], + callbacks=callbacks, + steps_per_epoch=num_train_steps, + validation_steps=num_test_steps, ) + except tf.errors.ResourceExhaustedError: + logging.warning("Resource exhausted, skipping this hyperparameter configuration.") + skiplog_file_path = Path(full_config["raytune"]["local_dir"]) / name / "skipped_configurations.txt" + lines = ["{}: {}\n".format(item[0], item[1]) for item in config.items()] - try: - fit_result = model.fit( - ds_train.repeat(), - validation_data=ds_test.repeat(), - epochs=full_config["setup"]["num_epochs"], - callbacks=callbacks, - steps_per_epoch=num_train_steps, - validation_steps=num_test_steps, - ) - except tf.errors.ResourceExhaustedError: - logging.warning("Resource exhausted, skipping this hyperparameter configuration.") - skiplog_file_path = Path(full_config["raytune"]["local_dir"]) / name / "skipped_configurations.txt" - lines = ["{}: {}\n".format(item[0], item[1]) for item in config.items()] - - with open(skiplog_file_path, "a") as f: - f.write("#"*80 + "\n") - for line in lines: - f.write(line) - logging.warning(line[:-1]) - f.write("#"*80 + "\n\n") + with open(skiplog_file_path, "a") as f: + f.write("#" * 80 + "\n") + for line in lines: + f.write(line) + logging.warning(line[:-1]) + f.write("#" * 80 + "\n\n") @main.command() @@ -726,7 +715,7 @@ def raytune(config, name, local, cpus, gpus, tune_result_dir, resume, ntrain, nt import ray from ray import tune from ray.tune.logger import TBXLoggerCallback - from raytune.search_space import search_space, raytune_num_samples + from raytune.search_space import raytune_num_samples, search_space from raytune.utils import get_raytune_schedule, get_raytune_search_alg if seeds: @@ -748,12 +737,16 @@ def raytune(config, name, local, cpus, gpus, tune_result_dir, resume, ntrain, nt expdir = Path(cfg["raytune"]["local_dir"]) / name expdir.mkdir(parents=True, exist_ok=True) - shutil.copy("mlpf/raytune/search_space.py", str(Path(cfg["raytune"]["local_dir"]) / name / "search_space.py")) # Copy the config file to the train dir for later reference - shutil.copy(config_file_path, 
str(Path(cfg["raytune"]["local_dir"]) / name / "config.yaml")) # Copy the config file to the train dir for later reference + shutil.copy( + "mlpf/raytune/search_space.py", str(Path(cfg["raytune"]["local_dir"]) / name / "search_space.py") + ) # Copy the config file to the train dir for later reference + shutil.copy( + config_file_path, str(Path(cfg["raytune"]["local_dir"]) / name / "config.yaml") + ) # Copy the config file to the train dir for later reference ray.tune.ray_trial_executor.DEFAULT_GET_TIMEOUT = 1 * 60 * 60 # Avoid timeout errors if not local: - ray.init(address='auto') + ray.init(address="auto") sched = get_raytune_schedule(cfg["raytune"]) search_alg = get_raytune_search_alg(cfg["raytune"], seeds) @@ -780,8 +773,10 @@ def raytune(config, name, local, cpus, gpus, tune_result_dir, resume, ntrain, nt end = datetime.now() print("Total time of tune.run(...): {}".format(end - start)) - print("Best hyperparameters found according to {} were: ".format(cfg["raytune"]["default_metric"]), - analysis.get_best_config(cfg["raytune"]["default_metric"], cfg["raytune"]["default_mode"])) + print( + "Best hyperparameters found according to {} were: ".format(cfg["raytune"]["default_metric"]), + analysis.get_best_config(cfg["raytune"]["default_metric"], cfg["raytune"]["default_mode"]), + ) skip = 20 if skip > cfg["setup"]["num_epochs"]: @@ -822,36 +817,11 @@ def count_skipped(exp_dir): @click.option("--mode", help="experiment dir", type=str, default="min") def raytune_analysis(exp_dir, save, skip, mode, metric): from ray.tune import ExperimentAnalysis + experiment_analysis = ExperimentAnalysis(exp_dir, default_metric=metric, default_mode=mode) plot_ray_analysis(experiment_analysis, save=save, skip=skip) analyze_ray_experiment(exp_dir, default_metric=metric, default_mode=mode) -@main.command() -@click.help_option("-h", "--help") -@click.option("-c", "--config", help="configuration file", type=click.Path()) -@click.option("--ntrain", default=None, help="override the number of training events", type=int) -@click.option("--ntest", default=None, help="override the number of testing events", type=int) -def debug_data(config, ntrain, ntest): - """Train a model defined by config""" - config, config_file_stem, global_batch_size, n_train, n_test, n_epochs, weights = parse_config( - config, ntrain, ntest, weights=None, - ) - - dataset_def = get_dataset_def(config) - ds_train, ds_test, dataset_transform = get_train_val_datasets(config, global_batch_size=1, n_train=n_train, n_test=n_test) - - # cand_counts = np.zeros(8) - # for data_item in tqdm(ds_train, desc="Counting"): - # import pdb; pdb.set_trace() - # cand_vals, cand_count = np.unique(np.argmax(data_item[1]['cls'], axis=2), return_counts=True) - # cand_counts[cand_vals.astype("int32")] += cand_count - # print("cand_counts: ", cand_counts) - - dsf = CMSDatasetFactory(config) - ds_train, _ = dsf.get_dataset(split="train") - ds_test, _ = dsf.get_dataset(split="test") - for data_item in tqdm(ds_train, desc="Counting"): - import pdb; pdb.set_trace() if __name__ == "__main__": main() diff --git a/mlpf/tfmodel/data.py b/mlpf/tfmodel/data.py deleted file mode 100644 index 9e69459c6..000000000 --- a/mlpf/tfmodel/data.py +++ /dev/null @@ -1,7 +0,0 @@ - -class Dataset: - def __init__(self, **kwargs): - self.num_input_features = kwargs.get("num_input_features") - self.num_output_features = kwargs.get("num_output_features") - self.padded_num_elem_size = kwargs.get("padded_num_elem_size") - self.schema = kwargs.get("schema") diff --git 
a/mlpf/tfmodel/datasets/BaseDatasetFactory.py b/mlpf/tfmodel/datasets/BaseDatasetFactory.py index 0b0f9c6a9..7175d7e96 100644 --- a/mlpf/tfmodel/datasets/BaseDatasetFactory.py +++ b/mlpf/tfmodel/datasets/BaseDatasetFactory.py @@ -1,18 +1,17 @@ import tensorflow as tf -import tensorflow_datasets as tfds -import heptfds -#Unpacks a flat target array along the feature axis to a feature dict -#the feature order is defined in the data prep stage (postprocessing2.py) + +# Unpacks a flat target array along the feature axis to a feature dict +# the feature order is defined in the data prep stage (postprocessing2.py) def unpack_target(y, num_output_classes, config): - msk_pid = tf.cast(y[..., 0:1]!=0, tf.float32) - - pt = y[..., 2:3]*msk_pid - energy = y[..., 6:7]*msk_pid - eta = y[..., 3:4]*msk_pid - sin_phi = y[..., 4:5]*msk_pid - cos_phi = y[..., 5:6]*msk_pid - jet_idx = y[..., 7:8]*msk_pid + msk_pid = tf.cast(y[..., 0:1] != 0, tf.float32) + + pt = y[..., 2:3] * msk_pid + energy = y[..., 6:7] * msk_pid + eta = y[..., 3:4] * msk_pid + sin_phi = y[..., 4:5] * msk_pid + cos_phi = y[..., 5:6] * msk_pid + jet_idx = y[..., 7:8] * msk_pid ret = { "cls": tf.one_hot(tf.cast(y[..., 0], tf.int32), num_output_classes), @@ -30,6 +29,7 @@ def unpack_target(y, num_output_classes, config): return ret + class BaseDatasetFactory: def __init__(self, config): self.cfg = config @@ -38,35 +38,37 @@ def get_map_to_supervised(self): target_particles = self.cfg["dataset"]["target_particles"] num_output_classes = self.cfg["dataset"]["num_output_classes"] assert target_particles in ["gen", "cand"], "Target particles has to be 'cand' or 'gen'." + def func(data_item): X = data_item["X"] y = data_item["y{}".format(target_particles)] - #mask to keep only nonzero elements - msk_elems = tf.cast(X[:, 0:1]!=0, tf.float32) + # mask to keep only nonzero elements + msk_elems = tf.cast(X[:, 0:1] != 0, tf.float32) - #mask to keep only nonzero target particles - msk_signal = tf.cast(y[:, 0:1]!=0, tf.float32) + # mask to keep only nonzero target particles + msk_signal = tf.cast(y[:, 0:1] != 0, tf.float32) target = unpack_target(y, num_output_classes, self.cfg) - #inputs: X - #targets: dict by classification (cls) and regression feature columns - #weights: dict of weights for each target + # inputs: X + # targets: dict by classification (cls) and regression feature columns + # weights: dict of weights for each target return ( X, target, { "cls": msk_elems, - "charge": msk_elems*msk_signal, - "pt": msk_elems*msk_signal, - "eta": msk_elems*msk_signal, - "sin_phi": msk_elems*msk_signal, - "cos_phi": msk_elems*msk_signal, - "energy": msk_elems*msk_signal, - } + "charge": msk_elems * msk_signal, + "pt": msk_elems * msk_signal, + "eta": msk_elems * msk_signal, + "sin_phi": msk_elems * msk_signal, + "cos_phi": msk_elems * msk_signal, + "energy": msk_elems * msk_signal, + }, ) + return func - + def get_dataset(self, split, max_examples_per_split=None): raise NotImplementedError diff --git a/mlpf/tfmodel/datasets/CMSDatasetFactory.py b/mlpf/tfmodel/datasets/CMSDatasetFactory.py index 00ce69fae..8fdaddabd 100644 --- a/mlpf/tfmodel/datasets/CMSDatasetFactory.py +++ b/mlpf/tfmodel/datasets/CMSDatasetFactory.py @@ -1,9 +1,7 @@ -import tensorflow as tf -import tensorflow_datasets as tfds -import heptfds - from tfmodel.datasets import BaseDatasetFactory +import tensorflow_datasets as tfds + class CMSDatasetFactory(BaseDatasetFactory): def get_dataset(self, dataset_name, dataset_dict, split, max_examples_per_split=None): diff --git 
a/mlpf/tfmodel/datasets/DelphesDatasetFactory.py b/mlpf/tfmodel/datasets/DelphesDatasetFactory.py index e3395afa2..67b1db3bd 100644 --- a/mlpf/tfmodel/datasets/DelphesDatasetFactory.py +++ b/mlpf/tfmodel/datasets/DelphesDatasetFactory.py @@ -1,9 +1,7 @@ -import tensorflow as tf -import tensorflow_datasets as tfds -import heptfds - from tfmodel.datasets import BaseDatasetFactory +import tensorflow_datasets as tfds + class DelphesDatasetFactory(BaseDatasetFactory): def get_dataset(self, dataset_name, dataset_dict, split, max_examples_per_split=None): diff --git a/mlpf/tfmodel/delphes_data.py b/mlpf/tfmodel/delphes_data.py deleted file mode 100644 index 342ebb66e..000000000 --- a/mlpf/tfmodel/delphes_data.py +++ /dev/null @@ -1,179 +0,0 @@ -import numpy as np -import glob -import multiprocessing -import os -import pickle -import bz2 - -import tensorflow as tf - -#based on the dataset size distribution, divisible by 8 -padded_num_elem_size = 128*50 - -#based on ntuplizer.py make_tower_array and make_track_array -num_inputs = 12 - -#based on ntuplizer.py make_gen_array -num_outputs = 7 - -def prepare_data(fname): - - if fname.endswith(".pkl"): - data = pickle.load(open(fname, "rb")) - elif fname.endswith(".pkl.bz2"): - data = pickle.load(bz2.BZ2File(fname, "rb")) - else: - raise Exception("Unknown file: {}".format(fname)) - - #make all inputs and outputs the same size with padding - Xs = [] - ygens = [] - ycands = [] - for i in range(len(data["X"])): - X = np.array(data["X"][i][:padded_num_elem_size], np.float32) - X = np.pad(X, [(0, padded_num_elem_size - X.shape[0]), (0,0)]) - - ygen = np.array(data["ygen"][i][:padded_num_elem_size], np.float32) - ygen = np.pad(ygen, [(0, padded_num_elem_size - ygen.shape[0]), (0,0)]) - - ycand = np.array(data["ycand"][i][:padded_num_elem_size], np.float32) - ycand = np.pad(ycand, [(0, padded_num_elem_size - ycand.shape[0]), (0,0)]) - - X = np.expand_dims(X, 0) - ygen = np.expand_dims(ygen, 0) - ycand = np.expand_dims(ycand, 0) - - Xs.append(X) - ygens.append(ygen) - ycands.append(ycand) - - X = [np.concatenate(Xs)] - ygen = [np.concatenate(ygens)] - ycand = [np.concatenate(ycands)] - return X, ygen, ycand - -def parse_args(): - import argparse - parser = argparse.ArgumentParser() - parser.add_argument("--datapath", type=str, required=True, help="Input data path") - parser.add_argument("--num-files-per-tfr", type=int, default=10, help="Number of pickle files to merge to one TFRecord file") - args = parser.parse_args() - return args - -def chunks(lst, n): - """Yield successive n-sized chunks from lst.""" - for i in range(0, len(lst), n): - yield lst[i:i + n] - -#https://stackoverflow.com/questions/47861084/how-to-store-numpy-arrays-as-tfrecord -def _bytes_feature(value): - """Returns a bytes_list from a string / byte.""" - if isinstance(value, type(tf.constant(0))): # if value ist tensor - value = value.numpy() # get value of tensor - return tf.train.Feature(bytes_list=tf.train.BytesList(value=[value])) - -def _parse_tfr_element(element): - parse_dic = { - 'X': tf.io.FixedLenFeature([], tf.string), - 'y': tf.io.FixedLenFeature([], tf.string), - 'w': tf.io.FixedLenFeature([], tf.string), - } - example_message = tf.io.parse_single_example(element, parse_dic) - - X = example_message['X'] - arr_X = tf.io.parse_tensor(X, out_type=tf.float32) - y = example_message['y'] - arr_y = tf.io.parse_tensor(y, out_type=tf.float32) - w = example_message['w'] - arr_w = tf.io.parse_tensor(w, out_type=tf.float32) - - 
#https://github.com/tensorflow/tensorflow/issues/24520#issuecomment-577325475 - arr_X.set_shape(tf.TensorShape((None, num_inputs))) - arr_y.set_shape(tf.TensorShape((None, num_outputs))) - arr_w.set_shape(tf.TensorShape((None, ))) - #inds = tf.stack([arr_dm_row, arr_dm_col], axis=-1) - #dm_sparse = tf.SparseTensor(values=arr_dm_data, indices=inds, dense_shape=[tf.shape(arr_X)[0], tf.shape(arr_X)[0]]) - - return arr_X, arr_y, arr_w - -def serialize_X_y_w(writer, X, y, w): - feature = { - 'X': _bytes_feature(tf.io.serialize_tensor(X)), - 'y': _bytes_feature(tf.io.serialize_tensor(y)), - 'w': _bytes_feature(tf.io.serialize_tensor(w)), - } - sample = tf.train.Example(features=tf.train.Features(feature=feature)) - writer.write(sample.SerializeToString()) - -def serialize_chunk(args): - path, files, ichunk = args - out_filename = os.path.join(path, "chunk_{}.tfrecords".format(ichunk)) - writer = tf.io.TFRecordWriter(out_filename) - Xs = [] - ys = [] - ws = [] - dms = [] - - for fi in files: - X, y, _ = prepare_data(fi) - - Xs += X - ys += y - - Xs = np.concatenate(Xs) - ys = np.concatenate(ys) - assert(Xs.shape[2] == num_inputs) - assert(Xs.shape[1] == padded_num_elem_size) - assert(ys.shape[2] == num_outputs) - assert(ys.shape[1] == padded_num_elem_size) - - #set weights for each sample to be equal to the number of samples of this type - #in the training script, this can be used to compute either inverse or class-balanced weights - uniq_vals, uniq_counts = np.unique(np.concatenate([y[:, 0] for y in ys]), return_counts=True) - for i in range(len(ys)): - w = np.ones(len(ys[i]), dtype=np.float32) - for uv, uc in zip(uniq_vals, uniq_counts): - w[ys[i][:, 0]==uv] = uc - ws += [w] - - for X, y, w in zip(Xs, ys, ws): - serialize_X_y_w(writer, X, y, w) - - print(out_filename) - writer.close() - -if __name__ == "__main__": - args = parse_args() - tf.config.experimental_run_functions_eagerly(True) - - datapath = args.datapath - - filelist = sorted(glob.glob("{}/*.pkl.bz2".format(datapath))) - print("found {} files".format(len(filelist))) - #means, stds = extract_means_stds(filelist) - outpath = "{}/tfr".format(datapath) - - if not os.path.isdir(outpath): - os.makedirs(outpath) - - pars = [] - for ichunk, files in enumerate(chunks(filelist, args.num_files_per_tfr)): - pars += [(outpath, files, ichunk)] - #serialize_chunk(pars[0]) - pool = multiprocessing.Pool(8) - pool.map(serialize_chunk, pars) - #for chunk in pars: - # serialize_chunk(chunk) - - - #Load and test the dataset - tfr_dataset = tf.data.TFRecordDataset(glob.glob(outpath + "/*.tfrecords")) - dataset = tfr_dataset.map(_parse_tfr_element) - num_ev = 0 - num_particles = 0 - for X, y, w in dataset: - num_ev += 1 - num_particles += len(X) - - print("Created TFRecords dataset in {} with {} events, {} particles".format( - datapath, num_ev, num_particles)) diff --git a/mlpf/tfmodel/fast_attention.py b/mlpf/tfmodel/fast_attention.py deleted file mode 100644 index 204f00e78..000000000 --- a/mlpf/tfmodel/fast_attention.py +++ /dev/null @@ -1,486 +0,0 @@ -# coding=utf-8 -# Copyright 2020 The Google Research Authors. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""Implementation of multiheaded FAVOR-attention & FAVOR-self-attention layers. - -Prefix Sum Tensorflow implementation by Valerii Likhosherstov. - -Minor modifications for TF 2.3 by Joosep Pata: - - remove seed, use tf.concat in create_projection_matrix, -""" -from . import fast_attention_util as util -import math -import tensorflow as tf - -BIG_CONSTANT = 1e8 - -@tf.function -def create_projection_matrix(m, d, seed=0, scaling=0, dtype=tf.float32): - r"""Constructs the matrix of random projections. - - Constructs a matrix of random orthogonal projections. Each projection vector - has direction chosen uniformly at random and either deterministic length - \sqrt{d} or length taken from the \chi(d) distribution (in the latter case - marginal distributions of the projections are d-dimensional Gaussian vectors - with associated identity covariance matrix). - - Args: - m: number of random projections. - d: dimensionality of each random projection. - seed: random seed used to construct projections. - scaling: 1 if all the random projections need to be renormalized to have - length \sqrt{d}, 0 if the lengths of random projections should follow - \chi(d) distribution. - - Returns: - The matrix of random projections of the shape [m, d]. - """ - nb_full_blocks = int(m / d) - block_list = [] - #current_seed = tf.constant(seed) - for iblock in range(nb_full_blocks): - unstructured_block = tf.random.normal((d, d), dtype=dtype) - q, _ = tf.linalg.qr(unstructured_block) - q = tf.transpose(q) - block_list.append(q) - #current_seed += 1 - remaining_rows = m - nb_full_blocks * d - if remaining_rows > 0: - unstructured_block = tf.random.normal((d, d), dtype=dtype) - q, _ = tf.linalg.qr(unstructured_block) - q = tf.transpose(q) - block_list.append(q[0:remaining_rows]) - final_matrix = tf.concat(block_list, axis=0) - #current_seed += 1 - - if scaling == 0: - multiplier = tf.norm(tf.random.normal((m, d), dtype=dtype), axis=1) - elif scaling == 1: - multiplier = tf.math.sqrt(float(d)) * tf.ones((m)) - else: - raise ValueError("Scaling must be one of {0, 1}. Was %s" % scaling) - - ret = tf.linalg.matmul(tf.linalg.diag(multiplier), final_matrix) - return ret - -def relu_kernel_transformation(data, - is_query, - projection_matrix=None, - numerical_stabilizer=0.001): - """Computes features for the ReLU-kernel. - - Computes random features for the ReLU kernel from - https://arxiv.org/pdf/2009.14794.pdf. - - Args: - data: input data tensor of the shape [B, L, H, D], where: B - batch - dimension, L - attention dimensions, H - heads, D - features. - is_query: indicates whether input data is a query oor key tensor. - projection_matrix: random Gaussian matrix of shape [M, D], where M stands - for the number of random features and each D x D sub-block has pairwise - orthogonal rows. - numerical_stabilizer: small positive constant for numerical stability. - - Returns: - Corresponding kernel feature map. 
- """ - del is_query - if projection_matrix is None: - return tf.nn.relu(data) + numerical_stabilizer - else: - ratio = 1.0 / tf.math.sqrt( - tf.dtypes.cast(projection_matrix.shape[0], data.dtype)) - data_dash = ratio * tf.einsum("blhd,md->blhm", data, projection_matrix) - return tf.nn.relu(data_dash) + numerical_stabilizer - - -def softmax_kernel_transformation(data, - is_query, - projection_matrix=None, - numerical_stabilizer=0.000001): - """Computes random features for the softmax kernel using FAVOR+ mechanism. - - Computes random features for the softmax kernel using FAVOR+ mechanism from - https://arxiv.org/pdf/2009.14794.pdf. - - Args: - data: input data tensor of the shape [B, L, H, D], where: B - batch - dimension, L - attention dimensions, H - heads, D - features. - is_query: indicates whether input data is a query oor key tensor. - projection_matrix: random Gaussian matrix of shape [M, D], where M stands - for the number of random features and each D x D sub-block has pairwise - orthogonal rows. - numerical_stabilizer: small positive constant for numerical stability. - - Returns: - Corresponding kernel feature map. - """ - data_normalizer = 1.0 / ( - tf.math.sqrt(tf.math.sqrt(tf.dtypes.cast(data.shape[-1], tf.float32)))) - ratio = 1.0 / tf.math.sqrt( - tf.dtypes.cast(projection_matrix.shape[0], tf.float32)) - data_dash = tf.einsum("blhd,md->blhm", data, projection_matrix) - diag_data = tf.math.square(data) - diag_data = tf.math.reduce_sum( - diag_data, axis=tf.keras.backend.ndim(data) - 1) - diag_data = (diag_data / 2.0) * data_normalizer * data_normalizer - diag_data = tf.expand_dims(diag_data, axis=tf.keras.backend.ndim(data) - 1) - if is_query: - last_dims_t = (len(data_dash.shape) - 1,) - data_dash = ratio * ( - tf.math.exp(data_dash - diag_data - tf.math.reduce_max( - data_dash, axis=last_dims_t, keepdims=True)) + numerical_stabilizer) - else: - data_dash = ratio * ( - tf.math.exp(data_dash - diag_data - tf.math.reduce_max(data_dash)) + - numerical_stabilizer) - - return data_dash - - -def noncausal_numerator(qs, ks, vs): - """Computes not-normalized FAVOR noncausal attention AV. - - Args: - qs: query_prime tensor of the shape [L,B,H,M]. - ks: key_prime tensor of the shape [L,B,H,M]. - vs: value tensor of the shape [L,B,H,D]. - - Returns: - Not-normalized FAVOR noncausal attention AV. - """ - kvs = tf.clip_by_value(tf.einsum("lbhm,lbhd->bhmd", ks, vs), -1e4, 1e4) - return tf.clip_by_value(tf.einsum("lbhm,bhmd->lbhd", qs, kvs), -1e4, 1e4) - - -def noncausal_denominator(qs, ks): - """Computes FAVOR normalizer in noncausal attention. - - Args: - qs: query_prime tensor of the shape [L,B,H,M]. - ks: key_prime tensor of the shape [L,B,H,M]. - - Returns: - FAVOR normalizer in noncausal attention. - """ - all_ones = tf.ones([ks.shape[0]], dtype=qs.dtype) - ks_sum = tf.clip_by_value(tf.einsum("lbhm,l->bhm", ks, all_ones), -1e-4, 1e4) - return tf.clip_by_value(tf.einsum("lbhm,bhm->lbh", qs, ks_sum), -1e-4, 1e4) - - -@tf.custom_gradient -def causal_numerator(qs, ks, vs): - """Computes not-normalized FAVOR causal attention A_{masked}V. - - Args: - qs: query_prime tensor of the shape [L,B,H,M]. - ks: key_prime tensor of the shape [L,B,H,M]. - vs: value tensor of the shape [L,B,H,D]. - - Returns: - Not-normalized FAVOR causal attention A_{masked}V. 
- """ - - result = [] - sums = tf.zeros_like(tf.einsum("ijk,ijl->ijkl", ks[0], vs[0])) - - for index in range(qs.shape[0]): - sums = sums + tf.einsum("ijk,ijl->ijkl", ks[index], vs[index]) - result.append(tf.einsum("ijkl,ijk->ijl", sums, qs[index])[None, Ellipsis]) - - result = tf.concat(result, axis=0) - - def grad(res_grad): - - grads = tf.zeros_like(tf.einsum("ijk,ijl->ijkl", ks[0], vs[0])) - - gr_sums = sums - - q_grads = [] - k_grads = [] - v_grads = [] - - for index in range(qs.shape[0] - 1, -1, -1): - - q_grads.append( - tf.einsum("ijkl,ijl->ijk", gr_sums, res_grad[index])[None, Ellipsis]) - grads = grads + tf.einsum("ijk,ijl->ijkl", qs[index], res_grad[index]) - k_grads.append(tf.einsum("ijkl,ijl->ijk", grads, vs[index])[None, Ellipsis]) - v_grads.append(tf.einsum("ijkl,ijk->ijl", grads, ks[index])[None, Ellipsis]) - gr_sums = gr_sums - tf.einsum("ijk,ijl->ijkl", ks[index], vs[index]) - - q_grads = tf.concat(q_grads[::-1], axis=0) - k_grads = tf.concat(k_grads[::-1], axis=0) - v_grads = tf.concat(v_grads[::-1], axis=0) - - return q_grads, k_grads, v_grads - - return result, grad - - -@tf.custom_gradient -def causal_denominator(qs, ks): - """Computes FAVOR normalizer in causal attention. - - Args: - qs: query_prime tensor of the shape [L,B,H,M]. - ks: key_prime tensor of the shape [L,B,H,M]. - - Returns: - FAVOR normalizer in causal attention. - """ - - result = [] - sums = tf.zeros_like(ks[0]) - - for index in range(qs.shape[0]): - sums = sums + ks[index] - result.append(tf.reduce_sum(qs[index] * sums, axis=2)[None, Ellipsis]) - - result = tf.concat(result, axis=0) - - def grad(res_grad): - - k_grad = tf.zeros_like(ks[0]) - - gr_sums = sums - - q_grads = [] - k_grads = [] - - for index in range(qs.shape[0] - 1, -1, -1): - - q_grads.append( - tf.einsum("ijk,ij->ijk", gr_sums, res_grad[index])[None, Ellipsis]) - k_grad = k_grad + tf.einsum("ijk,ij->ijk", qs[index], res_grad[index]) - k_grads.append(k_grad[None, Ellipsis]) - gr_sums = gr_sums - ks[index] - - q_grads = tf.concat(q_grads[::-1], axis=0) - k_grads = tf.concat(k_grads[::-1], axis=0) - - return q_grads, k_grads - - return result, grad - - -def favor_attention(query, - key, - value, - kernel_transformation, - causal, - projection_matrix=None): - """Computes FAVOR normalized attention. - - Args: - query: query tensor. - key: key tensor. - value: value tensor. - kernel_transformation: transformation used to get finite kernel features. - causal: whether attention is causal or not. - projection_matrix: projection matrix to be used. - - Returns: - FAVOR normalized attention. - """ - query_prime = kernel_transformation(query, True, - projection_matrix) # [B,L,H,M] - key_prime = kernel_transformation(key, False, projection_matrix) # [B,L,H,M] - query_prime = tf.transpose(query_prime, [1, 0, 2, 3]) # [L,B,H,M] - key_prime = tf.transpose(key_prime, [1, 0, 2, 3]) # [L,B,H,M] - value = tf.transpose(value, [1, 0, 2, 3]) # [L,B,H,D] - - if causal: - av_attention = causal_numerator(query_prime, key_prime, value) - attention_normalizer = causal_denominator(query_prime, key_prime) - else: - av_attention = noncausal_numerator(query_prime, key_prime, value) - attention_normalizer = noncausal_denominator(query_prime, key_prime) - # TODO(kchoro): Add more comments. 
- av_attention = tf.transpose(av_attention, [1, 0, 2, 3]) - attention_normalizer = tf.transpose(attention_normalizer, [1, 0, 2]) - attention_normalizer = tf.expand_dims(attention_normalizer, - len(attention_normalizer.shape)) - return av_attention / attention_normalizer - - -class Attention(tf.keras.layers.Layer): - """Multi-headed attention layer.""" - - def __init__(self, - hidden_size, - num_heads, - attention_dropout, - kernel_transformation=relu_kernel_transformation, - numerical_stabilizer=0.001, - causal=False, - projection_matrix_type=None, - nb_random_features=0): - """Initialize Attention. - - Args: - hidden_size: int, output dim of hidden layer. - num_heads: int, number of heads to repeat the same attention structure. - attention_dropout: float, dropout rate inside attention for training. - kernel_transformation: transformation used to produce kernel features for - attention. - numerical_stabilizer: used to bound away from zero kernel values. - causal: whether attention is causal or not. - projection_matrix_type: None if Identity should be used, otherwise random - projection matrix will be applied. - nb_random_features: number of random features to be used (relevant only if - projection_matrix is not None). - """ - if hidden_size % num_heads: - raise ValueError( - "Hidden size ({}) must be divisible by the number of heads ({})." - .format(hidden_size, num_heads)) - - super(Attention, self).__init__() - self.hidden_size = hidden_size - self.num_heads = num_heads - self.attention_dropout = attention_dropout - self.kernel_transformation = kernel_transformation - self.numerical_stabilizer = numerical_stabilizer - self.causal = causal - self.projection_matrix_type = projection_matrix_type - self.nb_random_features = nb_random_features - - def build(self, input_shape): - """Builds the layer.""" - # Layers for linearly projecting the queries, keys, and values. - size_per_head = self.hidden_size // self.num_heads - - def _glorot_initializer(fan_in, fan_out): - limit = math.sqrt(6.0 / (fan_in + fan_out)) - return tf.keras.initializers.RandomUniform(minval=-limit, maxval=limit) - - attention_initializer = _glorot_initializer(input_shape.as_list()[-1], - self.hidden_size) - self.query_dense_layer = util.DenseEinsum( - output_shape=(self.num_heads, size_per_head), - kernel_initializer=attention_initializer, - use_bias=False, - name="query") - self.key_dense_layer = util.DenseEinsum( - output_shape=(self.num_heads, size_per_head), - kernel_initializer=attention_initializer, - use_bias=False, - name="key") - self.value_dense_layer = util.DenseEinsum( - output_shape=(self.num_heads, size_per_head), - kernel_initializer=attention_initializer, - use_bias=False, - name="value") - - output_initializer = _glorot_initializer(self.hidden_size, self.hidden_size) - self.output_dense_layer = util.DenseEinsum( - output_shape=self.hidden_size, - num_summed_dimensions=2, - kernel_initializer=output_initializer, - use_bias=False, - name="output_transform") - super(Attention, self).build(input_shape) - - def get_config(self): - return { - "hidden_size": self.hidden_size, - "num_heads": self.num_heads, - "attention_dropout": self.attention_dropout, - } - - def call(self, - query_input, - source_input, - bias, - training, - cache=None, - decode_loop_step=None): - """Apply attention mechanism to query_input and source_input. - - Args: - query_input: A tensor with shape [batch_size, length_query, hidden_size]. - source_input: A tensor with shape [batch_size, length_source, - hidden_size]. 
- bias: A tensor with shape [batch_size, 1, length_query, length_source], - the attention bias that will be added to the result of the dot product. - training: A bool, whether in training mode or not. - cache: (Used during prediction) A dictionary with tensors containing - results of previous attentions. The dictionary must have the items: - {"k": tensor with shape [batch_size, i, heads, dim_per_head], - "v": tensor with shape [batch_size, i, heads, dim_per_head]} where - i is the current decoded length for non-padded decode, or max - sequence length for padded decode. - decode_loop_step: An integer, step number of the decoding loop. Used only - for autoregressive inference on TPU. - - Returns: - Attention layer output with shape [batch_size, length_query, hidden_size] - """ - # Linearly project the query, key and value using different learned - # projections. Splitting heads is automatically done during the linear - # projections --> [batch_size, length, num_heads, dim_per_head]. - query = self.query_dense_layer(query_input) - key = self.key_dense_layer(source_input) - value = self.value_dense_layer(source_input) - - if self.projection_matrix_type is None: - projection_matrix = None - else: - dim = query.shape[-1] - seed = tf.math.ceil(tf.math.abs(tf.math.reduce_sum(query) * BIG_CONSTANT)) - seed = tf.dtypes.cast(seed, tf.int32) - projection_matrix = create_projection_matrix( - self.nb_random_features, dim, seed=seed, dtype=query_input.dtype) - - if cache is not None: - # Combine cached keys and values with new keys and values. - if decode_loop_step is not None: - cache_k_shape = cache["k"].shape.as_list() - indices = tf.reshape( - tf.one_hot(decode_loop_step, cache_k_shape[1], dtype=key.dtype), - [1, cache_k_shape[1], 1, 1]) - key = cache["k"] + key * indices - cache_v_shape = cache["v"].shape.as_list() - indices = tf.reshape( - tf.one_hot(decode_loop_step, cache_v_shape[1], dtype=value.dtype), - [1, cache_v_shape[1], 1, 1]) - value = cache["v"] + value * indices - else: - key = tf.concat([tf.cast(cache["k"], key.dtype), key], axis=1) - value = tf.concat([tf.cast(cache["v"], value.dtype), value], axis=1) - - # Update cache - cache["k"] = key - cache["v"] = value - - attention_output = favor_attention(query, key, value, - self.kernel_transformation, self.causal, - projection_matrix) - attention_output = self.output_dense_layer(attention_output) - return attention_output - - -class SelfAttention(Attention): - """Multiheaded self-attention layer.""" - - def call(self, - query_input, - bias, - training, - cache=None, - decode_loop_step=None): - return super(SelfAttention, self).call(query_input, query_input, bias, - training, cache, decode_loop_step) diff --git a/mlpf/tfmodel/fast_attention_util.py b/mlpf/tfmodel/fast_attention_util.py deleted file mode 100644 index c29ff4550..000000000 --- a/mlpf/tfmodel/fast_attention_util.py +++ /dev/null @@ -1,195 +0,0 @@ -# coding=utf-8 -# Copyright 2020 The Google Research Authors. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -# Copyright 2019 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -"""Keras-based einsum layer. - -Copied from -https://github.com/tensorflow/models/blob/master/official/nlp/modeling/layers/dense_einsum.py. -""" -# pylint: disable=g-classes-have-attributes - -import tensorflow as tf - -_CHR_IDX = ["a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k", "l", "m"] - - -@tf.keras.utils.register_keras_serializable(package="Text") -class DenseEinsum(tf.keras.layers.Layer): - """A densely connected layer that uses tf.einsum as the backing computation. - - This layer can perform einsum calculations of arbitrary dimensionality. - - Arguments: - output_shape: Positive integer or tuple, dimensionality of the output space. - num_summed_dimensions: The number of dimensions to sum over. Standard 2D - matmul should use 1, 3D matmul should use 2, and so forth. - activation: Activation function to use. If you don't specify anything, no - activation is applied - (ie. "linear" activation: `a(x) = x`). - use_bias: Boolean, whether the layer uses a bias vector. - kernel_initializer: Initializer for the `kernel` weights matrix. - bias_initializer: Initializer for the bias vector. - kernel_regularizer: Regularizer function applied to the `kernel` weights - matrix. - bias_regularizer: Regularizer function applied to the bias vector. - activity_regularizer: Regularizer function applied to the output of the - layer (its "activation").. - kernel_constraint: Constraint function applied to the `kernel` weights - matrix. - bias_constraint: Constraint function applied to the bias vector. - Input shape: - N-D tensor with shape: `(batch_size, ..., input_dim)`. The most common - situation would be a 2D input with shape `(batch_size, input_dim)`. - Output shape: - N-D tensor with shape: `(batch_size, ..., units)`. For instance, for a 2D - input with shape `(batch_size, input_dim)`, the output would have shape - `(batch_size, units)`. 
- """ - - def __init__(self, - output_shape, - num_summed_dimensions=1, - activation=None, - use_bias=True, - kernel_initializer="glorot_uniform", - bias_initializer="zeros", - kernel_regularizer=None, - bias_regularizer=None, - activity_regularizer=None, - kernel_constraint=None, - bias_constraint=None, - **kwargs): - super(DenseEinsum, self).__init__(**kwargs) - self._output_shape = output_shape if isinstance( - output_shape, (list, tuple)) else (output_shape,) - self._activation = tf.keras.activations.get(activation) - self._use_bias = use_bias - self._kernel_initializer = tf.keras.initializers.get(kernel_initializer) - self._bias_initializer = tf.keras.initializers.get(bias_initializer) - self._kernel_regularizer = tf.keras.regularizers.get(kernel_regularizer) - self._bias_regularizer = tf.keras.regularizers.get(bias_regularizer) - self._kernel_constraint = tf.keras.constraints.get(kernel_constraint) - self._bias_constraint = tf.keras.constraints.get(bias_constraint) - self._num_summed_dimensions = num_summed_dimensions - self._einsum_string = None - - def _build_einsum_string(self, free_input_dims, bound_dims, output_dims): - input_str = "" - kernel_str = "" - output_str = "" - letter_offset = 0 - for i in range(free_input_dims): - char = _CHR_IDX[i + letter_offset] - input_str += char - output_str += char - - letter_offset += free_input_dims - for i in range(bound_dims): - char = _CHR_IDX[i + letter_offset] - input_str += char - kernel_str += char - - letter_offset += bound_dims - for i in range(output_dims): - char = _CHR_IDX[i + letter_offset] - kernel_str += char - output_str += char - - return input_str + "," + kernel_str + "->" + output_str - - def build(self, input_shape): - input_shape = tf.TensorShape(input_shape) - input_rank = input_shape.rank - free_input_dims = input_rank - self._num_summed_dimensions - output_dims = len(self._output_shape) - - self._einsum_string = self._build_einsum_string(free_input_dims, - self._num_summed_dimensions, - output_dims) - - # This is only saved for testing purposes. 
- self._kernel_shape = ( - input_shape[free_input_dims:].concatenate(self._output_shape)) - - self._kernel = self.add_weight( - "kernel", - shape=self._kernel_shape, - initializer=self._kernel_initializer, - regularizer=self._kernel_regularizer, - constraint=self._kernel_constraint, - dtype=self.dtype, - trainable=True) - if self._use_bias: - self._bias = self.add_weight( - "bias", - shape=self._output_shape, - initializer=self._bias_initializer, - regularizer=self._bias_regularizer, - constraint=self._bias_constraint, - dtype=self.dtype, - trainable=True) - else: - self._bias = None - super(DenseEinsum, self).build(input_shape) - - def get_config(self): - config = { - "output_shape": - self._output_shape, - "num_summed_dimensions": - self._num_summed_dimensions, - "activation": - tf.keras.activations.serialize(self._activation), - "use_bias": - self._use_bias, - "kernel_initializer": - tf.keras.initializers.serialize(self._kernel_initializer), - "bias_initializer": - tf.keras.initializers.serialize(self._bias_initializer), - "kernel_regularizer": - tf.keras.regularizers.serialize(self._kernel_regularizer), - "bias_regularizer": - tf.keras.regularizers.serialize(self._bias_regularizer), - "activity_regularizer": - tf.keras.regularizers.serialize(self._activity_regularizer), - "kernel_constraint": - tf.keras.constraints.serialize(self._kernel_constraint), - "bias_constraint": - tf.keras.constraints.serialize(self._bias_constraint) - } - base_config = super(DenseEinsum, self).get_config() - return dict(list(base_config.items()) + list(config.items())) - - def call(self, inputs): - ret = tf.einsum(self._einsum_string, inputs, self._kernel) - if self._use_bias: - ret += self._bias - if self._activation is not None: - ret = self._activation(ret) - return ret diff --git a/mlpf/tfmodel/model.py b/mlpf/tfmodel/model.py index 418c5492e..d4036f288 100644 --- a/mlpf/tfmodel/model.py +++ b/mlpf/tfmodel/model.py @@ -1,19 +1,17 @@ -# This file contains the generic MLPF model definitions -# PFNetDense: the GNN-based model with graph building based on LSH and a Gaussian distance kernel -# PFNetTransformer: the transformer-based model using fast attention - import tensorflow as tf -from tfmodel.utils import batched_histogram_2d +# FIXME: this should be configurable regularizer_weight = 0.0 + def split_indices_to_bins(cmul, nbins, bin_size): bin_idx = tf.argmax(cmul, axis=-1) bins_split = tf.reshape(tf.argsort(bin_idx), (nbins, bin_size)) return bins_split + def split_indices_to_bins_batch(cmul, nbins, bin_size, msk): - bin_idx = tf.argmax(cmul, axis=-1) + tf.cast(tf.where(~msk, nbins-1, 0), tf.int64) + bin_idx = tf.argmax(cmul, axis=-1) + tf.cast(tf.where(~msk, nbins - 1, 0), tf.int64) bins_split = tf.reshape(tf.argsort(bin_idx), (tf.shape(cmul)[0], nbins, bin_size)) return bins_split @@ -28,39 +26,42 @@ def pairwise_l2_dist(A, B): # return pairwise euclidean difference matrix # note that this matrix multiplication can go out of range for float16 in case the absolute values of A and B are large - D = tf.sqrt(tf.maximum(na - 2*tf.matmul(A, B, False, True) + nb, 1e-6)) + D = tf.sqrt(tf.maximum(na - 2 * tf.matmul(A, B, False, True) + nb, 1e-6)) return D + def pairwise_l1_dist(A, B): na = tf.expand_dims(A, -2) nb = tf.expand_dims(B, -3) - D = tf.abs(tf.reduce_sum(na-nb, axis=-1)) + D = tf.abs(tf.reduce_sum(na - nb, axis=-1)) return D + def pairwise_learnable_dist(A, B, ffn, training=False): shp = tf.shape(A) - #stack node feature vectors of src[i], dst[j] into a matrix res[i,j] = (src[i], dst[j]) + # stack 
node feature vectors of src[i], dst[j] into a matrix res[i,j] = (src[i], dst[j]) mg = tf.meshgrid(tf.range(shp[0]), tf.range(shp[1]), tf.range(shp[2]), tf.range(shp[2]), indexing="ij") - inds1 = tf.stack([mg[0],mg[1],mg[2]], axis=-1) - inds2 = tf.stack([mg[0],mg[1],mg[3]], axis=-1) - res = tf.concat([ - tf.gather_nd(A, inds1), - tf.gather_nd(B, inds2)], axis=-1 - ) #(batch, bin, elem, elem, feat) - - #run a feedforward net on ffn(src, dst) -> output_dim + inds1 = tf.stack([mg[0], mg[1], mg[2]], axis=-1) + inds2 = tf.stack([mg[0], mg[1], mg[3]], axis=-1) + res = tf.concat([tf.gather_nd(A, inds1), tf.gather_nd(B, inds2)], axis=-1) # (batch, bin, elem, elem, feat) + + # run a feedforward net on ffn(src, dst) -> output_dim res_transformed = ffn(res, training=training) return res_transformed + def pairwise_sigmoid_dist(A, B): - return tf.nn.sigmoid(tf.matmul(A, tf.transpose(B, perm=[0,2,1]))) + return tf.nn.sigmoid(tf.matmul(A, tf.transpose(B, perm=[0, 2, 1]))) + """ sp_a: (nbatch, nelem, nelem) sparse distance matrices b: (nbatch, nelem, ncol) dense per-element feature matrices """ + + def sparse_dense_matmult_batch(sp_a, b): dtype = b.dtype @@ -71,41 +72,39 @@ def sparse_dense_matmult_batch(sp_a, b): def map_function(x): i, dense_slice = x[0], x[1] num_points = tf.shape(b)[1] - sparse_slice = tf.sparse.reshape(tf.sparse.slice( - tf.cast(sp_a, tf.float32), [i, 0, 0], [1, num_points, num_points]), - [num_points, num_points]) + sparse_slice = tf.sparse.reshape( + tf.sparse.slice(tf.cast(sp_a, tf.float32), [i, 0, 0], [1, num_points, num_points]), [num_points, num_points] + ) mult_slice = tf.sparse.sparse_dense_matmul(sparse_slice, dense_slice) return mult_slice elems = (tf.range(0, num_batches, delta=1, dtype=tf.int64), b) ret = tf.map_fn(map_function, elems, fn_output_signature=tf.TensorSpec((None, None), b.dtype), back_prop=True) - return tf.cast(ret, dtype) + return tf.cast(ret, dtype) + @tf.function def reverse_lsh(bins_split, points_binned_enc): # batch_dim = points_binned_enc.shape[0] # n_points = points_binned_enc.shape[1]*points_binned_enc.shape[2] # n_features = points_binned_enc.shape[-1] - + shp = tf.shape(points_binned_enc) batch_dim = shp[0] - n_points = shp[1]*shp[2] + n_points = shp[1] * shp[2] n_features = shp[-1] bins_split_flat = tf.reshape(bins_split, (batch_dim, n_points)) points_binned_enc_flat = tf.reshape(points_binned_enc, (batch_dim, n_points, n_features)) - + batch_inds = tf.reshape(tf.repeat(tf.range(batch_dim), n_points), (batch_dim, n_points)) bins_split_flat_batch = tf.stack([batch_inds, bins_split_flat], axis=-1) - ret = tf.scatter_nd( - bins_split_flat_batch, - points_binned_enc_flat, - shape=(batch_dim, n_points, n_features) - ) - + ret = tf.scatter_nd(bins_split_flat_batch, points_binned_enc_flat, shape=(batch_dim, n_points, n_features)) + return ret + class InputEncoding(tf.keras.layers.Layer): def __init__(self, num_input_classes): super(InputEncoding, self).__init__() @@ -113,17 +112,19 @@ def __init__(self, num_input_classes): """ X: [Nbatch, Nelem, Nfeat] array of all the input detector element feature data - """ + """ + @tf.function def call(self, X): - #X[:, :, 0] - categorical index of the element type + # X[:, :, 0] - categorical index of the element type Xid = tf.cast(tf.one_hot(tf.cast(X[:, :, 0], tf.int32), self.num_input_classes), dtype=X.dtype) - #X[:, :, 1:] - all the other non-categorical features + # X[:, :, 1:] - all the other non-categorical features Xprop = X[:, :, 1:] return tf.concat([Xid, Xprop], axis=-1) + """ For the CMS dataset, 
precompute additional features: - log of pt and energy @@ -131,6 +132,8 @@ def call(self, X): - sin, cos of phi angles - scale layer and depth values (small integers) to a larger dynamic range """ + + class InputEncodingCMS(tf.keras.layers.Layer): def __init__(self, num_input_classes): super(InputEncodingCMS, self).__init__() @@ -138,9 +141,10 @@ def __init__(self, num_input_classes): """ X: [Nbatch, Nelem, Nfeat] array of all the input detector element feature data - """ + """ + def call(self, X): - #X[:, :, 0] - categorical index of the element type + # X[:, :, 0] - categorical index of the element type Xid = tf.cast(tf.one_hot(tf.cast(X[:, :, 0], tf.int32), self.num_input_classes), dtype=X.dtype) Xpt = tf.expand_dims(tf.math.log(X[:, :, 1] + 1.0), axis=-1) Xe = tf.expand_dims(tf.math.log(X[:, :, 4] + 1.0), axis=-1) @@ -162,18 +166,30 @@ def call(self, X): Xphi_hcal1 = tf.expand_dims(tf.sin(X[:, :, 12]), axis=-1) Xphi_hcal2 = tf.expand_dims(tf.cos(X[:, :, 12]), axis=-1) - return tf.concat([ - Xid, - Xpt, Xpt_0p5, Xpt_2, - Xeta1, Xeta2, - Xabs_eta, - Xphi1, Xphi2, - Xe, Xe_0p5, Xe_2, - Xphi_ecal1, Xphi_ecal2, - Xphi_hcal1, Xphi_hcal2, - X], axis=-1 + return tf.concat( + [ + Xid, + Xpt, + Xpt_0p5, + Xpt_2, + Xeta1, + Xeta2, + Xabs_eta, + Xphi1, + Xphi2, + Xe, + Xe_0p5, + Xe_2, + Xphi_ecal1, + Xphi_ecal2, + Xphi_hcal1, + Xphi_hcal2, + X, + ], + axis=-1, ) + class GHConvDense(tf.keras.layers.Layer): def __init__(self, *args, **kwargs): self.activation = getattr(tf.keras.activations, kwargs.pop("activation")) @@ -185,40 +201,66 @@ def __init__(self, *args, **kwargs): def build(self, input_shape): self.hidden_dim = input_shape[0][-1] self.nelem = input_shape[0][-2] - self.W_t = self.add_weight(shape=(self.hidden_dim, self.output_dim), name="w_t", initializer="random_normal", trainable=True, regularizer=tf.keras.regularizers.L1(regularizer_weight)) - self.b_t = self.add_weight(shape=(self.output_dim,), name="b_t", initializer="random_normal", trainable=True, regularizer=tf.keras.regularizers.L1(regularizer_weight)) - self.W_h = self.add_weight(shape=(self.hidden_dim, self.output_dim), name="w_h", initializer="random_normal", trainable=True, regularizer=tf.keras.regularizers.L1(regularizer_weight)) - self.theta = self.add_weight(shape=(self.hidden_dim, self.output_dim), name="theta", initializer="random_normal", trainable=True, regularizer=tf.keras.regularizers.L1(regularizer_weight)) - + self.W_t = self.add_weight( + shape=(self.hidden_dim, self.output_dim), + name="w_t", + initializer="random_normal", + trainable=True, + regularizer=tf.keras.regularizers.L1(regularizer_weight), + ) + self.b_t = self.add_weight( + shape=(self.output_dim,), + name="b_t", + initializer="random_normal", + trainable=True, + regularizer=tf.keras.regularizers.L1(regularizer_weight), + ) + self.W_h = self.add_weight( + shape=(self.hidden_dim, self.output_dim), + name="w_h", + initializer="random_normal", + trainable=True, + regularizer=tf.keras.regularizers.L1(regularizer_weight), + ) + self.theta = self.add_weight( + shape=(self.hidden_dim, self.output_dim), + name="theta", + initializer="random_normal", + trainable=True, + regularizer=tf.keras.regularizers.L1(regularizer_weight), + ) + """ x: [batches, bins, elements, features] adj: [batches, bins, elements, elements] msk: [batches, bins, elements] """ + def call(self, inputs): x, adj, msk = inputs adj = tf.squeeze(adj) - - #compute the normalization of the adjacency matrix + + # compute the normalization of the adjacency matrix if self.normalize_degrees: - 
#in_degrees = tf.clip_by_value(tf.reduce_sum(tf.abs(adj), axis=-1), 0, 1000) + # in_degrees = tf.clip_by_value(tf.reduce_sum(tf.abs(adj), axis=-1), 0, 1000) in_degrees = tf.reduce_sum(tf.abs(adj), axis=-1) - #add epsilon to prevent numerical issues from 1/sqrt(x) - norm = tf.expand_dims(tf.pow(in_degrees + 1e-6, -0.5), -1)*msk + # add epsilon to prevent numerical issues from 1/sqrt(x) + norm = tf.expand_dims(tf.pow(in_degrees + 1e-6, -0.5), -1) * msk - f_hom = tf.linalg.matmul(x*msk, self.theta)*msk + f_hom = tf.linalg.matmul(x * msk, self.theta) * msk if self.normalize_degrees: - f_hom = tf.linalg.matmul(adj, f_hom*norm)*norm + f_hom = tf.linalg.matmul(adj, f_hom * norm) * norm else: f_hom = tf.linalg.matmul(adj, f_hom) - f_het = tf.linalg.matmul(x*msk, self.W_h) + f_het = tf.linalg.matmul(x * msk, self.W_h) gate = tf.nn.sigmoid(tf.linalg.matmul(x, self.W_t) + self.b_t) - out = gate*f_hom + (1.0-gate)*f_het - return self.activation(out)*msk + out = gate * f_hom + (1.0 - gate) * f_het + return self.activation(out) * msk + class NodeMessageLearnable(tf.keras.layers.Layer): def __init__(self, *args, **kwargs): @@ -232,27 +274,30 @@ def __init__(self, *args, **kwargs): self.hidden_dim, num_layers=self.num_layers, activation=self.activation, - name=kwargs.get("name")+"_ffn" + name=kwargs.get("name") + "_ffn", ) super(NodeMessageLearnable, self).__init__(*args, **kwargs) def call(self, inputs): x, adj, msk = inputs - #collect incoming messages (batch, bins, elems, elems, msg_dim) -> (batch, bins, elems, msg_dim) + # collect incoming messages (batch, bins, elems, elems, msg_dim) -> (batch, bins, elems, msg_dim) max_message_dst = tf.reduce_max(adj, axis=-2) - #collect outgoing messages (batch, bins, elems, elems, msg_dim) -> (batch, bins, elems, msg_dim) + # collect outgoing messages (batch, bins, elems, elems, msg_dim) -> (batch, bins, elems, msg_dim) max_message_src = tf.reduce_max(adj, axis=-3) - #node update (batch, bins, elems, elems, elem_dim + msg_dim + msg_dim) + # node update (batch, bins, elems, elems, elem_dim + msg_dim + msg_dim) x2 = tf.concat([x, max_message_dst, max_message_src], axis=-1) return tf.cast(self.activation(self.ffn(x2)), x.dtype) -def point_wise_feed_forward_network(d_model, dff, name, num_layers=1, activation='elu', dtype=tf.dtypes.float32, dim_decrease=False, dropout=0.0): + +def point_wise_feed_forward_network( + d_model, dff, name, num_layers=1, activation="elu", dtype=tf.dtypes.float32, dim_decrease=False, dropout=0.0 +): if regularizer_weight > 0: - bias_regularizer = tf.keras.regularizers.L1(regularizer_weight) + bias_regularizer = tf.keras.regularizers.L1(regularizer_weight) kernel_regularizer = tf.keras.regularizers.L1(regularizer_weight) else: bias_regularizer = None @@ -262,30 +307,35 @@ def point_wise_feed_forward_network(d_model, dff, name, num_layers=1, activation for ilayer in range(num_layers): _name = name + "_dense_{}".format(ilayer) - layers.append(tf.keras.layers.Dense( - dff, activation=activation, bias_regularizer=bias_regularizer, - kernel_regularizer=kernel_regularizer, name=_name)) + layers.append( + tf.keras.layers.Dense( + dff, + activation=activation, + bias_regularizer=bias_regularizer, + kernel_regularizer=kernel_regularizer, + name=_name, + ) + ) - if dropout>0.0: + if dropout > 0.0: layers.append(tf.keras.layers.Dropout(dropout)) if dim_decrease: dff = dff // 2 - layers.append(tf.keras.layers.Dense(d_model, dtype=dtype, name="{}_dense_{}".format(name, ilayer+1))) + layers.append(tf.keras.layers.Dense(d_model, dtype=dtype, 
name="{}_dense_{}".format(name, ilayer + 1))) return tf.keras.Sequential(layers, name=name) + def get_message_layer(config_dict, name): config_dict = config_dict.copy() class_name = config_dict.pop("type") - classes = { - "NodeMessageLearnable": NodeMessageLearnable, - "GHConvDense": GHConvDense - } + classes = {"NodeMessageLearnable": NodeMessageLearnable, "GHConvDense": GHConvDense} conv_cls = classes[class_name] return conv_cls(name=name, **config_dict) + class NodePairGaussianKernel(tf.keras.layers.Layer): def __init__(self, **kwargs): self.clip_value_low = kwargs.pop("clip_value_low", 0.0) @@ -305,13 +355,15 @@ def __init__(self, **kwargs): returns: (n_batch, n_bins, n_points, n_points, 1) message matrix """ + def call(self, x_msg_binned, msk, training=False): - x = x_msg_binned*msk + x = x_msg_binned * msk dm = tf.expand_dims(self.dist_norm(x, x), axis=-1) - dm = tf.exp(-self.dist_mult*dm) + dm = tf.exp(-self.dist_mult * dm) dm = tf.clip_by_value(dm, self.clip_value_low, 1) return dm + class NodePairTrainableKernel(tf.keras.layers.Layer): def __init__(self, output_dim=4, hidden_dim_node=128, hidden_dim_pair=32, num_layers=1, activation="elu", **kwargs): self.output_dim = output_dim @@ -325,7 +377,7 @@ def __init__(self, output_dim=4, hidden_dim_node=128, hidden_dim_pair=32, num_la self.hidden_dim_node, kwargs.get("name") + "_" + "node", num_layers=self.num_layers, - activation=self.activation + activation=self.activation, ) self.pair_kernel = point_wise_feed_forward_network( @@ -333,7 +385,7 @@ def __init__(self, output_dim=4, hidden_dim_node=128, hidden_dim_pair=32, num_la self.hidden_dim_pair, kwargs.get("name") + "_" + "pair_kernel", num_layers=self.num_layers, - activation=self.activation + activation=self.activation, ) super(NodePairTrainableKernel, self).__init__(**kwargs) @@ -343,6 +395,7 @@ def __init__(self, output_dim=4, hidden_dim_node=128, hidden_dim_pair=32, num_la returns: (n_batch, n_bins, n_points, n_points, output_dim) message matrix """ + def call(self, x_msg_binned, msk, training=False): node_proj = self.activation(self.ffn_node(x_msg_binned)) @@ -350,17 +403,16 @@ def call(self, x_msg_binned, msk, training=False): dm = tf.cast(pairwise_learnable_dist(node_proj, node_proj, self.pair_kernel, training=training), x_msg_binned.dtype) return dm + def build_kernel_from_conf(kernel_dict, name): kernel_dict = kernel_dict.copy() cls_type = kernel_dict.pop("type") - clss = { - "NodePairGaussianKernel": NodePairGaussianKernel, - "NodePairTrainableKernel": NodePairTrainableKernel - } + clss = {"NodePairGaussianKernel": NodePairGaussianKernel, "NodePairTrainableKernel": NodePairTrainableKernel} return clss[cls_type](name=name, **kernel_dict) + class MessageBuildingLayerLSH(tf.keras.layers.Layer): def __init__(self, distance_dim=128, max_num_bins=200, bin_size=128, kernel=NodePairGaussianKernel(), **kwargs): self.distance_dim = distance_dim @@ -371,43 +423,44 @@ def __init__(self, distance_dim=128, max_num_bins=200, bin_size=128, kernel=Node super(MessageBuildingLayerLSH, self).__init__(**kwargs) def build(self, input_shape): - #(n_batch, n_points, n_features) - - #generate the LSH codebook for random rotations (num_features, max_num_bins/2) + # (n_batch, n_points, n_features) + + # generate the LSH codebook for random rotations (num_features, max_num_bins/2) self.codebook_random_rotations = self.add_weight( - shape=(self.distance_dim, self.max_num_bins//2), initializer="random_normal", - trainable=False, name="lsh_projections" + shape=(self.distance_dim, self.max_num_bins // 
2), + initializer="random_normal", + trainable=False, + name="lsh_projections", ) - + """ x_msg: (n_batch, n_points, n_msg_features) x_node: (n_batch, n_points, n_node_features) """ + def call(self, x_msg, x_node, msk, training=False): msk_f = tf.expand_dims(tf.cast(msk, x_msg.dtype), -1) shp = tf.shape(x_msg) - n_batches = shp[0] n_points = shp[1] - n_message_features = shp[2] - #compute the number of LSH bins to divide the input points into on the fly - #n_points must be divisible by bin_size exactly due to the use of reshape + # compute the number of LSH bins to divide the input points into on the fly + # n_points must be divisible by bin_size exactly due to the use of reshape n_bins = tf.math.floordiv(n_points, self.bin_size) - #put each input item into a bin defined by the argmax output across the LSH embedding - #FIXME: this needs n_bins to be at least 2 to work correctly! - mul = tf.linalg.matmul(x_msg, self.codebook_random_rotations[:, :n_bins//2]) + # put each input item into a bin defined by the argmax output across the LSH embedding + # FIXME: this needs n_bins to be at least 2 to work correctly! + mul = tf.linalg.matmul(x_msg, self.codebook_random_rotations[:, : n_bins // 2]) cmul = tf.concat([mul, -mul], axis=-1) bins_split = split_indices_to_bins_batch(cmul, n_bins, self.bin_size, msk) x_msg_binned = tf.gather(x_msg, bins_split, batch_dims=1) x_features_binned = tf.gather(x_node, bins_split, batch_dims=1) msk_f_binned = tf.gather(msk_f, bins_split, batch_dims=1) - #Run the node-to-node kernel (distance computation / graph building / attention) + # Run the node-to-node kernel (distance computation / graph building / attention) dm = self.kernel(x_msg_binned, msk_f_binned, training=training) - #remove the masked points row-wise and column-wise + # remove the masked points row-wise and column-wise msk_f_binned_squeeze = tf.squeeze(msk_f_binned, axis=-1) shp_dm = tf.shape(dm) rshp_row = [shp_dm[0], shp_dm[1], shp_dm[2], 1, 1] @@ -419,73 +472,64 @@ def call(self, x_msg, x_node, msk, training=False): return bins_split, x_features_binned, dm, msk_f_binned + class MessageBuildingLayerFull(tf.keras.layers.Layer): def __init__(self, distance_dim=128, kernel=NodePairGaussianKernel(), **kwargs): self.distance_dim = distance_dim self.kernel = kernel super(MessageBuildingLayerFull, self).__init__(**kwargs) - + """ x_msg: (n_batch, n_points, n_msg_features) """ + def call(self, x_msg, msk, training=False): msk_f = tf.expand_dims(tf.cast(msk, x_msg.dtype), -1) - shp = tf.shape(x_msg) - n_batches = shp[0] - n_points = shp[1] - n_message_features = shp[2] - - #Run the node-to-node kernel (distance computation / graph building / attention) + # Run the node-to-node kernel (distance computation / graph building / attention) dm = self.kernel(x_msg, training=training) - #remove the masked points row-wise and column-wise + # remove the masked points row-wise and column-wise dm = tf.einsum("bijk,bi->bijk", dm, tf.squeeze(msk_f, axis=-1)) dm = tf.einsum("bijk,bj->bijk", dm, tf.squeeze(msk_f, axis=-1)) return dm + class OutputDecoding(tf.keras.Model): - def __init__(self, + def __init__( + self, activation="elu", regression_use_classification=True, num_output_classes=8, schema="cms", dropout=0.0, - - pt_skip_gate=True, - eta_skip_gate=True, - phi_skip_gate=True, energy_skip_gate=True, - id_dim_decrease=True, charge_dim_decrease=True, pt_dim_decrease=False, eta_dim_decrease=False, phi_dim_decrease=False, energy_dim_decrease=False, - id_hidden_dim=128, charge_hidden_dim=128, pt_hidden_dim=128, 
eta_hidden_dim=128, phi_hidden_dim=128, energy_hidden_dim=128, - id_num_layers=4, charge_num_layers=2, pt_num_layers=3, eta_num_layers=3, phi_num_layers=3, energy_num_layers=3, - layernorm=False, mask_reg_cls0=True, energy_multimodal=True, - event_set_output=False, - **kwargs): + **kwargs + ): super(OutputDecoding, self).__init__(**kwargs) @@ -493,10 +537,6 @@ def __init__(self, self.schema = schema self.dropout = dropout - self.pt_skip_gate = pt_skip_gate - self.eta_skip_gate = eta_skip_gate - self.phi_skip_gate = phi_skip_gate - self.mask_reg_cls0 = mask_reg_cls0 self.energy_multimodal = energy_multimodal @@ -508,58 +548,71 @@ def __init__(self, self.event_set_output = event_set_output self.ffn_id = point_wise_feed_forward_network( - num_output_classes, id_hidden_dim, + num_output_classes, + id_hidden_dim, "ffn_cls", num_layers=id_num_layers, activation=activation, dim_decrease=id_dim_decrease, - dropout=dropout + dropout=dropout, ) self.ffn_charge = point_wise_feed_forward_network( - 1, charge_hidden_dim, + 1, + charge_hidden_dim, "ffn_charge", num_layers=charge_num_layers, activation=activation, dim_decrease=charge_dim_decrease, - dropout=dropout + dropout=dropout, ) - + self.ffn_pt = point_wise_feed_forward_network( - 2, pt_hidden_dim, "ffn_pt", + 2, + pt_hidden_dim, + "ffn_pt", num_layers=pt_num_layers, activation=activation, dim_decrease=pt_dim_decrease, - dropout=dropout + dropout=dropout, ) self.ffn_eta = point_wise_feed_forward_network( - 2, eta_hidden_dim, "ffn_eta", + 1, + eta_hidden_dim, + "ffn_eta", num_layers=eta_num_layers, activation=activation, dim_decrease=eta_dim_decrease, - dropout=dropout + dropout=dropout, ) + # sin_phi, cos_phi outputs self.ffn_phi = point_wise_feed_forward_network( - 4, phi_hidden_dim, "ffn_phi", + 2, + phi_hidden_dim, + "ffn_phi", num_layers=phi_num_layers, activation=activation, dim_decrease=phi_dim_decrease, - dropout=dropout + dropout=dropout, ) self.ffn_energy = point_wise_feed_forward_network( - num_output_classes if self.energy_multimodal else 1, energy_hidden_dim, "ffn_energy", + num_output_classes if self.energy_multimodal else 1, + energy_hidden_dim, + "ffn_energy", num_layers=energy_num_layers, activation=activation, dim_decrease=energy_dim_decrease, - dropout=dropout) + dropout=dropout, + ) """ X_input: (n_batch, n_elements, n_input_features) raw node input features X_encoded: (n_batch, n_elements, n_encoded_features) encoded/transformed node features msk_input: (n_batch, n_elements) boolean mask of active nodes """ + def call(self, args, training=False): X_input, X_encoded, X_encoded_energy, msk_input = args @@ -572,104 +625,94 @@ def call(self, args, training=False): msk_input_outtype = tf.cast(msk_input, out_id_logits.dtype) out_id_softmax = tf.nn.softmax(out_id_logits, axis=-1) - out_id_hard_softmax = tf.stop_gradient(tf.nn.softmax(100*out_id_logits, axis=-1)) + out_id_hard_softmax = tf.stop_gradient(tf.nn.softmax(100 * out_id_logits, axis=-1)) out_charge = self.ffn_charge(X_encoded, training=training) out_charge = out_charge * msk_input_outtype orig_eta = tf.cast(X_input[:, :, 2:3], out_id_logits.dtype) - #FIXME: better schema propagation between hep_tfds - #skip connection from raw input values + # FIXME: better schema propagation between hep_tfds + # skip connection from raw input values if self.schema == "cms": - orig_sin_phi = tf.cast(tf.math.sin(X_input[:, :, 3:4])*msk_input, out_id_logits.dtype) - orig_cos_phi = tf.cast(tf.math.cos(X_input[:, :, 3:4])*msk_input, out_id_logits.dtype) - orig_energy = tf.cast(X_input[:, :, 
4:5]*msk_input, out_id_logits.dtype) + orig_sin_phi = tf.cast(tf.math.sin(X_input[:, :, 3:4]) * msk_input, out_id_logits.dtype) + orig_cos_phi = tf.cast(tf.math.cos(X_input[:, :, 3:4]) * msk_input, out_id_logits.dtype) + orig_energy = tf.cast(X_input[:, :, 4:5] * msk_input, out_id_logits.dtype) elif self.schema == "delphes": - orig_sin_phi = tf.cast(X_input[:, :, 3:4]*msk_input, out_id_logits.dtype) - orig_cos_phi = tf.cast(X_input[:, :, 4:5]*msk_input, out_id_logits.dtype) - orig_energy = tf.cast(X_input[:, :, 5:6]*msk_input, out_id_logits.dtype) + orig_sin_phi = tf.cast(X_input[:, :, 3:4] * msk_input, out_id_logits.dtype) + orig_cos_phi = tf.cast(X_input[:, :, 4:5] * msk_input, out_id_logits.dtype) + orig_energy = tf.cast(X_input[:, :, 5:6] * msk_input, out_id_logits.dtype) if self.regression_use_classification: X_encoded = tf.concat([X_encoded, tf.cast(tf.stop_gradient(out_id_logits), X_encoded.dtype)], axis=-1) pred_eta_corr = self.ffn_eta(X_encoded, training=training) - pred_eta_corr = pred_eta_corr*msk_input_outtype + pred_eta_corr = pred_eta_corr * msk_input_outtype pred_phi_corr = self.ffn_phi(X_encoded, training=training) - pred_phi_corr = pred_phi_corr*msk_input_outtype + pred_phi_corr = pred_phi_corr * msk_input_outtype - if self.eta_skip_gate: - eta_gate = tf.keras.activations.sigmoid(pred_eta_corr[:, :, 0:1]) - pred_eta = orig_eta + pred_eta_corr[:, :, 1:2] - else: - pred_eta = orig_eta*pred_eta_corr[:, :, 0:1] + pred_eta_corr[:, :, 1:2] - - if self.phi_skip_gate: - sin_phi_gate = tf.keras.activations.sigmoid(pred_phi_corr[:, :, 0:1]) - cos_phi_gate = tf.keras.activations.sigmoid(pred_phi_corr[:, :, 2:3]) - pred_sin_phi = orig_sin_phi + pred_phi_corr[:, :, 1:2] - pred_cos_phi = orig_cos_phi + pred_phi_corr[:, :, 3:4] - else: - pred_sin_phi = orig_sin_phi*pred_phi_corr[:, :, 0:1] + pred_phi_corr[:, :, 1:2] - pred_cos_phi = orig_cos_phi*pred_phi_corr[:, :, 2:3] + pred_phi_corr[:, :, 3:4] + pred_eta = orig_eta + pred_eta_corr[:, :, 0:1] + pred_sin_phi = orig_sin_phi + pred_phi_corr[:, :, 0:1] + pred_cos_phi = orig_cos_phi + pred_phi_corr[:, :, 1:2] X_encoded_energy = tf.concat([X_encoded, X_encoded_energy], axis=-1) if self.regression_use_classification: - X_encoded_energy = tf.concat([X_encoded_energy, tf.cast(tf.stop_gradient(out_id_logits), X_encoded.dtype)], axis=-1) + X_encoded_energy = tf.concat( + [X_encoded_energy, tf.cast(tf.stop_gradient(out_id_logits), X_encoded.dtype)], axis=-1 + ) pred_energy_corr = self.ffn_energy(X_encoded_energy, training=training) - pred_energy_corr = pred_energy_corr*msk_input_outtype + pred_energy_corr = pred_energy_corr * msk_input_outtype - #In case of a multimodal prediction, weight the per-class energy predictions by the approximately one-hot vector + # In case of a multimodal prediction, weight the per-class energy predictions by the approximately one-hot vector if self.energy_multimodal: - pred_energy = orig_energy+tf.reduce_sum(out_id_hard_softmax*pred_energy_corr, axis=-1, keepdims=True) + pred_energy = orig_energy + tf.reduce_sum(out_id_hard_softmax * pred_energy_corr, axis=-1, keepdims=True) else: - pred_energy = orig_energy+pred_energy_corr + pred_energy = orig_energy + pred_energy_corr pred_energy = tf.abs(pred_energy) - #compute pt=E/cosh(eta) + # compute pt=E/cosh(eta) + # FIXME: check if this is actually useful orig_pt = tf.stop_gradient(pred_energy / tf.math.cosh(tf.clip_by_value(pred_eta, -8, 8))) pred_pt_corr = self.ffn_pt(X_encoded_energy, training=training) - pred_pt_corr = pred_pt_corr*msk_input_outtype + pred_pt_corr = 
pred_pt_corr * msk_input_outtype + pred_pt = orig_pt * pred_pt_corr[:, :, 0:1] + pred_pt_corr[:, :, 1:2] - if self.pt_skip_gate: - pt_gate = tf.keras.activations.sigmoid(pred_pt_corr[:, :, 0:1]) - pred_pt = orig_pt + pt_gate*pred_pt_corr[:, :, 1:2] - else: - pred_pt = orig_pt*pred_pt_corr[:, :, 0:1] + pred_pt_corr[:, :, 1:2] - pred_pt = tf.abs(pred_pt) - #mask the regression outputs for the nodes with a class prediction 0 - msk_output = tf.expand_dims(tf.cast(tf.argmax(out_id_hard_softmax, axis=-1)!=0, tf.float32), axis=-1) + # mask the regression outputs for the nodes with a class prediction 0 + msk_output = tf.expand_dims(tf.cast(tf.argmax(out_id_hard_softmax, axis=-1) != 0, tf.float32), axis=-1) if self.mask_reg_cls0: - out_charge = out_charge*msk_output - pred_pt = pred_pt*msk_output - pred_eta = pred_eta*msk_output - pred_sin_phi = pred_sin_phi*msk_output - pred_cos_phi = pred_cos_phi*msk_output - pred_energy = pred_energy*msk_output + out_charge = out_charge * msk_output + pred_pt = pred_pt * msk_output + pred_eta = pred_eta * msk_output + pred_sin_phi = pred_sin_phi * msk_output + pred_cos_phi = pred_cos_phi * msk_output + pred_energy = pred_energy * msk_output ret = { "cls": out_id_softmax, - "charge": out_charge*msk_input_outtype, - "pt": pred_pt*msk_input_outtype, - "eta": pred_eta*msk_input_outtype, - "sin_phi": pred_sin_phi*msk_input_outtype, - "cos_phi": pred_cos_phi*msk_input_outtype, - "energy": pred_energy*msk_input_outtype, + "charge": out_charge * msk_input_outtype, + "pt": pred_pt * msk_input_outtype, + "eta": pred_eta * msk_input_outtype, + "sin_phi": pred_sin_phi * msk_input_outtype, + "cos_phi": pred_cos_phi * msk_input_outtype, + "energy": pred_energy * msk_input_outtype, } if self.event_set_output: - pt_e_eta_phi = tf.concat([ - pred_pt*msk_input_outtype, - pred_energy*msk_input_outtype, - pred_eta*msk_input_outtype, - pred_sin_phi*msk_input_outtype, - pred_cos_phi*msk_input_outtype - ], axis=-1) + pt_e_eta_phi = tf.concat( + [ + pred_pt * msk_input_outtype, + pred_energy * msk_input_outtype, + pred_eta * msk_input_outtype, + pred_sin_phi * msk_input_outtype, + pred_cos_phi * msk_input_outtype, + ], + axis=-1, + ) ret["pt_e_eta_phi"] = pt_e_eta_phi return ret @@ -690,9 +733,10 @@ def set_trainable_classification(self): self.ffn_pt.trainable = False self.ffn_energy.trainable = False + class CombinedGraphLayer(tf.keras.layers.Layer): def __init__(self, *args, **kwargs): - + self.max_num_bins = kwargs.pop("max_num_bins") self.bin_size = kwargs.pop("bin_size") self.distance_dim = kwargs.pop("distance_dim") @@ -708,31 +752,34 @@ def __init__(self, *args, **kwargs): self.dist_activation = getattr(tf.keras.activations, kwargs.pop("dist_activation", "linear")) if self.do_layernorm: - self.layernorm1 = tf.keras.layers.LayerNormalization(axis=-1, epsilon=1e-6, name=kwargs.get("name")+"_layernorm1") + self.layernorm1 = tf.keras.layers.LayerNormalization( + axis=-1, epsilon=1e-6, name=kwargs.get("name") + "_layernorm1" + ) - #self.gaussian_noise = tf.keras.layers.GaussianNoise(0.01) + # self.gaussian_noise = tf.keras.layers.GaussianNoise(0.01) self.ffn_dist = point_wise_feed_forward_network( self.distance_dim, self.ffn_dist_hidden_dim, kwargs.get("name") + "_ffn_dist", - num_layers=self.ffn_dist_num_layers, activation=self.activation, - dropout=self.dropout + num_layers=self.ffn_dist_num_layers, + activation=self.activation, + dropout=self.dropout, ) if self.do_lsh: self.message_building_layer = MessageBuildingLayerLSH( distance_dim=self.distance_dim, 
max_num_bins=self.max_num_bins, bin_size=self.bin_size, - kernel=build_kernel_from_conf(self.kernel, kwargs.get("name")+"_kernel") + kernel=build_kernel_from_conf(self.kernel, kwargs.get("name") + "_kernel"), ) else: self.message_building_layer = MessageBuildingLayerFull( - distance_dim=self.distance_dim, - kernel=build_kernel_from_conf(self.kernel, kwargs.get("name")+"_kernel") + distance_dim=self.distance_dim, kernel=build_kernel_from_conf(self.kernel, kwargs.get("name") + "_kernel") ) self.message_passing_layers = [ - get_message_layer(self.node_message, "{}_msg_{}".format(kwargs.get("name"), iconv)) for iconv in range(self.num_node_messages) + get_message_layer(self.node_message, "{}_msg_{}".format(kwargs.get("name"), iconv)) + for iconv in range(self.num_node_messages) ] self.dropout_layer = None if self.dropout: @@ -745,64 +792,66 @@ def call(self, x, msk, training=False): if self.do_layernorm: x = self.layernorm1(x, training=training) - #compute node features for graph building + # compute node features for graph building x_dist = self.dist_activation(self.ffn_dist(x, training=training)) - #compute the element-to-element messages / distance matrix / graph structure + # compute the element-to-element messages / distance matrix / graph structure if self.do_lsh: bins_split, x, dm, msk_f = self.message_building_layer(x_dist, x, msk) - #bins_split: (FIXME) - #x: (batch, bin, elem, node_feature) - #dm: (batch, bin, elem, elem, pair_feature) - #msk_f: (batch, bin, elem, elem, 1) + # bins_split: (FIXME) + # x: (batch, bin, elem, node_feature) + # dm: (batch, bin, elem, elem, pair_feature) + # msk_f: (batch, bin, elem, elem, 1) else: dm = self.message_building_layer(x_dist, msk) msk_f = tf.expand_dims(tf.cast(msk, x.dtype), axis=-1) bins_split = None - #dm: (batch, elem, elem, pair_feature) + # dm: (batch, elem, elem, pair_feature) - #run the node update with message passing + # run the node update with message passing for msg in self.message_passing_layers: x = msg((x, dm, msk_f)) if self.dropout_layer: x = self.dropout_layer(x, training=training) - #undo the binning according to the element-to-bin indices + # undo the binning according to the element-to-bin indices if self.do_lsh: x = reverse_lsh(bins_split, x) return {"enc": x, "dist": x_dist, "bins": bins_split, "dm": dm} + class PFNetDense(tf.keras.Model): - def __init__(self, - do_node_encoding=False, - node_encoding_hidden_dim=128, - dropout=0.0, - activation="gelu", - multi_output=False, - num_input_classes=8, - num_output_classes=3, - num_graph_layers_id=1, - num_graph_layers_reg=1, - input_encoding="cms", - skip_connection=True, - graph_kernel={}, - combined_graph_layer={}, - node_message={}, - output_decoding={}, - debug=False, - schema="cms", - node_update_mode="concat", - event_set_output=False, - **kwargs - ): + def __init__( + self, + do_node_encoding=False, + node_encoding_hidden_dim=128, + dropout=0.0, + activation="gelu", + multi_output=False, + num_input_classes=8, + num_output_classes=3, + num_graph_layers_id=1, + num_graph_layers_reg=1, + input_encoding="cms", + skip_connection=True, + graph_kernel={}, + combined_graph_layer={}, + node_message={}, + output_decoding={}, + debug=False, + schema="cms", + node_update_mode="concat", + event_set_output=False, + **kwargs + ): super(PFNetDense, self).__init__() self.multi_output = multi_output self.debug = debug self.skip_connection = skip_connection - + self.do_node_encoding = do_node_encoding self.node_encoding_hidden_dim = node_encoding_hidden_dim self.dropout = dropout @@ 
-816,7 +865,7 @@ def __init__(self, "node_encoding", num_layers=1, activation=self.activation, - dropout=self.dropout + dropout=self.dropout, ) if input_encoding == "cms": @@ -824,8 +873,12 @@ def __init__(self, elif input_encoding == "default": self.enc = InputEncoding(num_input_classes) - self.cg_id = [CombinedGraphLayer(name="cg_id_{}".format(i), **combined_graph_layer) for i in range(num_graph_layers_id)] - self.cg_reg = [CombinedGraphLayer(name="cg_reg_{}".format(i), **combined_graph_layer) for i in range(num_graph_layers_reg)] + self.cg_id = [ + CombinedGraphLayer(name="cg_id_{}".format(i), **combined_graph_layer) for i in range(num_graph_layers_id) + ] + self.cg_reg = [ + CombinedGraphLayer(name="cg_reg_{}".format(i), **combined_graph_layer) for i in range(num_graph_layers_reg) + ] output_decoding["schema"] = schema output_decoding["num_output_classes"] = num_output_classes @@ -836,10 +889,10 @@ def call(self, inputs, training=False): X = inputs debugging_data = {} - #encode the elements for classification (id) + # encode the elements for classification (id) X_enc = self.enc(X) - #mask padded elements + # mask padded elements msk = X[:, :, 0] != 0 msk_input = tf.expand_dims(tf.cast(msk, X_enc.dtype), -1) @@ -863,9 +916,9 @@ def call(self, inputs, training=False): if self.debug: debugging_data[cg.name] = enc_all - + if self.node_update_mode == "concat": - dec_output_id = tf.concat(encs_id, axis=-1)*msk_input + dec_output_id = tf.concat(encs_id, axis=-1) * msk_input elif self.node_update_mode == "additive": dec_output_id = X_enc_cg @@ -890,7 +943,7 @@ def call(self, inputs, training=False): encs_reg.append(X_enc_cg) if self.node_update_mode == "concat": - dec_output_reg = tf.concat(encs_reg, axis=-1)*msk_input + dec_output_reg = tf.concat(encs_reg, axis=-1) * msk_input elif self.node_update_mode == "additive": dec_output_reg = X_enc_cg @@ -907,7 +960,9 @@ def call(self, inputs, training=False): if self.multi_output: return ret else: - return tf.concat([ret["cls"], ret["charge"], ret["pt"], ret["eta"], ret["sin_phi"], ret["cos_phi"], ret["energy"]], axis=-1) + return tf.concat( + [ret["cls"], ret["charge"], ret["pt"], ret["eta"], ret["sin_phi"], ret["cos_phi"], ret["energy"]], axis=-1 + ) def set_trainable_named(self, layer_names): self.trainable = True @@ -961,14 +1016,20 @@ def set_trainable_named(self, layer_names): # self.step += 1 # return {m.name: m.result() for m in self.metrics} + class KernelEncoder(tf.keras.layers.Layer): def __init__(self, *args, **kwargs): from official.nlp.modeling.layers.kernel_attention import KernelAttention + self.key_dim = kwargs.pop("key_dim") num_heads = 2 - self.attn = KernelAttention(feature_transform="elu", num_heads=num_heads, key_dim=self.key_dim, name=kwargs.get("name") + "_attention") - self.ffn = point_wise_feed_forward_network(self.key_dim, self.key_dim, kwargs.get("name") + "_ffn", num_layers=1, activation="elu") + self.attn = KernelAttention( + feature_transform="elu", num_heads=num_heads, key_dim=self.key_dim, name=kwargs.get("name") + "_attention" + ) + self.ffn = point_wise_feed_forward_network( + self.key_dim, self.key_dim, kwargs.get("name") + "_ffn", num_layers=1, activation="elu" + ) self.norm1 = tf.keras.layers.LayerNormalization(axis=-1, name=kwargs.get("name") + "_ln0") self.norm2 = tf.keras.layers.LayerNormalization(axis=-1, name=kwargs.get("name") + "_ln1") super(KernelEncoder, self).__init__(*args, **kwargs) @@ -977,7 +1038,7 @@ def call(self, args, training=False): X, mask = args msk_input = tf.expand_dims(tf.cast(mask, 
tf.float32), -1) - attn_output = self.attn(query=X, value=X, key=X, training=training, attention_mask=mask)*msk_input + attn_output = self.attn(query=X, value=X, key=X, training=training, attention_mask=mask) * msk_input out1 = self.norm1(X + attn_output) ffn_output = self.ffn(out1) @@ -985,16 +1046,24 @@ def call(self, args, training=False): return out2 + class KernelDecoder(tf.keras.layers.Layer): def __init__(self, *args, **kwargs): from official.nlp.modeling.layers.kernel_attention import KernelAttention + self.key_dim = kwargs.pop("key_dim") num_heads = 2 - self.attn1 = KernelAttention(feature_transform="elu", num_heads=num_heads, key_dim=self.key_dim, name=kwargs.get("name") + "_attention1") - self.attn2 = KernelAttention(feature_transform="elu", num_heads=num_heads, key_dim=self.key_dim, name=kwargs.get("name") + "_attention2") + self.attn1 = KernelAttention( + feature_transform="elu", num_heads=num_heads, key_dim=self.key_dim, name=kwargs.get("name") + "_attention1" + ) + self.attn2 = KernelAttention( + feature_transform="elu", num_heads=num_heads, key_dim=self.key_dim, name=kwargs.get("name") + "_attention2" + ) - self.ffn = point_wise_feed_forward_network(self.key_dim, self.key_dim, kwargs.get("name") + "_ffn", num_layers=1, activation="elu") + self.ffn = point_wise_feed_forward_network( + self.key_dim, self.key_dim, kwargs.get("name") + "_ffn", num_layers=1, activation="elu" + ) self.norm1 = tf.keras.layers.LayerNormalization(axis=-1, name=kwargs.get("name") + "_ln0") self.norm2 = tf.keras.layers.LayerNormalization(axis=-1, name=kwargs.get("name") + "_ln1") @@ -1005,10 +1074,10 @@ def call(self, args, training=False): X, enc_output, mask = args msk_input = tf.expand_dims(tf.cast(mask, tf.float32), -1) - attn1 = self.attn1(query=X, value=X, key=X, training=training, attention_mask=mask)*msk_input + attn1 = self.attn1(query=X, value=X, key=X, training=training, attention_mask=mask) * msk_input out1 = self.norm1(attn1 + X, training=training) - attn2 = self.attn2(query=enc_output, value=enc_output, key=out1, training=training, attention_mask=mask)*msk_input + attn2 = self.attn2(query=enc_output, value=enc_output, key=out1, training=training, attention_mask=mask) * msk_input out2 = self.norm2(attn2 + out1) ffn_output = self.ffn(out2) # (batch_size, target_seq_len, d_model) @@ -1016,6 +1085,7 @@ def call(self, args, training=False): return out3 + class Transformer(tf.keras.layers.Layer): def __init__(self, *args, **kwargs): self.encoders = [] @@ -1036,17 +1106,18 @@ def call(self, inputs, training=False): msk_input = tf.expand_dims(tf.cast(mask, tf.float32), -1) for enc in self.encoders: - X = enc([X, mask], training=training)*msk_input + X = enc([X, mask], training=training) * msk_input X_dec = X for dec in self.decoders: - X_dec = dec([X_dec, X, mask], training=training)*msk_input + X_dec = dec([X_dec, X, mask], training=training) * msk_input return X_dec class PFNetTransformer(tf.keras.Model): - def __init__(self, + def __init__( + self, num_input_classes=8, num_output_classes=3, input_encoding="cms", @@ -1054,7 +1125,7 @@ def __init__(self, output_decoding={}, multi_output=True, event_set_output=False, - ): + ): super(PFNetTransformer, self).__init__() self.multi_output = multi_output @@ -1077,9 +1148,8 @@ def __init__(self, def call(self, inputs, training=False): X = inputs - debugging_data = {} - #mask padded elements + # mask padded elements msk = tf.cast(X[:, :, 0] != 0, tf.float32) msk_input = tf.expand_dims(tf.cast(msk, tf.float32), -1) @@ -1094,4 +1164,6 @@ def call(self, 
inputs, training=False): if self.multi_output: return ret else: - return tf.concat([ret["cls"], ret["charge"], ret["pt"], ret["eta"], ret["sin_phi"], ret["cos_phi"], ret["energy"]], axis=-1) + return tf.concat( + [ret["cls"], ret["charge"], ret["pt"], ret["eta"], ret["sin_phi"], ret["cos_phi"], ret["energy"]], axis=-1 + ) diff --git a/mlpf/tfmodel/model_setup.py b/mlpf/tfmodel/model_setup.py index 75b542a3c..1e5861e30 100644 --- a/mlpf/tfmodel/model_setup.py +++ b/mlpf/tfmodel/model_setup.py @@ -3,99 +3,55 @@ except ModuleNotFoundError: print("hvd not enabled, ignoring") -from .model import PFNetTransformer, PFNetDense - -import tensorflow as tf -import tensorflow_addons as tfa -import pickle -import numpy as np -import os -import io -import os -import yaml -import uuid -import matplotlib -import matplotlib.pyplot as plt -from argparse import Namespace -import time +import glob import json -import random -import math -import platform -import mplhep -from tqdm import tqdm +import os +import pickle from pathlib import Path -import glob - -import tf2onnx -import sklearn -import sklearn.metrics +import awkward import fastjet +import matplotlib.pyplot as plt +import numpy as np +import tensorflow as tf +import tensorflow_addons as tfa +import tf2onnx import vector -import awkward - -from tfmodel.onecycle_scheduler import OneCycleScheduler, MomentumOneCycleScheduler +from tensorflow.keras.metrics import Recall from tfmodel.callbacks import CustomTensorBoard -from tfmodel.utils import get_lr_schedule, get_optimizer, make_weight_function, targets_multi_output from tfmodel.datasets.BaseDatasetFactory import unpack_target -import tensorflow_datasets as tfds - - -from tensorflow.keras.metrics import Recall, CategoricalAccuracy -import keras - -def plot_confusion_matrix(cm): - fig = plt.figure(figsize=(5,5)) - plt.imshow(cm, cmap="Blues") - plt.xlabel("Predicted PID") - plt.ylabel("Target PID") - plt.colorbar() - plt.tight_layout() - return fig - -def plot_to_image(figure): - """ - Converts the matplotlib plot specified by 'figure' to a PNG image and - returns it. The supplied figure is closed and inaccessible after this call. - """ - - buf = io.BytesIO() - - # Use plt.savefig to save the plot to a PNG in memory. 
- plt.savefig(buf, format='png') - plt.close(figure) - buf.seek(0) - - image = tf.image.decode_png(buf.getvalue(), channels=4) - image = tf.expand_dims(image, 0) - - return image +from tqdm import tqdm + +from .model import PFNetDense, PFNetTransformer + class ModelOptimizerCheckpoint(tf.keras.callbacks.ModelCheckpoint): def on_epoch_end(self, epoch, logs=None): super(ModelOptimizerCheckpoint, self).on_epoch_end(epoch, logs=logs) - weightfile_path = self.opt_path.format(epoch=epoch+1, **logs) - try: - #PCGrad is derived from the legacy optimizer + weightfile_path = self.opt_path.format(epoch=epoch + 1, **logs) + try: + # PCGrad is derived from the legacy optimizer if self.model.optimizer.__class__.__module__ == "keras.optimizers.optimizer_v1": - #lr = self.model.optimizer.optimizer.optimizer.lr + # lr = self.model.optimizer.optimizer.optimizer.lr weights = self.model.optimizer.optimizer.optimizer.get_weights() else: - #lr = self.model.optimizer.lr + # lr = self.model.optimizer.lr weights = self.model.optimizer.get_weights() with open(weightfile_path, "wb") as fi: - pickle.dump({ - #"lr": lr, - "weights": weights - }, fi + pickle.dump( + { + # "lr": lr, + "weights": weights + }, + fi, ) except Exception as e: print("Could not save optimizer state: {}".format(e)) if os.path.isfile(weightfile_path): os.remove(weightfile_path) + class CustomCallback(tf.keras.callbacks.Callback): def __init__(self, outpath, dataset, config, plot_freq=1, horovod_enabled=False): super(CustomCallback, self).__init__() @@ -108,115 +64,98 @@ def __init__(self, outpath, dataset, config, plot_freq=1, horovod_enabled=False) self.writer = tf.summary.create_file_writer(outpath) def on_epoch_end(self, epoch, logs=None): - if not self.horovod_enabled or hvd.rank()==0: + if not self.horovod_enabled or hvd.rank() == 0: epoch_end(self, epoch, logs) + def epoch_end(self, epoch, logs): - #first epoch is 1, not 0 + # first epoch is 1, not 0 epoch = epoch + 1 - #save the training logs (losses) for this epoch + # save the training logs (losses) for this epoch with open("{}/history_{}.json".format(self.outpath, epoch), "w") as fi: json.dump(logs, fi) - if self.plot_freq<=0: + if self.plot_freq <= 0: return - - if self.plot_freq>=1: - if epoch%self.plot_freq!=0: + + if self.plot_freq >= 1: + if epoch % self.plot_freq != 0: return cp_dir = Path(self.outpath) / "epoch_{}".format(epoch) cp_dir.mkdir(parents=True, exist_ok=True) - #run the model inference on the validation dataset + # run the model inference on the validation dataset eval_model(self.model, self.dataset, self.config, cp_dir) - + yvals = {} - for fi in glob.glob(str(cp_dir/"*.npz")): + for fi in glob.glob(str(cp_dir / "*.npz")): dd = np.load(fi) keys_in_file = list(dd.keys()) for k in keys_in_file: - if k=="X": + if k == "X": continue if not (k in yvals): yvals[k] = [] yvals[k].append(dd[k]) yvals = {k: np.concatenate(v) for k, v in yvals.items()} - gen_px = yvals["gen_pt"]*yvals["gen_cos_phi"] - gen_py = yvals["gen_pt"]*yvals["gen_sin_phi"] - pred_px = yvals["pred_pt"]*yvals["pred_cos_phi"] - pred_py = yvals["pred_pt"]*yvals["pred_sin_phi"] - cand_px = yvals["cand_pt"]*yvals["cand_cos_phi"] - cand_py = yvals["cand_pt"]*yvals["cand_sin_phi"] + gen_px = yvals["gen_pt"] * yvals["gen_cos_phi"] + gen_py = yvals["gen_pt"] * yvals["gen_sin_phi"] + pred_px = yvals["pred_pt"] * yvals["pred_cos_phi"] + pred_py = yvals["pred_pt"] * yvals["pred_sin_phi"] + cand_px = yvals["cand_pt"] * yvals["cand_cos_phi"] + cand_py = yvals["cand_pt"] * yvals["cand_sin_phi"] - gen_met = 
np.sqrt(np.sum(gen_px**2+gen_py**2, axis=1)) - pred_met = np.sqrt(np.sum(pred_px**2+pred_py**2, axis=1)) - cand_met = np.sqrt(np.sum(cand_px**2+cand_py**2, axis=1)) + gen_met = np.sqrt(np.sum(gen_px**2 + gen_py**2, axis=1)) + pred_met = np.sqrt(np.sum(pred_px**2 + pred_py**2, axis=1)) + cand_met = np.sqrt(np.sum(cand_px**2 + cand_py**2, axis=1)) with self.writer.as_default(): - jet_ratio = yvals["jets_pt_gen_to_pred"][:, 1]/yvals["jets_pt_gen_to_pred"][:, 0] + jet_ratio = yvals["jets_pt_gen_to_pred"][:, 1] / yvals["jets_pt_gen_to_pred"][:, 0] plt.figure() - b = np.linspace(0,5,100) - plt.hist(yvals["jets_pt_gen_to_cand"][:, 1]/yvals["jets_pt_gen_to_cand"][:, 0], bins=b, histtype="step", lw=2) - plt.hist(yvals["jets_pt_gen_to_pred"][:, 1]/yvals["jets_pt_gen_to_pred"][:, 0], bins=b, histtype="step", lw=2) - plt.savefig(str(cp_dir/"jet_res.png"), bbox_inches="tight", dpi=100) + b = np.linspace(0, 5, 100) + plt.hist(yvals["jets_pt_gen_to_cand"][:, 1] / yvals["jets_pt_gen_to_cand"][:, 0], bins=b, histtype="step", lw=2) + plt.hist(yvals["jets_pt_gen_to_pred"][:, 1] / yvals["jets_pt_gen_to_pred"][:, 0], bins=b, histtype="step", lw=2) + plt.savefig(str(cp_dir / "jet_res.png"), bbox_inches="tight", dpi=100) plt.clf() plt.figure() - b = np.linspace(0,5,100) - plt.hist(cand_met/gen_met, bins=b, histtype="step", lw=2) - plt.hist(pred_met/gen_met, bins=b, histtype="step", lw=2) - plt.savefig(str(cp_dir/"met_res.png"), bbox_inches="tight", dpi=100) + b = np.linspace(0, 5, 100) + plt.hist(cand_met / gen_met, bins=b, histtype="step", lw=2) + plt.hist(pred_met / gen_met, bins=b, histtype="step", lw=2) + plt.savefig(str(cp_dir / "met_res.png"), bbox_inches="tight", dpi=100) plt.clf() - tf.summary.histogram( - "jet_pt_pred_over_gen", jet_ratio, - step=epoch-1, - buckets=None, - description=None - ) - tf.summary.scalar( - "jet_pt_pred_over_gen_mean", np.mean(jet_ratio), step=epoch-1, description=None - ) - tf.summary.scalar( - "jet_pt_pred_over_gen_std", np.std(jet_ratio), step=epoch-1, description=None - ) - - - tf.summary.histogram( - "met_pred_over_gen", pred_met/gen_met, - step=epoch-1, - buckets=None, - description=None - ) - tf.summary.scalar( - "met_pred_over_gen_mean", np.mean(pred_met/gen_met), step=epoch-1, description=None - ) - tf.summary.scalar( - "met_pred_over_gen_std", np.std(pred_met/gen_met), step=epoch-1, description=None - ) + tf.summary.histogram("jet_pt_pred_over_gen", jet_ratio, step=epoch - 1, buckets=None, description=None) + tf.summary.scalar("jet_pt_pred_over_gen_mean", np.mean(jet_ratio), step=epoch - 1, description=None) + tf.summary.scalar("jet_pt_pred_over_gen_std", np.std(jet_ratio), step=epoch - 1, description=None) + + tf.summary.histogram("met_pred_over_gen", pred_met / gen_met, step=epoch - 1, buckets=None, description=None) + tf.summary.scalar("met_pred_over_gen_mean", np.mean(pred_met / gen_met), step=epoch - 1, description=None) + tf.summary.scalar("met_pred_over_gen_std", np.std(pred_met / gen_met), step=epoch - 1, description=None) def prepare_callbacks( - config, - outdir, - dataset, - comet_experiment=None, - horovod_enabled=False, - ): + config, + outdir, + dataset, + comet_experiment=None, + horovod_enabled=False, +): callbacks = [] terminate_cb = tf.keras.callbacks.TerminateOnNaN() callbacks += [terminate_cb] - if not horovod_enabled or hvd.rank()==0: + if not horovod_enabled or hvd.rank() == 0: callbacks += get_checkpoint_history_callback(outdir, config, dataset, comet_experiment, horovod_enabled) return callbacks + def get_checkpoint_history_callback(outdir, 
config, dataset, comet_experiment, horovod_enabled): callbacks = [] cp_dir = Path(outdir) / "weights" @@ -233,22 +172,23 @@ def get_checkpoint_history_callback(outdir, config, dataset, comet_experiment, h history_path = Path(outdir) / "history" history_path.mkdir(parents=True, exist_ok=True) - history_path = str(history_path) + history_path = str(history_path) cb = CustomCallback( history_path, dataset.take(config["setup"]["num_events_validation"]), config, plot_freq=config["callbacks"]["plot_freq"], - horovod_enabled=horovod_enabled + horovod_enabled=horovod_enabled, ) callbacks += [cb] tb = CustomTensorBoard( log_dir=outdir + "/logs", histogram_freq=config["callbacks"]["tensorboard"]["hist_freq"], - write_graph=False, write_images=False, + write_graph=False, + write_images=False, update_freq="epoch", - #profile_batch=(10,90), + # profile_batch=(10,90), profile_batch=0, dump_history=config["callbacks"]["tensorboard"]["dump_history"], ) @@ -258,7 +198,8 @@ def get_checkpoint_history_callback(outdir, config, dataset, comet_experiment, h return callbacks -def get_rundir(base='experiments'): + +def get_rundir(base="experiments"): if not os.path.exists(base): os.makedirs(base) @@ -266,28 +207,23 @@ def get_rundir(base='experiments'): if len(previous_runs) == 0: run_number = 1 else: - run_number = max([int(s.split('run_')[1]) for s in previous_runs]) + 1 - - logdir = 'run_%02d' % run_number - return '{}/{}'.format(base, logdir) + run_number = max([int(s.split("run_")[1]) for s in previous_runs]) + 1 - -def scale_outputs(X,y,w): - ynew = y-out_m - ynew = ynew/out_s - return X, ynew, w + logdir = "run_%02d" % run_number + return "{}/{}".format(base, logdir) def make_model(config, dtype): - model = config['parameters']['model'] + model = config["parameters"]["model"] - if model == 'transformer': + if model == "transformer": return make_transformer(config, dtype) - elif model == 'gnn_dense': + elif model == "gnn_dense": return make_gnn_dense(config, dtype) raise KeyError("Unknown model type {}".format(model)) + def make_gnn_dense(config, dtype): parameters = [ @@ -302,50 +238,50 @@ def make_gnn_dense(config, dtype): "skip_connection", "output_decoding", "combined_graph_layer", - "debug" + "debug", ] kwargs = {} for par in parameters: - if par in config['parameters'].keys(): - kwargs[par] = config['parameters'][par] + if par in config["parameters"].keys(): + kwargs[par] = config["parameters"][par] model = PFNetDense( multi_output=config["setup"]["multi_output"], num_input_classes=config["dataset"]["num_input_classes"], num_output_classes=config["dataset"]["num_output_classes"], schema=config["dataset"]["schema"], - event_set_output=config["loss"]["event_loss"]!="none", + event_set_output=config["loss"]["event_loss"] != "none", **kwargs ) return model + def make_transformer(config, dtype): - parameters = [ - "input_encoding", - "output_decoding" - ] + parameters = ["input_encoding", "output_decoding"] kwargs = {} for par in parameters: - if par in config['parameters'].keys(): - kwargs[par] = config['parameters'][par] + if par in config["parameters"].keys(): + kwargs[par] = config["parameters"][par] model = PFNetTransformer( multi_output=config["setup"]["multi_output"], num_input_classes=config["dataset"]["num_input_classes"], num_output_classes=config["dataset"]["num_output_classes"], schema=config["dataset"]["schema"], - event_set_output=config["loss"]["event_loss"]!="none", + event_set_output=config["loss"]["event_loss"] != "none", **kwargs ) return model -def deltar(a,b): + +def deltar(a, b): 
return a.deltaR(b) -#Given a model, evaluates it on each batch of the validation dataset -#For each batch, save the inputs, the generator-level target, the candidate-level target, and the prediction + +# Given a model, evaluates it on each batch of the validation dataset +# For each batch, save the inputs, the generator-level target, the candidate-level target, and the prediction def eval_model(model, dataset, config, outdir): ibatch = 0 @@ -371,14 +307,13 @@ def eval_model(model, dataset, config, outdir): jets_const = {} for typ in ["gen", "cand", "pred"]: cls_id = np.argmax(outs["{}_cls".format(typ)], axis=-1) - valid = cls_id!=0 + valid = cls_id != 0 pt = awkward.from_iter([y[m][:, 0] for y, m in zip(outs["{}_pt".format(typ)], valid)]) eta = awkward.from_iter([y[m][:, 0] for y, m in zip(outs["{}_eta".format(typ)], valid)]) phi = np.arctan2(outs["{}_sin_phi".format(typ)], outs["{}_cos_phi".format(typ)]) phi = awkward.from_iter([y[m][:, 0] for y, m in zip(phi, valid)]) e = awkward.from_iter([y[m][:, 0] for y, m in zip(outs["{}_energy".format(typ)], valid)]) - idx_to_elem = awkward.from_iter([np.arange(len(m))[m] for m in valid]) vec = vector.arr({"pt": pt, "eta": eta, "phi": phi, "e": e}) @@ -394,108 +329,113 @@ def eval_model(model, dataset, config, outdir): outs["jets_cand_{}".format(key)] = awkward.to_numpy(awkward.flatten(getattr(jets_coll["cand"], key))) outs["jets_pred_{}".format(key)] = awkward.to_numpy(awkward.flatten(getattr(jets_coll["pred"], key))) - #DeltaR match between genjets and PF/MLPF jets + # DeltaR match between genjets and PF/MLPF jets cart = awkward.cartesian([jets_coll["gen"], jets_coll["pred"]], nested=True) jets_a, jets_b = awkward.unzip(cart) drs = deltar(jets_a, jets_b) - match_gen_to_pred = [awkward.where(d<0.1) for d in drs] + match_gen_to_pred = [awkward.where(d < 0.1) for d in drs] m0 = awkward.from_iter([m[0] for m in match_gen_to_pred]) m1 = awkward.from_iter([m[1] for m in match_gen_to_pred]) j1s = jets_coll["gen"][m0] j2s = jets_coll["pred"][m1] - outs["jets_pt_gen_to_pred"] = np.stack([awkward.to_numpy(awkward.flatten(j1s.pt)), awkward.to_numpy(awkward.flatten(j2s.pt))], axis=-1) + outs["jets_pt_gen_to_pred"] = np.stack( + [awkward.to_numpy(awkward.flatten(j1s.pt)), awkward.to_numpy(awkward.flatten(j2s.pt))], axis=-1 + ) cart = awkward.cartesian([jets_coll["gen"], jets_coll["cand"]], nested=True) jets_a, jets_b = awkward.unzip(cart) drs = deltar(jets_a, jets_b) - match_gen_to_pred = [awkward.where(d<0.1) for d in drs] + match_gen_to_pred = [awkward.where(d < 0.1) for d in drs] m0 = awkward.from_iter([m[0] for m in match_gen_to_pred]) m1 = awkward.from_iter([m[1] for m in match_gen_to_pred]) j1s = jets_coll["gen"][m0] j2s = jets_coll["cand"][m1] - outs["jets_pt_gen_to_cand"] = np.stack([awkward.to_numpy(awkward.flatten(j1s.pt)), awkward.to_numpy(awkward.flatten(j2s.pt))], axis=-1) - - np.savez( - np_outfile, - X=elem["X"], - **outs + outs["jets_pt_gen_to_cand"] = np.stack( + [awkward.to_numpy(awkward.flatten(j1s.pt)), awkward.to_numpy(awkward.flatten(j2s.pt))], axis=-1 ) - + + np.savez(np_outfile, X=elem["X"], **outs) + ibatch += 1 + def freeze_model(model, config, outdir): - bin_size = config["parameters"]["combined_graph_layer"]["bin_size"] num_features = config["dataset"]["num_input_features"] - num_out_classes = config["dataset"]["num_output_classes"] def model_output(ret): - return tf.concat([ret["cls"], ret["charge"], ret["pt"], ret["eta"], ret["sin_phi"], ret["cos_phi"], ret["energy"]], axis=-1) + return tf.concat( + [ret["cls"], ret["charge"], 
ret["pt"], ret["eta"], ret["sin_phi"], ret["cos_phi"], ret["energy"]], axis=-1 + ) + full_model = tf.function(lambda x: model_output(model(x, training=False))) - #we need to use opset 12 for the version of ONNXRuntime in CMSSW - #the warnings "RuntimeError: Opset (12) must be >= 13 for operator 'batch_dot'." do not seem to be critical + # we need to use opset 12 for the version of ONNXRuntime in CMSSW + # the warnings "RuntimeError: Opset (12) must be >= 13 for operator 'batch_dot'." do not seem to be critical model_proto, _ = tf2onnx.convert.from_function( full_model, opset=12, - input_signature=(tf.TensorSpec((None, None, num_features), tf.float32, name="x:0"), ), - output_path=str(Path(outdir) / "model.onnx") + input_signature=(tf.TensorSpec((None, None, num_features), tf.float32, name="x:0"),), + output_path=str(Path(outdir) / "model.onnx"), ) + class FlattenedCategoricalAccuracy(tf.keras.metrics.CategoricalAccuracy): def __init__(self, use_weights=False, **kwargs): super(FlattenedCategoricalAccuracy, self).__init__(**kwargs) self.use_weights = use_weights def update_state(self, y_true, y_pred, sample_weight=None): - #flatten the batch dimension - _y_true = tf.reshape(y_true, (tf.shape(y_true)[0]*tf.shape(y_true)[1], tf.shape(y_true)[2])) - _y_pred = tf.reshape(y_pred, (tf.shape(y_pred)[0]*tf.shape(y_pred)[1], tf.shape(y_pred)[2])) + # flatten the batch dimension + _y_true = tf.reshape(y_true, (tf.shape(y_true)[0] * tf.shape(y_true)[1], tf.shape(y_true)[2])) + _y_pred = tf.reshape(y_pred, (tf.shape(y_pred)[0] * tf.shape(y_pred)[1], tf.shape(y_pred)[2])) sample_weights = None if self.use_weights: - sample_weights = _y_true*tf.reduce_sum(_y_true, axis=0) - sample_weights = 1.0/sample_weights[sample_weights!=0] + sample_weights = _y_true * tf.reduce_sum(_y_true, axis=0) + sample_weights = 1.0 / sample_weights[sample_weights != 0] super(FlattenedCategoricalAccuracy, self).update_state(_y_true, _y_pred, sample_weights) + class SingleClassRecall(Recall): def __init__(self, icls, **kwargs): super(SingleClassRecall, self).__init__(**kwargs) self.icls = icls def update_state(self, y_true, y_pred, sample_weight=None): - #flatten the batch dimension - _y_true = tf.reshape(y_true, (tf.shape(y_true)[0]*tf.shape(y_true)[1], tf.shape(y_true)[2])) - _y_pred = tf.argmax(tf.reshape(y_pred, (tf.shape(y_pred)[0]*tf.shape(y_pred)[1], tf.shape(y_pred)[2])), axis=-1) - super(SingleClassRecall, self).update_state( - _y_true[:, self.icls], - tf.cast(_y_pred==self.icls, tf.float32) - ) + # flatten the batch dimension + _y_true = tf.reshape(y_true, (tf.shape(y_true)[0] * tf.shape(y_true)[1], tf.shape(y_true)[2])) + _y_pred = tf.argmax(tf.reshape(y_pred, (tf.shape(y_pred)[0] * tf.shape(y_pred)[1], tf.shape(y_pred)[2])), axis=-1) + super(SingleClassRecall, self).update_state(_y_true[:, self.icls], tf.cast(_y_pred == self.icls, tf.float32)) + class FlattenedMeanIoU(tf.keras.metrics.MeanIoU): def __init__(self, use_weights=False, **kwargs): super(FlattenedMeanIoU, self).__init__(**kwargs) def update_state(self, y_true, y_pred, sample_weight=None): - #flatten the batch dimension - _y_true = tf.reshape(y_true, (tf.shape(y_true)[0]*tf.shape(y_true)[1], tf.shape(y_true)[2])) - _y_pred = tf.reshape(y_pred, (tf.shape(y_pred)[0]*tf.shape(y_pred)[1], tf.shape(y_pred)[2])) + # flatten the batch dimension + _y_true = tf.reshape(y_true, (tf.shape(y_true)[0] * tf.shape(y_true)[1], tf.shape(y_true)[2])) + _y_pred = tf.reshape(y_pred, (tf.shape(y_pred)[0] * tf.shape(y_pred)[1], tf.shape(y_pred)[2])) super(FlattenedMeanIoU, 
self).update_state(_y_true, _y_pred, None) + class LearningRateLoggingCallback(tf.keras.callbacks.Callback): def on_epoch_end(self, epoch, numpy_logs): try: lr = self.model.optimizer._decayed_lr(tf.float32).numpy() - tf.summary.scalar('learning rate', data=lr, step=epoch) + tf.summary.scalar("learning rate", data=lr, step=epoch) except AttributeError as e: + print(e) pass + def configure_model_weights(model, trainable_layers): print("setting trainable layers: {}".format(trainable_layers)) - if (trainable_layers is None): + if trainable_layers is None: trainable_layers = "all" if trainable_layers == "all": @@ -522,11 +462,15 @@ def configure_model_weights(model, trainable_layers): non_trainable_count = sum([np.prod(tf.keras.backend.get_value(w).shape) for w in model.non_trainable_weights]) print("trainable={} non_trainable={}".format(trainable_count, non_trainable_count)) + def make_focal_loss(config): - def loss(x,y): - return tfa.losses.sigmoid_focal_crossentropy(x,y, + def loss(x, y): + return tfa.losses.sigmoid_focal_crossentropy( + x, + y, alpha=float(config["setup"].get("focal_loss_alpha", 0.25)), gamma=float(config["setup"].get("focal_loss_gamma", 2.0)), - from_logits=bool(config["setup"].get("focal_loss_from_logits", False)) + from_logits=bool(config["setup"].get("focal_loss_from_logits", False)), ) + return loss diff --git a/mlpf/tfmodel/mpnn.py b/mlpf/tfmodel/mpnn.py deleted file mode 100644 index 9d1477bb5..000000000 --- a/mlpf/tfmodel/mpnn.py +++ /dev/null @@ -1,291 +0,0 @@ -import tensorflow as tf - -class EdgeNetwork(tf.keras.Model): - """EdgeNetwork is a choice for message function that allow vector valued edge features. - - M(h_v, h_w, e_{vw}) = A(e_{vw})h_w. where A is a neural network which maps the edge vector e_{vw} to - a d x d matrix. where d is the dimension of node state vector. - - Here we have the simplest nn - relu(linear) - """ - def __init__(self, state_dim, name='edgenetwork'): - super(EdgeNetwork, self).__init__(name=name) - self.state_dim = state_dim - self.nn = tf.keras.layers.Dense(units=state_dim ** 2, activation=tf.nn.relu) - - def call(self, states, edges): - """ - Input - ----- - states: bs x #nodes^2 x state_dim - edges: bs x #nodes^2 x #edge_features - - Output - ------ - messages: bs x #nodes^2 x state_dim - - Map edge vectors to d x d matrices. Reshape both states and edges to do matrix mulltiplication. 
- the matrix mutltiplication is doing dot products between: - and - The output message tensor represents: - [ - [ - - for i in range(n_nodes) - for j in range(n_nodes) - ] for k in range(n_graph) - ] - """ - total_edges = tf.shape(edges)[1] - state_dim = self.state_dim - - Ae_vw = self.nn(edges) # bs x #nodes^2 x state_dim^2 - Ae_vw = tf.reshape(Ae_vw, [-1, state_dim, state_dim]) # bs * #nodes^2 x state_dim x state_dim - states = tf.reshape(states, [-1, state_dim, 1]) # bs * #nodes^2 x state_dim x 1 - messages = tf.matmul(Ae_vw, states) # bs * #nodes^2 x state_dim x 1 - messages = tf.reshape(messages, [-1, total_edges, state_dim]) # bs x #nodes^2 x state_dim - return messages - - -class Aggregation(tf.keras.Model): - def __init__(self, method='sum', axis=2, name='aggregation'): - assert method in ['sum', 'mean'], 'Unsupported aggregation method' - super(Aggregation, self).__init__(name=name) - self.method = method - self.axis = axis - - def call(self, x, keepdims= False, *args , **kwargs): #KYR: added keepdims - if self.method == 'sum': - return tf.reduce_sum(x, self.axis, keepdims) - else: - return tf.reduce_mean(messages, self.axis) - - -class UpdateFunction(tf.keras.Model): - """Node states update function via GRU. - - U_t = GRU(h_v^t, m_v^{t+1}) - - The same update function is used at each time step t. - """ - def __init__(self, state_dim, name='message_update_function'): - super(UpdateFunction, self).__init__(name=name) - self.state_dim = state_dim - self.concat = tf.keras.layers.Concatenate(axis=1) - self.GRU = tf.keras.layers.GRU(units=state_dim) - - def call(self, states, messages): - """ - Input - ----- - states: bs x #nodes x state_dim - messages: bs x #nodes x state_dim - """ - num_nodes = tf.shape(states)[1] - state_dim = self.state_dim - states = tf.reshape(states, [-1, 1, state_dim]) - messages = tf.reshape(messages, [-1, 1, state_dim]) - concat = self.concat([states, messages]) - updated_messages = self.GRU(concat) - updated_messages = tf.reshape(updated_messages, [-1, num_nodes, state_dim]) - return updated_messages - - -class MessagePassing(tf.keras.Model): - """ - > The message passing phrase runs for T time steps and is defined in terms of - 1. message function M_t - 2. vertex update function U_t - during the message passing phase, hidden states h_v^t at each node in the graph are updated based on - messages m_v^{t+1} according to: - 1. m_v^{t+1} = \Sigma_{w \in N(v)}{M_t(h_v^t, h_w^t, e_{vw})} - 2. h_v^{t+1} = U_t(h_v^t, m_v^{t+1}) - - To generalize a bit, we can use other aggregation function instead of summation. 
- """ - def __init__(self, state_dim, name='message_passing'): - super(MessagePassing, self).__init__(self, name=name) - self.state_dim = state_dim - self.message_function = EdgeNetwork(state_dim=state_dim, name=name + '/message_func') - self.message_aggregation = Aggregation(name=name + '/message_agg') - self.update_function = UpdateFunction(state_dim=state_dim, name=name + '/state_update') - - def call(self, states, edges, masks, training=False): - """ - Input - ----- - nodes: bs x #nodes x state_dim - edges: bs x #nodes^2 x #edge_features - masks: bs x #nodes^2 x 1 binary matrix indicating whether edge exist or not - - """ - num_nodes = tf.shape(states)[1] - state_dim = tf.shape(states)[2] - masks = tf.reshape(masks, [-1, num_nodes ** 2, 1]) - states_j = tf.tile(states, [1, num_nodes, 1]) - messages = self.message_function(states_j, edges) - masked_messages = tf.multiply(messages, masks) - # reshape to batch, from_node, to_node, message - masked_messages = tf.reshape(masked_messages, [-1, num_nodes, num_nodes, state_dim]) - aggregated_messages = self.message_aggregation(masked_messages) - updated_messages = self.update_function(states, aggregated_messages) - return updated_messages - - -class ReadoutEdge(tf.keras.Model): - def __init__(self, hidden_sizes, num_outputs, name='readout_edges'): - super(ReadoutEdge, self).__init__(name=name) - self.concat = tf.keras.layers.Concatenate() - self.hidden_layers = tf.keras.Sequential([ - tf.keras.layers.Dense(units=hidden_size, activation='relu', name=name + '/hidden_{}'.format(i)) - for i, hidden_size in enumerate(hidden_sizes)]) - self.last_linear = tf.keras.layers.Dense(units=num_outputs, name=name + '/last_linear') - - - def call(self, states, edges, training=False): - num_nodes = tf.shape(states)[1] - state_dim = tf.shape(states)[2] - states_i = tf.reshape(tf.tile(states, [1, 1, num_nodes]), [-1, num_nodes ** 2, state_dim]) # - states_j = tf.tile(states, [1, num_nodes, 1]) # - concat = self.concat([states_i, edges, states_j]) - features = self.hidden_layers(concat) - output = self.last_linear(features) - return output - - -class ReadoutNodes(tf.keras.Model): - def __init__(self, hidden_sizes, num_outputs, name='readout_nodes'): - super(ReadoutNodes, self).__init__(name=name) - self.hidden_layers = tf.keras.Sequential([ - tf.keras.layers.Dense(units=hidden_size, activation='relu', name=name + '/hidden_{}'.format(i)) - for i, hidden_size in enumerate(hidden_sizes)]) - self.last_linear = tf.keras.layers.Dense(units=num_outputs, name=name + '/last_linear') - - def call(self, states, training=False): - features = self.hidden_layers(states) - output = self.last_linear(features) - return output - - -class ReadoutGraph(tf.keras.Model): - def __init__(self, hidden_sizes, num_outputs, agg_function, name='readout_graph'): - super(ReadoutGraph, self).__init__(name=name) - self.agg_function = agg_function - self.hidden_layers = tf.keras.Sequential([ - tf.keras.layers.Dense(units=hidden_size, activation='relu', name=name + '/hidden_{}'.format(i)) - for i, hidden_size in enumerate(hidden_sizes)]) - self.last_linear = tf.keras.layers.Dense(units=num_outputs, name=name + '/last_linear') - - - def call(self, states, masks, training=False): - num_nodes = tf.shape(states)[1] - masks = tf.reshape(masks, [-1, num_nodes, 1]) - masked_states = tf.multiply(states, masks) - graph_states = self.agg_function(masked_states, keepdims = True) #KEPT DIMENSIONS - features = self.hidden_layers(graph_states) - output = self.last_linear(features) - return output - - -class 
MPNN(tf.keras.Model): - """Implementation of Message Passing Neural Network. - - reference: https://arxiv.org/abs/1704.01212i - """ - def __init__(self, hidden_sizes, num_outputs, state_dim, update_steps, name='mpnn'): - super(MPNN, self).__init__(name=name) - self.update_steps = int(update_steps) - self.node_embedding = tf.keras.layers.Dense(units=state_dim, activation='relu') - self.message_passing = MessagePassing(state_dim=state_dim) - self.readout_func = ReadoutGraph(hidden_sizes, num_outputs, Aggregation('sum', 1)) - - def call(self, nodes, edges, node_masks=None, edge_masks=None, training=False): - states = self.node_embedding(nodes) - for time_step in range(self.update_steps): - states = self.message_passing(states, edges, edge_masks, training=training) - readout = self.readout_func(states, node_masks, training=training) - return readout - - -def _test_edgenetwork(): - """testcase for edgenetwork forward pass - a batch of 32 graphs, each with 3 nodes, include self pointing eage - 9 edges per graph, each node has 5 features. - """ - edges = tf.random.uniform((32, 9, 5)) - states = tf.tile(tf.random.uniform((32, 3, 3)), [1, 3, 1]) - m = EdgeNetwork(state_dim=3) - o = m(states, edges) - assert o.shape == (32, 9, 3) - - -def _test_message_update(): - states = tf.random.uniform((32, 9, 3)) - messages = tf.random.uniform((32, 9, 3)) - m = UpdateFunction(3) - o = m(states, messages) - assert o.shape == (32, 9, 3) - - -def _test_message_passing(): - """test case for message passing. - - a batch of 2 graphs, each has 2 nodes, each node has a state vector of size 2, each edge has 3 features. - """ - states = tf.convert_to_tensor([[[1, 2], [2, 1]], [[3, 4], [4, 3]]], dtype='float') - edges = tf.convert_to_tensor([ - [[0, 0, 0], [1, 2, 3], [3, 2, 1], [0, 0, 0]], [[0, 0, 0], [3, 4, 2], [2, 4, 3], [0, 0, 0]] - ], dtype='float') - masks = tf.expand_dims(tf.convert_to_tensor([[[0], [1], [1], [0]], [[0], [1], [1], [0]]], dtype='float'), axis=-1) - m = MessagePassing(2) - o = m(states, edges, masks) - assert o.shape == (2, 2, 2) - - -def _test_edge_readout(): - states = tf.random.uniform((32, 3, 3)) - edges = tf.random.uniform((32, 9, 2)) - m = ReadoutEdge([3, 2], 1) - o = m(states, edges) - assert o.shape == (32, 9, 1) - - -def _test_node_readout(): - states = tf.random.uniform((32, 3, 3)) - m = ReadoutNodes([3, 2], 1) - o = m(states) - assert o.shape == (32, 3, 1) - - -def _test_graph_readout(): - states = tf.random.uniform((32, 3, 3)) - masks = tf.expand_dims( - tf.convert_to_tensor([[1, 1, 0]] * 8 + [[1, 0, 1]] * 8 + [[0, 1, 1]] * 16, dtype='float'), - axis=-1) - agg_func = Aggregation(method='sum', axis=1) - m = ReadoutGraph([3, 3], 1, agg_func) - o = m(states, masks) - assert o.shape == (32, 1) - - -def _test_mpnn(): - nodes = tf.random.uniform((32, 3, 3)) - edges = tf.random.uniform((32, 3 * 3, 2)) - node_masks = tf.expand_dims( - tf.convert_to_tensor([[1, 1, 0]] * 8 + [[1, 0, 1]] * 10 + [[0, 1, 1]] * 14, dtype='float'), - axis=-1) - edge_masks = tf.expand_dims( - tf.convert_to_tensor([[0, 1, 0, 1, 0, 1, 0, 1, 0]] * 16 + [[0, 1, 1, 1, 0, 0, 1, 0, 0]] * 16, dtype='float'), - axis=-1) - m = MPNN([5, 5,], 1, 8, 3) - o = m(nodes, edges, node_masks=node_masks, edge_masks=edge_masks) - print("-----THIS WORKS ------") - assert o.shape == (32, 1) - -if __name__ == '__main__': - _test_edgenetwork() - _test_message_update() - _test_message_passing() - _test_edge_readout() - _test_node_readout() - _test_graph_readout() - _test_mpnn() diff --git a/mlpf/tfmodel/opt.py b/mlpf/tfmodel/opt.py deleted file 
mode 100644 index 26ca0e86e..000000000 --- a/mlpf/tfmodel/opt.py +++ /dev/null @@ -1,91 +0,0 @@ -from tensorboard.plugins.hparams import api as hp -import tensorflow as tf -from tf_model import load_dataset_ttbar, my_loss_full, num_max_elems, weight_schemes, PFNet -from tf_model import cls_130, cls_211, cls_22, energy_resolution, eta_resolution, phi_resolution -from argparse import Namespace -import kerastuner as kt - -args = Namespace() -args.datapath = "./data/TTbar_14TeV_TuneCUETP8M1_cfi" -args.ntrain = 10000 -args.ntest = 1000 -args.weights = "inverse" -args.convlayer = "ghconv" -args.batch_size = 1 -args.nepochs = 20 -args.target = "cand" -args.lr = 0.0001 -args.outdir = "testout" - -def model_builder(hp): - args.hidden_dim_id = hp.Choice('hidden_dim_id', values = [16, 32, 64, 128, 256]) - args.hidden_dim_reg = hp.Choice('hidden_dim_reg', values = [16, 32, 64, 128, 256]) - args.num_hidden_id_enc = hp.Choice('hidden_dim_id_enc', values = [0, 1, 2, 3]) - args.num_hidden_id_dec = hp.Choice('hidden_dim_id_dec', values = [0, 1, 2, 3]) - args.num_hidden_reg_enc = hp.Choice('hidden_dim_reg_enc', values = [0, 1, 2, 3]) - args.num_hidden_reg_dec = hp.Choice('hidden_dim_reg_dec', values = [0, 1, 2, 3]) - args.num_convs_id = hp.Choice('num_convs_id', values = [1, 2, 3, 4]) - args.num_convs_reg = hp.Choice('num_convs_reg', values = [1, 2, 3, 4]) - args.distance_dim = hp.Choice('distance_dim', values = [16, 32, 64, 128, 256]) - args.num_neighbors = hp.Choice('num_neighbors', [2, 3, 4, 5, 6, 7, 8, 9, 10]) - args.dropout = hp.Choice('dropout', values = [0.0, 0.1, 0.2, 0.3, 0.4, 0.5]) - args.bin_size = hp.Choice('bin_size', values = [100, 200, 500, 1000]) - args.dist_mult = hp.Choice('dist_mult', values = [0.1, 1.0, 10.0]) - args.cosine_dist = hp.Choice('cosine_dist', values = [True, False]) - - model = PFNet( - num_hidden_id_enc=args.num_hidden_id_enc, - num_hidden_id_dec=args.num_hidden_id_dec, - hidden_dim_id=args.hidden_dim_id, - num_hidden_reg_enc=args.num_hidden_reg_enc, - num_hidden_reg_dec=args.num_hidden_reg_dec, - hidden_dim_reg=args.hidden_dim_reg, - num_convs_id=args.num_convs_id, - num_convs_reg=args.num_convs_reg, - distance_dim=args.distance_dim, - convlayer=args.convlayer, - dropout=args.dropout, - bin_size=args.bin_size, - num_neighbors=args.num_neighbors, - dist_mult=args.dist_mult, - cosine_dist=args.cosine_dist - ) - loss_fn = my_loss_full - opt = tf.keras.optimizers.Adam(learning_rate=args.lr) - print(args) - - model.compile(optimizer=opt, loss=loss_fn, - metrics=[cls_130, cls_211, cls_22, energy_resolution, eta_resolution, phi_resolution], - sample_weight_mode="temporal") - return model - -if __name__ == "__main__": - global_batch_size = args.batch_size - dataset = load_dataset_ttbar(args.datapath, args.target) - - ps = (tf.TensorShape([num_max_elems, 15]), tf.TensorShape([num_max_elems, 5]), tf.TensorShape([num_max_elems, ])) - ds_train = dataset.take(args.ntrain).map(weight_schemes[args.weights]).padded_batch(global_batch_size, padded_shapes=ps) - ds_test = dataset.skip(args.ntrain).take(args.ntest).map(weight_schemes[args.weights]).padded_batch(global_batch_size, padded_shapes=ps) - - ds_train_r = ds_train.repeat() - ds_test_r = ds_test.repeat() - - tuner = kt.Hyperband( - model_builder, - objective = 'val_loss', - max_epochs = args.nepochs, - factor = 3, - hyperband_iterations = 3, - directory = '/scratch/joosep/kerastuner_out', - project_name = 'mlpf') - - #tuner.search( - # ds_train_r, - # validation_data=ds_test_r, - # steps_per_epoch=args.ntrain/args.batch_size, - # 
validation_steps=args.ntest/args.batch_size, - # #callbacks=[tf.keras.callbacks.EarlyStopping(patience=2, monitor='val_loss')] - #) - tuner.results_summary() - for trial in tuner.oracle.get_best_trials(num_trials=10): - print(trial.hyperparameters.values, trial.score) diff --git a/mlpf/tfmodel/pred_tf_model.py b/mlpf/tfmodel/pred_tf_model.py deleted file mode 100644 index 7f2a0bd5f..000000000 --- a/mlpf/tfmodel/pred_tf_model.py +++ /dev/null @@ -1,156 +0,0 @@ -import os -import time -import glob -import numpy as np -import json - -from tf_model import parse_args - -def get_X(X,y,w): - return X - -def parse_args(): - import argparse - parser = argparse.ArgumentParser() - parser.add_argument("--model", type=str, default="PFNet", help="type of model to train", choices=["PFNet"]) - parser.add_argument("--weights", type=str, default=None, help="model weights to load") - parser.add_argument("--hidden-dim-id", type=int, default=256, help="hidden dimension") - parser.add_argument("--hidden-dim-reg", type=int, default=256, help="hidden dimension") - parser.add_argument("--batch-size", type=int, default=1, help="number of events in training batch") - parser.add_argument("--num-convs-id", type=int, default=1, help="number of convolution layers") - parser.add_argument("--num-convs-reg", type=int, default=1, help="number of convolution layers") - parser.add_argument("--num-hidden-id-enc", type=int, default=2, help="number of encoder layers for multiclass") - parser.add_argument("--num-hidden-id-dec", type=int, default=2, help="number of decoder layers for multiclass") - parser.add_argument("--num-hidden-reg-enc", type=int, default=2, help="number of encoder layers for regression") - parser.add_argument("--num-hidden-reg-dec", type=int, default=2, help="number of decoder layers for regression") - parser.add_argument("--num-neighbors", type=int, default=5, help="number of knn neighbors") - parser.add_argument("--distance-dim", type=int, default=256, help="distance dimension") - parser.add_argument("--bin-size", type=int, default=100, help="number of points per LSH bin") - parser.add_argument("--dist-mult", type=float, default=1.0, help="Exponential multiplier") - parser.add_argument("--num-conv", type=int, default=1, help="number of convolution layers (powers)") - parser.add_argument("--attention-layer-cutoff", type=float, default=0.2, help="Sparsify attention matrix by masking values below this threshold") - parser.add_argument("--nthreads", type=int, default=-1, help="number of threads to use") - parser.add_argument("--ntrain", type=int, default=80, help="number of training events") - parser.add_argument("--ntest", type=int, default=20, help="number of testing events") - parser.add_argument("--gpu", action="store_true", help="use GPU") - parser.add_argument("--synthetic-timing", action="store_true", help="run a synthetic timing check, which is time consuming") - parser.add_argument("--convlayer", type=str, default="sgconv", choices=["sgconv", "ghconv"], help="Type of graph convolutional layer") - parser.add_argument("--datapath", type=str, help="Input data path", required=True) - parser.add_argument("--target", type=str, choices=["cand", "gen"], help="Regress to PFCandidates or GenParticles", default="gen") - args = parser.parse_args() - return args - -if __name__ == "__main__": - args = parse_args() - - if args.gpu: - import setGPU - else: - os.environ["CUDA_VISIBLE_DEVICES"] = "" - - import tensorflow as tf - - physical_devices = tf.config.list_physical_devices('GPU') - if len(physical_devices) > 0: 
- tf.config.experimental.set_memory_growth(physical_devices[0], True) - tf.config.experimental_run_functions_eagerly(False) - - from tf_model import num_max_elems - - tf.gfile = tf.io.gfile - from tf_model import PFNet, prepare_df - from tf_data import _parse_tfr_element - tfr_files = glob.glob("{}/tfr/{}/*.tfrecords".format(args.datapath, args.target)) - assert(len(tfr_files)>0) - tf.config.optimizer.set_jit(False) - - if args.nthreads > 0: - tf.config.threading.set_inter_op_parallelism_threads(args.nthreads) - tf.config.threading.set_intra_op_parallelism_threads(args.nthreads) - if not args.gpu: - tf.config.set_visible_devices([], 'GPU') - - nev = args.ntest - ps = (tf.TensorShape([num_max_elems, 15]), tf.TensorShape([num_max_elems, 5]), tf.TensorShape([num_max_elems, ])) - dataset = tf.data.TFRecordDataset(tfr_files).map( - _parse_tfr_element, num_parallel_calls=tf.data.experimental.AUTOTUNE).skip(args.ntrain).take(nev).padded_batch(args.batch_size, padded_shapes=ps) - dataset_X = dataset.map(get_X) - - base_model = PFNet( - hidden_dim_id=args.hidden_dim_id, - hidden_dim_reg=args.hidden_dim_reg, - num_convs_id=args.num_convs_id, - num_convs_reg=args.num_convs_reg, - num_hidden_id_enc=args.num_hidden_id_enc, - num_hidden_id_dec=args.num_hidden_id_dec, - num_hidden_reg_enc=args.num_hidden_reg_enc, - num_hidden_reg_dec=args.num_hidden_reg_dec, - distance_dim=args.distance_dim, - convlayer=args.convlayer, - dropout=0.0, - bin_size=args.bin_size, - num_neighbors=args.num_neighbors, - dist_mult=args.dist_mult - ) - model = base_model.create_model(num_max_elems, training=False) - - #load the weights - model.load_weights(args.weights) - model_dir = os.path.dirname(args.weights) - - #prepare the dataframe - prepare_df(model, dataset, model_dir, args.target, save_raw=False) - - print("now timing") - neval = 0 - t0 = time.time() - for X in dataset_X: - ret = model(X) - print(".", end="") - neval += 1 - print() - t1 = time.time() - time_per_dsrow = (t1-t0)/neval - time_per_event = time_per_dsrow/args.batch_size - print("prediction time per event: {:.2f} ms".format(1000.0*time_per_event)) - - if args.synthetic_timing: - synthetic_timing_data = [] - for iteration in range(3): - numev = 500 - for evsize in [1000, 5000, 10000, 20000]: - for batch_size in [1,2,4,10,20] if args.gpu else [1, ]: - t0 = time.time() - for i in range(numev//batch_size): - x = np.random.randn(batch_size, evsize, 15) - model(x) - t1 = time.time() - dt = t1 - t0 - time_per_event = 1000.0*(dt / numev) - synthetic_timing_data.append( - [{"iteration": iteration, "batch_size": batch_size, "event_size": evsize, "time_per_event": time_per_event}]) - print("Synthetic random data: batch_size={} event_size={}, time={:.2f} ms/ev".format(batch_size, evsize, time_per_event)) - - with open("{}/synthetic_timing_gpu{}.json".format(model_dir, int(args.gpu)), "w") as fi: - json.dump(synthetic_timing_data, fi) - - #https://leimao.github.io/blog/Save-Load-Inference-From-TF2-Frozen-Graph/ - # Get frozen ConcreteFunction - full_model = tf.function(lambda x: base_model(x, training=False)) - full_model = full_model.get_concrete_function( - tf.TensorSpec((None, None, 15), tf.float32)) - from tensorflow.python.framework import convert_to_constants - frozen_func = convert_to_constants.convert_variables_to_constants_v2(full_model) - frozen_func.graph.as_graph_def() - print(full_model.graph.inputs) - print(full_model.graph.outputs) - - tf.io.write_graph(graph_or_graph_def=frozen_func.graph, - logdir="{}/model_frozen".format(model_dir), - 
name="frozen_graph.pb", - as_text=False) - tf.io.write_graph(graph_or_graph_def=frozen_func.graph, - logdir="{}/model_frozen".format(model_dir), - name="frozen_graph.pbtxt", - as_text=True) - #model.save('model', overwrite=True, include_optimizer=False) diff --git a/mlpf/tfmodel/tf_data.py b/mlpf/tfmodel/tf_data.py deleted file mode 100644 index a1d0d3939..000000000 --- a/mlpf/tfmodel/tf_data.py +++ /dev/null @@ -1,128 +0,0 @@ -import numpy as np -import glob -import multiprocessing -import os - -import tensorflow as tf -from tf_model import load_one_file - -def parse_args(): - import argparse - parser = argparse.ArgumentParser() - parser.add_argument("--target", type=str, choices=["cand", "gen"], help="Regress to PFCandidates or GenParticles", default="cand") - parser.add_argument("--datapath", type=str, required=True, help="Input data path") - parser.add_argument("--num-files-per-tfr", type=int, default=100, help="Number of pickle files to merge to one TFRecord file") - args = parser.parse_args() - return args - -def chunks(lst, n): - """Yield successive n-sized chunks from lst.""" - for i in range(0, len(lst), n): - yield lst[i:i + n] - -#https://stackoverflow.com/questions/47861084/how-to-store-numpy-arrays-as-tfrecord -def _bytes_feature(value): - """Returns a bytes_list from a string / byte.""" - if isinstance(value, type(tf.constant(0))): # if value ist tensor - value = value.numpy() # get value of tensor - return tf.train.Feature(bytes_list=tf.train.BytesList(value=[value])) - -def _parse_tfr_element(element): - parse_dic = { - 'X': tf.io.FixedLenFeature([], tf.string), - 'y': tf.io.FixedLenFeature([], tf.string), - 'w': tf.io.FixedLenFeature([], tf.string), - } - example_message = tf.io.parse_single_example(element, parse_dic) - - X = example_message['X'] - arr_X = tf.io.parse_tensor(X, out_type=tf.float32) - y = example_message['y'] - arr_y = tf.io.parse_tensor(y, out_type=tf.float32) - w = example_message['w'] - arr_w = tf.io.parse_tensor(w, out_type=tf.float32) - - #https://github.com/tensorflow/tensorflow/issues/24520#issuecomment-577325475 - arr_X.set_shape(tf.TensorShape((None, 15))) - arr_y.set_shape(tf.TensorShape((None, 5))) - arr_w.set_shape(tf.TensorShape((None, ))) - #inds = tf.stack([arr_dm_row, arr_dm_col], axis=-1) - #dm_sparse = tf.SparseTensor(values=arr_dm_data, indices=inds, dense_shape=[tf.shape(arr_X)[0], tf.shape(arr_X)[0]]) - - return arr_X, arr_y, arr_w - -def serialize_X_y_w(writer, X, y, w): - feature = { - 'X': _bytes_feature(tf.io.serialize_tensor(X)), - 'y': _bytes_feature(tf.io.serialize_tensor(y)), - 'w': _bytes_feature(tf.io.serialize_tensor(w)), - } - sample = tf.train.Example(features=tf.train.Features(feature=feature)) - writer.write(sample.SerializeToString()) - -def serialize_chunk(args): - path, files, ichunk, target = args - out_filename = os.path.join(path, "chunk_{}.tfrecords".format(ichunk)) - writer = tf.io.TFRecordWriter(out_filename) - Xs = [] - ys = [] - ws = [] - dms = [] - - for fi in files: - X, y, ycand = load_one_file(fi) - - Xs += X - if target == "cand": - ys += ycand - elif target == "gen": - ys += y - else: - raise Exception("Unknown target") - - #set weights for each sample to be equal to the number of samples of this type - #in the training script, this can be used to compute either inverse or class-balanced weights - uniq_vals, uniq_counts = np.unique(np.concatenate([y[:, 0] for y in ys]), return_counts=True) - for i in range(len(ys)): - w = np.ones(len(ys[i]), dtype=np.float32) - for uv, uc in zip(uniq_vals, 
uniq_counts): - w[ys[i][:, 0]==uv] = uc - ws += [w] - - for X, y, w in zip(Xs, ys, ws): - serialize_X_y_w(writer, X, y, w) - - writer.close() - -if __name__ == "__main__": - args = parse_args() - tf.config.experimental_run_functions_eagerly(True) - - datapath = args.datapath - - filelist = sorted(glob.glob("{}/raw/*.pkl".format(datapath))) - print("found {} files".format(len(filelist))) - #means, stds = extract_means_stds(filelist) - outpath = "{}/tfr/{}".format(datapath, args.target) - - if not os.path.isdir(outpath): - os.makedirs(outpath) - - pars = [] - for ichunk, files in enumerate(chunks(filelist, args.num_files_per_tfr)): - pars += [(outpath, files, ichunk, args.target)] - #serialize_chunk(pars[0]) - pool = multiprocessing.Pool(20) - pool.map(serialize_chunk, pars) - - #Load and test the dataset - tfr_dataset = tf.data.TFRecordDataset(glob.glob(outpath + "/*.tfrecords")) - dataset = tfr_dataset.map(_parse_tfr_element) - num_ev = 0 - num_particles = 0 - for X, y, w in dataset: - num_ev += 1 - num_particles += len(X) - - print("Created TFRecords dataset in {} with {} events, {} particles".format( - datapath, num_ev, num_particles)) diff --git a/mlpf/tfmodel/utils.py b/mlpf/tfmodel/utils.py index b568e3691..ca401f47c 100644 --- a/mlpf/tfmodel/utils.py +++ b/mlpf/tfmodel/utils.py @@ -1,21 +1,16 @@ -import os -import yaml -from pathlib import Path import datetime +import logging +import os import platform -import random -import glob -import numpy as np -from tqdm import tqdm import re -import logging +from pathlib import Path +import numpy as np import tensorflow as tf import tensorflow_addons as tfa - -from tfmodel.data import Dataset -from tfmodel.onecycle_scheduler import OneCycleScheduler, MomentumOneCycleScheduler +import yaml from tfmodel.datasets import CMSDatasetFactory, DelphesDatasetFactory +from tfmodel.onecycle_scheduler import MomentumOneCycleScheduler, OneCycleScheduler @tf.function @@ -32,9 +27,13 @@ def histogram_2d(eta, phi, weights_px, weights_py, eta_range, phi_range, nbins, hist_pt = tf.sqrt(hist_px**2 + hist_py**2) return hist_pt + @tf.function def batched_histogram_2d(eta, phi, w_px, w_py, x_range, y_range, nbins, bin_dtype=tf.float32): - return tf.vectorized_map(lambda a: histogram_2d(a[0], a[1], a[2], a[3], x_range, y_range, nbins, bin_dtype), (eta, phi, w_px, w_py)) + return tf.vectorized_map( + lambda a: histogram_2d(a[0], a[1], a[2], a[3], x_range, y_range, nbins, bin_dtype), (eta, phi, w_px, w_py) + ) + def load_config(config_file_path): with open(config_file_path, "r") as ymlfile: @@ -47,8 +46,7 @@ def parse_config(config, ntrain=None, ntest=None, nepochs=None, weights=None): config = load_config(config) tf.config.run_functions_eagerly(config["tensorflow"]["eager"]) - n_epochs = config["setup"]["num_epochs"] - + if ntrain: config["setup"]["num_events_train"] = ntrain @@ -110,8 +108,10 @@ def delete_all_but_best_checkpoint(train_dir, dry_run): def get_strategy(): if isinstance(os.environ.get("CUDA_VISIBLE_DEVICES"), type(None)) or len(os.environ.get("CUDA_VISIBLE_DEVICES")) == 0: gpus = [-1] - print("WARNING: CUDA_VISIBLE_DEVICES variable is empty. \ - If you don't have or intend to use GPUs, this message can be ignored.") + print( + "WARNING: CUDA_VISIBLE_DEVICES variable is empty. \ + If you don't have or intend to use GPUs, this message can be ignored." 
+ ) else: gpus = [int(x) for x in os.environ.get("CUDA_VISIBLE_DEVICES", "-1").split(",")] if gpus[0] == -1: @@ -174,7 +174,7 @@ def get_lr_schedule(config, steps): else: lr_schedule = None callbacks = [] - return lr_schedule, callbacks,lr + return lr_schedule, callbacks, lr def get_optimizer(config, lr_schedule=None): @@ -188,6 +188,7 @@ def get_optimizer(config, lr_schedule=None): opt = tf.keras.optimizers.Adam(learning_rate=lr, amsgrad=cfg_adam["amsgrad"]) if cfg_adam["pcgrad"]: from tfmodel.PCGrad_tf import PCGrad + opt = PCGrad(opt) return opt if config["setup"]["optimizer"] == "adamw": @@ -197,11 +198,14 @@ def get_optimizer(config, lr_schedule=None): cfg_sgd = config["optimizer"]["sgd"] return tf.keras.optimizers.SGD(learning_rate=lr, momentum=cfg_sgd["momentum"], nesterov=cfg_sgd["nesterov"]) else: - raise ValueError("Only 'adam', 'adamw' and 'sgd' are supported optimizers, got {}".format(config["setup"]["optimizer"])) + raise ValueError( + "Only 'adam', 'adamw' and 'sgd' are supported optimizers, got {}".format(config["setup"]["optimizer"]) + ) def get_tuner(cfg_hypertune, model_builder, outdir, recreate, strategy): import keras_tuner as kt + if cfg_hypertune["algorithm"] == "random": print("Keras Tuner: Using RandomSearch") cfg_rand = cfg_hypertune["random"] @@ -254,145 +258,63 @@ def compute_weights_none(X, y, w): def make_weight_function(config): - def weight_func(X,y,w): + def weight_func(X, y, w): - w_signal_only = tf.where(y[:, 0]==0, 0.0, 1.0) - w_signal_only *= tf.cast(X[:, 0]!=0, tf.float32) + w_signal_only = tf.where(y[:, 0] == 0, 0.0, 1.0) + w_signal_only *= tf.cast(X[:, 0] != 0, tf.float32) w_none = tf.ones_like(w) - w_none *= tf.cast(X[:, 0]!=0, tf.float32) + w_none *= tf.cast(X[:, 0] != 0, tf.float32) - w_invsqrt = tf.cast(tf.shape(w)[-1], tf.float32)/tf.sqrt(w) - w_invsqrt *= tf.cast(X[:, 0]!=0, tf.float32) + w_invsqrt = tf.cast(tf.shape(w)[-1], tf.float32) / tf.sqrt(w) + w_invsqrt *= tf.cast(X[:, 0] != 0, tf.float32) - w_signal_only_invsqrt = tf.where(y[:, 0]==0, 0.0, tf.cast(tf.shape(w)[-1], tf.float32)/tf.sqrt(w)) - w_signal_only_invsqrt *= tf.cast(X[:, 0]!=0, tf.float32) + w_signal_only_invsqrt = tf.where(y[:, 0] == 0, 0.0, tf.cast(tf.shape(w)[-1], tf.float32) / tf.sqrt(w)) + w_signal_only_invsqrt *= tf.cast(X[:, 0] != 0, tf.float32) weight_d = { "none": w_none, "signal_only": w_signal_only, "signal_only_inverse_sqrt": w_signal_only_invsqrt, - "inverse_sqrt": w_invsqrt + "inverse_sqrt": w_invsqrt, } ret_w = {} for loss_component, weight_type in config["sample_weights"].items(): ret_w[loss_component] = weight_d[weight_type] - return X,y,ret_w + return X, y, ret_w + return weight_func def targets_multi_output(num_output_classes): def func(X, y, w): - msk = tf.expand_dims(tf.cast(y[:, :, 0]!=0, tf.float32), axis=-1) + msk = tf.expand_dims(tf.cast(y[:, :, 0] != 0, tf.float32), axis=-1) return ( X, { "cls": tf.one_hot(tf.cast(y[:, :, 0], tf.int32), num_output_classes), - "charge": y[:, :, 1:2]*msk, - "pt": y[:, :, 2:3]*msk, - "eta": y[:, :, 3:4]*msk, - "sin_phi": y[:, :, 4:5]*msk, - "cos_phi": y[:, :, 5:6]*msk, - "energy": y[:, :, 6:7]*msk, + "charge": y[:, :, 1:2] * msk, + "pt": y[:, :, 2:3] * msk, + "eta": y[:, :, 3:4] * msk, + "sin_phi": y[:, :, 4:5] * msk, + "cos_phi": y[:, :, 5:6] * msk, + "energy": y[:, :, 6:7] * msk, }, w, ) return func -def get_dataset_def(config): - cds = config["dataset"] - - return Dataset( - num_input_features=int(cds["num_input_features"]), - num_output_features=int(cds["num_output_features"]), - 
padded_num_elem_size=int(cds["padded_num_elem_size"]), - schema=cds["schema"], - ) - - -def get_train_val_datasets(config, global_batch_size, n_train, n_test, repeat=True): - dataset_def = get_dataset_def(config) - - tfr_files = sorted(glob.glob(dataset_def.processed_path)) - if len(tfr_files) == 0: - raise Exception("Could not find any files in {}".format(dataset_def.processed_path)) - - random.shuffle(tfr_files) - dataset = tf.data.TFRecordDataset(tfr_files).map( - dataset_def.parse_tfr_element, num_parallel_calls=tf.data.experimental.AUTOTUNE - ) - - # Due to TFRecords format, the length of the dataset is not known beforehand - num_events = 0 - for _ in dataset: - num_events += 1 - print("dataset loaded, len={}".format(num_events)) - - weight_func = make_weight_function(config) - assert(n_train + n_test <= num_events) - - # Padded shapes - ps = ( - tf.TensorShape([dataset_def.padded_num_elem_size, dataset_def.num_input_features]), - tf.TensorShape([dataset_def.padded_num_elem_size, dataset_def.num_output_features]), - { - "cls": tf.TensorShape([dataset_def.padded_num_elem_size, ]), - "charge": tf.TensorShape([dataset_def.padded_num_elem_size, ]), - "energy": tf.TensorShape([dataset_def.padded_num_elem_size, ]), - "pt": tf.TensorShape([dataset_def.padded_num_elem_size, ]), - "eta": tf.TensorShape([dataset_def.padded_num_elem_size, ]), - "sin_phi": tf.TensorShape([dataset_def.padded_num_elem_size, ]), - "cos_phi": tf.TensorShape([dataset_def.padded_num_elem_size, ]), - } - ) - - ds_train = dataset.take(n_train).map(weight_func).padded_batch(global_batch_size, padded_shapes=ps) - ds_test = dataset.skip(n_train).take(n_test).map(weight_func).padded_batch(global_batch_size, padded_shapes=ps) - - if config["setup"]["multi_output"]: - dataset_transform = targets_multi_output(config["dataset"]["num_output_classes"]) - ds_train = ds_train.map(dataset_transform) - ds_test = ds_test.map(dataset_transform) - else: - dataset_transform = None - - return ds_train, ds_test, dataset_transform - -def prepare_val_data(config, dataset_def, single_file=False): - if single_file: - val_filelist = dataset_def.val_filelist[:1] - else: - val_filelist = dataset_def.val_filelist - if config["setup"]["num_val_files"] > 0: - val_filelist = val_filelist[: config["setup"]["num_val_files"]] - - Xs = [] - ygens = [] - ycands = [] - for fi in tqdm(val_filelist, desc="Preparing validation data"): - X, ygen, ycand = dataset_def.prepare_data(fi) - Xs.append(np.concatenate(X)) - ygens.append(np.concatenate(ygen)) - ycands.append(np.concatenate(ycand)) - - assert(len(Xs) > 0, "Xs is empty") - X_val = np.concatenate(Xs) - ygen_val = np.concatenate(ygens) - ycand_val = np.concatenate(ycands) - - return X_val, ygen_val, ycand_val - def get_heptfds_dataset(dataset_name, config, num_gpus, split, num_events=None, supervised=True): cds = config["dataset"] - if cds['schema'] == "cms": + if cds["schema"] == "cms": dsf = CMSDatasetFactory(config) - elif cds['schema'] == "delphes": + elif cds["schema"] == "delphes": dsf = DelphesDatasetFactory(config) else: raise ValueError("Only supported datasets are 'cms' and 'delphes'.") @@ -407,6 +329,7 @@ def get_heptfds_dataset(dataset_name, config, num_gpus, split, num_events=None, return ds, ds_info + def load_and_interleave(dataset_names, config, num_gpus, split, batch_size): datasets = [] steps = [] @@ -415,17 +338,17 @@ def load_and_interleave(dataset_names, config, num_gpus, split, batch_size): ds, _ = get_heptfds_dataset(ds_name, config, num_gpus, split) num_steps = 
ds.cardinality().numpy() total_num_steps += num_steps - assert(num_steps > 0) + assert num_steps > 0 print("Loaded {}:{} with {} steps".format(ds_name, split, num_steps)) datasets.append(ds) steps.append(num_steps) - #Now interleave elements from the datasets randomly + # Now interleave elements from the datasets randomly ids = 0 indices = [] for ds, num_steps in zip(datasets, steps): - indices += num_steps*[ids] + indices += num_steps * [ids] ids += 1 indices = np.array(indices, np.int64) np.random.shuffle(indices) @@ -435,18 +358,19 @@ def load_and_interleave(dataset_names, config, num_gpus, split, batch_size): ds = tf.data.experimental.choose_from_datasets(datasets, choice_dataset) bs = batch_size if not config["setup"]["horovod_enabled"]: - if num_gpus>1: - bs = bs*num_gpus + if num_gpus > 1: + bs = bs * num_gpus ds = ds.batch(bs) total_num_steps = total_num_steps // bs - #num_steps = 0 - #for _ in ds: + # num_steps = 0 + # for _ in ds: # num_steps += 1 - #assert(total_num_steps == num_steps) + # assert(total_num_steps == num_steps) return ds, total_num_steps -#Load multiple datasets and mix them together + +# Load multiple datasets and mix them together def get_datasets(datasets_to_interleave, config, num_gpus, split): datasets = [] steps = [] @@ -455,16 +379,18 @@ def get_datasets(datasets_to_interleave, config, num_gpus, split): if ds_conf["datasets"] is None: logging.warning("No datasets in {} list.".format(joint_dataset_name)) else: - interleaved_ds, num_steps = load_and_interleave(ds_conf["datasets"], config, num_gpus, split, ds_conf["batch_per_gpu"]) + interleaved_ds, num_steps = load_and_interleave( + ds_conf["datasets"], config, num_gpus, split, ds_conf["batch_per_gpu"] + ) print("Interleaved joint dataset {} with {} steps".format(joint_dataset_name, num_steps)) datasets.append(interleaved_ds) steps.append(num_steps) - + ids = 0 indices = [] total_num_steps = 0 for ds, num_steps in zip(datasets, steps): - indices += num_steps*[ids] + indices += num_steps * [ids] total_num_steps += num_steps ids += 1 indices = np.array(indices, np.int64) @@ -472,14 +398,15 @@ def get_datasets(datasets_to_interleave, config, num_gpus, split): choice_dataset = tf.data.Dataset.from_tensor_slices(indices) ds = tf.data.experimental.choose_from_datasets(datasets, choice_dataset) - #num_steps = 0 - #for elem in ds: + # num_steps = 0 + # for elem in ds: # num_steps += 1 - #assert(total_num_steps == num_steps) + # assert(total_num_steps == num_steps) print("Final dataset with {} steps".format(total_num_steps)) return ds, total_num_steps + def set_config_loss(config, trainable): if trainable == "classification": config["dataset"]["pt_loss_coef"] = 0.0 @@ -501,7 +428,9 @@ def set_config_loss(config, trainable): def get_class_loss(config): if config["setup"]["classification_loss_type"] == "categorical_cross_entropy": - cls_loss = tf.keras.losses.CategoricalCrossentropy(from_logits=False, label_smoothing=config["setup"].get("classification_label_smoothing", 0.0)) + cls_loss = tf.keras.losses.CategoricalCrossentropy( + from_logits=False, label_smoothing=config["setup"].get("classification_label_smoothing", 0.0) + ) elif config["setup"]["classification_loss_type"] == "sigmoid_focal_crossentropy": cls_loss = tfa.losses.sigmoid_focal_crossentropy else: @@ -515,28 +444,30 @@ def get_loss_from_params(input_dict): loss_cls = getattr(tf.keras.losses, loss_type) return loss_cls(**input_dict) -#batched version of https://github.com/VinAIResearch/DSW/blob/master/gsw.py#L19 + +# batched version of 
https://github.com/VinAIResearch/DSW/blob/master/gsw.py#L19 @tf.function def sliced_wasserstein_loss(y_true, y_pred, num_projections=1000): - - #take everything but the jet_idx + + # take everything but the jet_idx y_true = y_true[..., :5] y_pred = y_pred[..., :5] - #create normalized random basis vectors + # create normalized random basis vectors theta = tf.random.normal((num_projections, y_true.shape[-1])) theta = theta / tf.sqrt(tf.reduce_sum(theta**2, axis=1, keepdims=True)) - #project the features with the random basis + # project the features with the random basis A = tf.linalg.matmul(y_true, theta, False, True) B = tf.linalg.matmul(y_pred, theta, False, True) A_sorted = tf.sort(A, axis=-2) B_sorted = tf.sort(B, axis=-2) - ret = tf.math.sqrt(tf.reduce_sum(tf.math.pow(A_sorted - B_sorted, 2), axis=[-1,-2])) + ret = tf.math.sqrt(tf.reduce_sum(tf.math.pow(A_sorted - B_sorted, 2), axis=[-1, -2])) return ret + @tf.function def hist_loss_2d(y_true, y_pred): @@ -548,44 +479,48 @@ def hist_loss_2d(y_true, y_pred): pt_true = y_true[..., 0] pt_pred = y_pred[..., 0] - px_true = pt_true*y_true[..., 4] - py_true = pt_true*y_true[..., 3] - px_pred = pt_pred*y_pred[..., 4] - py_pred = pt_pred*y_pred[..., 3] + px_true = pt_true * y_true[..., 4] + py_true = pt_true * y_true[..., 3] + px_pred = pt_pred * y_pred[..., 4] + py_pred = pt_pred * y_pred[..., 3] pt_hist_true = batched_histogram_2d( - eta_true, - phi_true, - px_true, - py_true, - tf.cast([-6.0,6.0], tf.float32), tf.cast([-4.0,4.0], tf.float32), 20 + eta_true, phi_true, px_true, py_true, tf.cast([-6.0, 6.0], tf.float32), tf.cast([-4.0, 4.0], tf.float32), 20 ) pt_hist_pred = batched_histogram_2d( - eta_pred, - phi_pred, - px_pred, - py_pred, - tf.cast([-6.0,6.0], tf.float32), tf.cast([-4.0,4.0], tf.float32), 20 + eta_pred, phi_pred, px_pred, py_pred, tf.cast([-6.0, 6.0], tf.float32), tf.cast([-4.0, 4.0], tf.float32), 20 ) - mse = tf.math.sqrt(tf.reduce_mean((pt_hist_true-pt_hist_pred)**2, axis=[-1,-2])) + mse = tf.math.sqrt(tf.reduce_mean((pt_hist_true - pt_hist_pred) ** 2, axis=[-1, -2])) return mse @tf.function def jet_reco(px, py, jet_idx, max_jets): - tf.debugging.assert_shapes([ - (px, ('N')), - (py, ('N')), - (jet_idx, ('N')), - ]) + tf.debugging.assert_shapes( + [ + (px, ("N")), + (py, ("N")), + (jet_idx, ("N")), + ] + ) jet_idx_capped = tf.where(jet_idx <= max_jets, jet_idx, 0) - jet_px = tf.zeros([max_jets, ], dtype=px.dtype) - jet_py = tf.zeros([max_jets, ], dtype=py.dtype) + jet_px = tf.zeros( + [ + max_jets, + ], + dtype=px.dtype, + ) + jet_py = tf.zeros( + [ + max_jets, + ], + dtype=py.dtype, + ) jet_px_new = tf.tensor_scatter_nd_add(jet_px, indices=tf.expand_dims(jet_idx_capped, axis=-1), updates=px) jet_py_new = tf.tensor_scatter_nd_add(jet_py, indices=tf.expand_dims(jet_idx_capped, axis=-1), updates=py) @@ -597,17 +532,26 @@ def jet_reco(px, py, jet_idx, max_jets): @tf.function def batched_jet_reco(px, py, jet_idx, max_jets): - tf.debugging.assert_shapes([ - (px, ('B', 'N')), - (py, ('B', 'N')), - (jet_idx, ('B', 'N')), - ]) + tf.debugging.assert_shapes( + [ + (px, ("B", "N")), + (py, ("B", "N")), + (jet_idx, ("B", "N")), + ] + ) return tf.map_fn( - lambda a: jet_reco(a[0], a[1], a[2], max_jets), (px, py, jet_idx), - fn_output_signature=tf.TensorSpec([max_jets, ], dtype=tf.float32) + lambda a: jet_reco(a[0], a[1], a[2], max_jets), + (px, py, jet_idx), + fn_output_signature=tf.TensorSpec( + [ + max_jets, + ], + dtype=tf.float32, + ), ) + @tf.function def gen_jet_loss(y_true, y_pred): y = {} @@ -618,11 +562,11 @@ def 
gen_jet_loss(y_true, y_pred): max_jets = 201 jet_idx = tf.cast(y["true"][..., 5], dtype=tf.int32) for typ in ["true", "pred"]: - px = y[typ][..., 0]*y[typ][..., 4] - py = y[typ][..., 0]*y[typ][..., 3] + px = y[typ][..., 0] * y[typ][..., 4] + py = y[typ][..., 0] * y[typ][..., 3] jet_pt[typ] = batched_jet_reco(px, py, jet_idx, max_jets) - mse = tf.math.sqrt(tf.reduce_mean((jet_pt['true']-jet_pt['pred'])**2, axis=[-1,-2])) + mse = tf.math.sqrt(tf.reduce_mean((jet_pt["true"] - jet_pt["pred"]) ** 2, axis=[-1, -2])) return mse diff --git a/parameters/cms-gen.yaml b/parameters/cms-gen.yaml index 46c61994b..ea66f094f 100644 --- a/parameters/cms-gen.yaml +++ b/parameters/cms-gen.yaml @@ -141,10 +141,6 @@ parameters: regression_use_classification: yes dropout: 0.0 - pt_skip_gate: no - eta_skip_gate: yes - phi_skip_gate: yes - id_dim_decrease: yes charge_dim_decrease: yes pt_dim_decrease: yes @@ -221,7 +217,7 @@ raytune: n_random_steps: 10 train_test_datasets: - physical: + physical: batch_per_gpu: 5 datasets: - cms_pf_ttbar @@ -232,7 +228,7 @@ train_test_datasets: validation_datasets: - cms_pf_ttbar -datasets: +datasets: cms_pf_ttbar: version: 1.4.0 data_dir: diff --git a/parameters/cms.yaml b/parameters/cms.yaml index ff5ef7cac..a343a367c 100644 --- a/parameters/cms.yaml +++ b/parameters/cms.yaml @@ -124,10 +124,6 @@ parameters: regression_use_classification: yes dropout: 0.016312 # Set to 0 in future training - pt_skip_gate: no - eta_skip_gate: yes - phi_skip_gate: yes - id_dim_decrease: yes charge_dim_decrease: yes pt_dim_decrease: yes @@ -205,7 +201,7 @@ raytune: n_random_steps: 10 train_test_datasets: - physical: + physical: batch_per_gpu: 32 datasets: - cms_pf_ttbar @@ -216,7 +212,7 @@ train_test_datasets: validation_datasets: - cms_pf_ttbar -datasets: +datasets: cms_pf_ttbar: version: 1.4.0 data_dir: diff --git a/parameters/delphes.yaml b/parameters/delphes.yaml index 54effde2b..a15b5ab2f 100644 --- a/parameters/delphes.yaml +++ b/parameters/delphes.yaml @@ -7,7 +7,7 @@ dataset: num_output_features: 7 #(none=0, track=1, cluster=2) num_input_classes: 3 - #(none=0, charged hadron=1, neutral hadron=2, photon=3, electron=4, muon=5) + #(none=0, charged hadron=1, neutral hadron=2, photon=3, electron=4, muon=5) num_output_classes: 6 num_momentum_outputs: 5 padded_num_elem_size: 6400 @@ -126,10 +126,6 @@ parameters: regression_use_classification: yes dropout: 0.0 - pt_skip_gate: yes - eta_skip_gate: yes - phi_skip_gate: yes - id_dim_decrease: yes charge_dim_decrease: yes pt_dim_decrease: yes @@ -160,11 +156,6 @@ timing: num_ev: 100 num_iter: 3 -exponentialdecay: - decay_steps: 10000 - decay_rate: 0.99 - staircase: yes - callbacks: checkpoint: monitor: "val_loss" @@ -210,7 +201,7 @@ raytune: n_random_steps: 10 train_test_datasets: - delphes: + delphes: batch_per_gpu: 5 datasets: - delphes_pf diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 000000000..8e1cc106e --- /dev/null +++ b/requirements.txt @@ -0,0 +1,35 @@ +awkward +boost_histogram +click +fastjet +keras +keras-tuner +matplotlib +mplhep +networkx +nevergrad +notebook +onnxruntime +pandas +papermill +pre-commit +pyarrow +ray[default]==1.6.0 +ray[tune]==1.6.0 +scikit-optimize +scipy +seaborn +setGPU +sklearn +tensorflow==2.9 +tensorflow-addons +tensorflow-datasets +tensorflow-estimator +tensorflow-probability +tensorflow-text +tf-models-official +tf2onnx +tqdm +uproot +vector +zenodo_get diff --git a/scripts/local_test_cms_pipeline.sh b/scripts/local_test_cms_pipeline.sh index 722c7f469..02e9e9c0e 100755 --- 
a/scripts/local_test_cms_pipeline.sh +++ b/scripts/local_test_cms_pipeline.sh @@ -33,7 +33,7 @@ python mlpf/pipeline.py train -c parameters/cms-gen.yaml --nepochs 1 --customize ls ./experiments/cms*/weights/ #Generate the pred.npz file of predictions -python mlpf/pipeline.py evaluate --customize pipeline_test -t ./experiments/cms* -w ./experiments/cms*/weights/weights-01-*.hdf5 +python mlpf/pipeline.py evaluate --customize pipeline_test --nevents 10 -t ./experiments/cms* -w ./experiments/cms*/weights/weights-01-*.hdf5 #Evaluate the notebook papermill --inject-output-path --log-output -p path ./experiments/cms*/evaluation/epoch_1/cms_pf_ttbar/ notebooks/cms-mlpf.ipynb ./out.ipynb diff --git a/scripts/local_test_delphes_pipeline.sh b/scripts/local_test_delphes_pipeline.sh index 132facf3d..e95b78c02 100755 --- a/scripts/local_test_delphes_pipeline.sh +++ b/scripts/local_test_delphes_pipeline.sh @@ -19,4 +19,4 @@ python mlpf/pipeline.py train -c parameters/delphes.yaml --nepochs 1 --ntrain 5 ls ./experiments/delphes_*/weights/ #Generate the pred.npz file of predictions -python mlpf/pipeline.py evaluate -t ./experiments/delphes_* +python mlpf/pipeline.py evaluate --nevents 10 -t ./experiments/delphes_*
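The notes below are illustrative sketches of techniques that appear in this patch; the helper names, toy shapes, and example values are made up and are not part of the diff.

The sliced Wasserstein loss added to mlpf/tfmodel/utils.py compares two particle sets by projecting their first five features onto random unit directions and matching the sorted projections. A minimal NumPy sketch of the same idea, assuming unbatched inputs of shape (num_particles, num_features):

import numpy as np

def sliced_wasserstein(y_true, y_pred, num_projections=128, seed=None):
    """Approximate sliced Wasserstein distance between two particle sets."""
    rng = np.random.default_rng(seed)
    # random unit vectors in feature space
    theta = rng.normal(size=(num_projections, y_true.shape[-1]))
    theta /= np.sqrt((theta**2).sum(axis=1, keepdims=True))
    # project both sets: shape (num_particles, num_projections)
    a = y_true @ theta.T
    b = y_pred @ theta.T
    # 1D optimal transport along each direction = compare sorted projections
    a_sorted = np.sort(a, axis=0)
    b_sorted = np.sort(b, axis=0)
    return np.sqrt(((a_sorted - b_sorted) ** 2).sum())

rng = np.random.default_rng(0)
x = rng.normal(size=(100, 5))
print(sliced_wasserstein(x, x, seed=1))                                   # 0 for identical sets
print(sliced_wasserstein(x, x + 0.1 * rng.normal(size=x.shape), seed=1))  # small positive value

In the TensorFlow version from the diff, the projection is the matmul against theta, tf.sort along axis -2 orders the projected values per direction, and the loss is the root of the summed squared differences over projections and particles.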
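The gen-jet loss in the same file sums particle px and py into per-jet totals with tf.tensor_scatter_nd_add, using a jet index carried alongside the regression targets. A small self-contained sketch of that scatter-add pattern (the function name, max_jets value, and toy inputs are illustrative only):

import tensorflow as tf

def jet_pt_from_particles(px, py, jet_idx, max_jets):
    """Sum particle px/py into per-jet momenta and return the jet pT.

    px, py: float tensors of shape (num_particles,)
    jet_idx: int tensor of shape (num_particles,), values in [0, max_jets)
    """
    idx = tf.expand_dims(jet_idx, axis=-1)  # (num_particles, 1) scatter indices
    jet_px = tf.tensor_scatter_nd_add(tf.zeros([max_jets]), idx, px)
    jet_py = tf.tensor_scatter_nd_add(tf.zeros([max_jets]), idx, py)
    return tf.sqrt(jet_px**2 + jet_py**2)

# two particles belong to jet 0, one particle to jet 1
px = tf.constant([1.0, 2.0, 3.0])
py = tf.constant([0.0, 0.0, 4.0])
jet_idx = tf.constant([0, 0, 1], dtype=tf.int32)
print(jet_pt_from_particles(px, py, jet_idx, max_jets=4))  # [3. 5. 0. 0.]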
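load_and_interleave and get_datasets mix several samples by building an index vector with one entry per element of each dataset, shuffling it, and driving tf.data.experimental.choose_from_datasets with it, so every sample is consumed exactly once in random order. A toy sketch with two stand-in datasets (the real pipeline uses the cms_pf_* / delphes_pf tfds builders instead):

import numpy as np
import tensorflow as tf

# stand-ins for two physics samples of different size
ds_a = tf.data.Dataset.from_tensor_slices(tf.zeros([6], tf.int32))  # 6 "events" labelled 0
ds_b = tf.data.Dataset.from_tensor_slices(tf.ones([3], tf.int32))   # 3 "events" labelled 1

# one index per element, shuffled, so the mix is random but exhaustive
indices = np.array([0] * 6 + [1] * 3, np.int64)
np.random.shuffle(indices)
choice = tf.data.Dataset.from_tensor_slices(indices)

mixed = tf.data.experimental.choose_from_datasets([ds_a, ds_b], choice)
print([int(x) for x in mixed])  # six 0s and three 1s in shuffled order

Batching is applied after the interleave, which is why the step count in the diff is divided by the per-GPU batch size scaled by the number of GPUs.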