Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add producers for tau ID and trigger weights. #4

Merged
merged 11 commits into from
Dec 23, 2022
1 change: 1 addition & 0 deletions hbt/calibration/tau.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@

@calibrator(
uses={
# nano columns
"nTau", "Tau.pt", "Tau.eta", "Tau.phi", "Tau.mass", "Tau.charge", "Tau.genPartFlav",
"Tau.decayMode", "MET.pt", "MET.phi",
},
Expand Down
18 changes: 16 additions & 2 deletions hbt/config/analysis_hbt.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,8 +52,22 @@
# 2017
from hbt.config.configs_run2ul import add_config as add_config_run2ul
import cmsdb.campaigns.run2_2017_nano_v9
campaign_run2_2017_nano_v9 = cmsdb.campaigns.run2_2017_nano_v9.campaign_run2_2017_nano_v9.copy()

campaign_run2_2017_nano_v9 = cmsdb.campaigns.run2_2017_nano_v9.campaign_run2_2017_nano_v9

# default config
add_config_run2ul(
analysis_hbt,
campaign_run2_2017_nano_v9.copy(),
config_name=campaign_run2_2017_nano_v9.name,
config_id=2,
)

# config with limited number of files for faster prototyping
add_config_run2ul(
analysis_hbt,
campaign_run2_2017_nano_v9,
campaign_run2_2017_nano_v9.copy(),
config_name=f"{campaign_run2_2017_nano_v9.name}_limited",
config_id=12,
limit_dataset_files=2,
)
138 changes: 57 additions & 81 deletions hbt/config/configs_run2ul.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@
Configuration of the HH → bb𝜏𝜏 analysis.
"""

from __future__ import annotations

import os
import re
import itertools
Expand All @@ -18,7 +20,13 @@
thisdir = os.path.dirname(os.path.abspath(__file__))


def add_config(analysis: od.Analysis, campaign: od.Campaign) -> od.Config:
def add_config(
analysis: od.Analysis,
campaign: od.Campaign,
config_name: str | None = None,
config_id: int | None = None,
pkausw marked this conversation as resolved.
Show resolved Hide resolved
limit_dataset_files: int | None = None,
) -> od.Config:
# some validations
assert campaign.x.year in [2016, 2017, 2018]
if campaign.x.year == 2016:
Expand All @@ -33,7 +41,7 @@ def add_config(analysis: od.Analysis, campaign: od.Campaign) -> od.Config:
procs = get_root_processes_from_campaign(campaign)

# create a config by passing the campaign, so id and name will be identical
cfg = analysis.add_config(campaign)
cfg = analysis.add_config(campaign, name=config_name, id=config_id)

# add processes we are interested in
cfg.add_process(procs.n.data)
Expand Down Expand Up @@ -126,6 +134,11 @@ def add_config(analysis: od.Analysis, campaign: od.Campaign) -> od.Config:
if dataset.name.startswith("tt"):
dataset.x.is_ttbar = True

# apply an optional limit on the number of files
if limit_dataset_files:
for info in dataset.info.values():
info.n_files = limit_dataset_files

# default objects, such as calibrator, selector, producer, ml model, inference model, etc
cfg.x.default_calibrator = "default"
cfg.x.default_selector = "default"
Expand Down Expand Up @@ -161,8 +174,9 @@ def add_config(analysis: od.Analysis, campaign: od.Campaign) -> od.Config:
"default": ["met_filter", "trigger_fired", "leptons", "jet", "bjet"],
}

# custom method for determining dataset lfns
cfg.x.determine_dataset_lfns = None
# custom method and sandbox for determining dataset lfns
cfg.x.get_dataset_lfns = None
cfg.x.get_dataset_lfns_sandbox = None

# lumi values in inverse pb
# https://twiki.cern.ch/twiki/bin/view/CMS/LumiRecommendationsRun2?rev=2#Combination_and_correlations
Expand All @@ -189,66 +203,26 @@ def add_config(analysis: od.Analysis, campaign: od.Campaign) -> od.Config:
cfg.x.minbias_xs = Number(69.2, 0.046j)

# whether to validate the number of obtained LFNs in GetDatasetLFNs
cfg.x.validate_dataset_lfns = True
cfg.x.validate_dataset_lfns = limit_dataset_files is None

# b-tag working points
if year == 2016:
if campaign.x.vfp == "pre":
# https://twiki.cern.ch/twiki/bin/view/CMS/BtagRecommendation106XUL16preVFP?rev=6
cfg.x.btag_working_points = DotDict.wrap({
"deepjet": {
"loose": 0.0508,
"medium": 0.2598,
"tight": 0.6502,
},
"deepcsv": {
"loose": 0.2027,
"medium": 0.6001,
"tight": 0.8819,
},
})
else: # post
# https://twiki.cern.ch/twiki/bin/view/CMS/BtagRecommendation106XUL16postVFP?rev=8
cfg.x.btag_working_points = DotDict.wrap({
"deepjet": {
"loose": 0.0480,
"medium": 0.2489,
"tight": 0.6377,
},
"deepcsv": {
"loose": 0.1918,
"medium": 0.5847,
"tight": 0.8767,
},
})
elif year == 2017:
# https://twiki.cern.ch/twiki/bin/view/CMS/BtagRecommendation106XUL17?rev=15
cfg.x.btag_working_points = DotDict.wrap({
"deepjet": {
"loose": 0.0532,
"medium": 0.3040,
"tight": 0.7476,
},
"deepcsv": {
"loose": 0.1355,
"medium": 0.4506,
"tight": 0.7738,
},
})
else: # 2018
# https://twiki.cern.ch/twiki/bin/view/CMS/BtagRecommendation106XUL17?rev=17
cfg.x.btag_working_points = DotDict.wrap({
"deepjet": {
"loose": 0.0490,
"medium": 0.2783,
"tight": 0.7100,
},
"deepcsv": {
"loose": 0.1208,
"medium": 0.4168,
"tight": 0.7665,
},
})
# https://twiki.cern.ch/twiki/bin/view/CMS/BtagRecommendation106XUL16preVFP?rev=6
# https://twiki.cern.ch/twiki/bin/view/CMS/BtagRecommendation106XUL16postVFP?rev=8
# https://twiki.cern.ch/twiki/bin/view/CMS/BtagRecommendation106XUL17?rev=15
# https://twiki.cern.ch/twiki/bin/view/CMS/BtagRecommendation106XUL17?rev=17
btag_key = f"2016{campaign.x.vfp}" if year == 2016 else year
cfg.x.btag_working_points = DotDict.wrap({
"deepjet": {
"loose": {"2016pre": 0.0508, "2016post": 0.0480, 2017: 0.0532, 2018: 0.0490}[btag_key],
"medium": {"2016pre": 0.2598, "2016post": 0.2489, 2017: 0.3040, 2018: 0.2783}[btag_key],
"tight": {"2016pre": 0.6502, "2016post": 0.6377, 2017: 0.7476, 2018: 0.7100}[btag_key],
},
"deepcsv": {
"loose": {"2016pre": 0.2027, "2016post": 0.1918, 2017: 0.1355, 2018: 0.1208}[btag_key],
"medium": {"2016pre": 0.6001, "2016post": 0.5847, 2017: 0.4506, 2018: 0.4168}[btag_key],
"tight": {"2016pre": 0.8819, "2016post": 0.8767, 2017: 0.7738, 2018: 0.7665}[btag_key],
},
})

# name of the btag_sf correction set
cfg.x.btag_sf_correction_set = "deepJet_shape"
Expand Down Expand Up @@ -547,25 +521,25 @@ def add_aliases(
)

# external files
corrlib_base = "/afs/cern.ch/user/m/mrieger/public/mirrors/jsonpog-integration-849c6a6e"
json_mirror = "/afs/cern.ch/user/m/mrieger/public/mirrors/jsonpog-integration-849c6a6e"
cfg.x.external_files = DotDict.wrap({
# jet energy correction
"jet_jerc": (f"{corrlib_base}/POG/JME/{year}{corr_postfix}_UL/jet_jerc.json.gz", "v1"),
"jet_jerc": (f"{json_mirror}/POG/JME/{year}{corr_postfix}_UL/jet_jerc.json.gz", "v1"),

# tau energy correction and scale factors
"tau_sf": (f"{corrlib_base}/POG/TAU/{year}{corr_postfix}_UL/tau.json.gz", "v1"),
"tau_sf": (f"{json_mirror}/POG/TAU/{year}{corr_postfix}_UL/tau.json.gz", "v1"),

# electron scale factors
"electron_sf": (f"{corrlib_base}/POG/EGM/{year}{corr_postfix}_UL/electron.json.gz", "v1"),
"electron_sf": (f"{json_mirror}/POG/EGM/{year}{corr_postfix}_UL/electron.json.gz", "v1"),

# muon scale factors
"muon_sf": (f"{corrlib_base}/POG/MUO/{year}{corr_postfix}_UL/muon_Z.json.gz", "v1"),
"muon_sf": (f"{json_mirror}/POG/MUO/{year}{corr_postfix}_UL/muon_Z.json.gz", "v1"),

# btag scale factor
"btag_sf_corr": (f"{corrlib_base}/POG/BTV/{year}{corr_postfix}_UL/btagging.json.gz", "v1"),
"btag_sf_corr": (f"{json_mirror}/POG/BTV/{year}{corr_postfix}_UL/btagging.json.gz", "v1"),

# met phi corrector
"met_phi_corr": (f"{corrlib_base}/POG/JME/{year}{corr_postfix}_UL/met.json.gz", "v1"),
"met_phi_corr": (f"{json_mirror}/POG/JME/{year}{corr_postfix}_UL/met.json.gz", "v1"),

# hh-btag repository (lightweight) with TF saved model directories
"hh_btag_repo": ("https://github.com/hh-italian-group/HHbtag/archive/1dc426053418e1cab2aec021802faf31ddf3c5cd.tar.gz", "v1"), # noqa
Expand Down Expand Up @@ -684,18 +658,20 @@ def add_aliases(

# versions per task family and optionally also dataset and shift
pkausw marked this conversation as resolved.
Show resolved Hide resolved
# None can be used as a key to define a default value
if cfg.name == "run2_2017_nano_v9":
cfg.x.versions = {
# "cf.CalibrateEvents": "dev1",
# "cf.MergeSelectionStats": "dev1",
# "cf.MergeSelectionMasks": "dev1",
# "cf.SelectEvents": "dev1",
# "cf.ReduceEvents": "dev1",
# "cf.MergeReductionStats": "dev1",
# "cf.MergeReducedEvents": "dev1",
}
else:
raise NotImplementedError(f"config versions not implemented for {cfg.name}")
# TODO: versioning is disabled for now and will be enabled once needed
cfg.x.versions = {}
# if cfg.name == "run2_2017_nano_v9":
# cfg.x.versions = {
# "cf.CalibrateEvents": "dev1",
# "cf.MergeSelectionStats": "dev1",
# "cf.MergeSelectionMasks": "dev1",
# "cf.SelectEvents": "dev1",
# "cf.ReduceEvents": "dev1",
# "cf.MergeReductionStats": "dev1",
# "cf.MergeReducedEvents": "dev1",
# }
# else:
# raise NotImplementedError(f"config versions not implemented for {cfg.name}")

# channels
cfg.add_channel(name="mutau", id=1)
Expand Down
8 changes: 7 additions & 1 deletion hbt/production/btag.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,13 @@


@producer(
uses={btag_weights.PRODUCES, "process_id", "Jet.pt"},
uses={
btag_weights.PRODUCES,
# custom columns created upstream, probably by a producer
"process_id",
# nano columns
"Jet.pt",
},
# produced columns are defined in the init function below
)
def normalized_btag_weights(self: Producer, events: ak.Array, **kwargs) -> ak.Array:
Expand Down
4 changes: 4 additions & 0 deletions hbt/production/features.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,9 +43,11 @@ def jet_energy_shifts_init(self: Producer) -> None:

@producer(
uses={
# nano columns
"Electron.pt", "Muon.pt", "Jet.pt", "BJet.pt",
},
produces={
# new columns
"ht", "n_jet", "n_hhbtag", "n_electron", "n_muon",
},
shifts={
Expand All @@ -65,10 +67,12 @@ def features(self: Producer, events: ak.Array, **kwargs) -> ak.Array:
@producer(
uses={
category_ids,
# nano columns
"Jet.pt", "Jet.eta", "Jet.phi",
},
produces={
category_ids,
# new columns
"cutflow.n_jet", "cutflow.ht", "cutflow.jet1_pt", "cutflow.jet1_eta", "cutflow.jet1_phi",
},
)
Expand Down
5 changes: 4 additions & 1 deletion hbt/production/hhbtag.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,10 @@

@producer(
uses={
"event", "channel_id",
# custom columns created upstream, probably by a selector
"channel_id",
# nano columns
"event",
"nJet", "Jet.pt", "Jet.eta", "Jet.phi", "Jet.mass", "Jet.jetId", "Jet.puId",
"Jet.btagDeepFlavB",
"MET.pt", "MET.phi",
Expand Down
Loading