Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add low-pT electrons to MINIAOD, update ID, improve end user experience #31220

Merged
merged 10 commits into from
Dec 2, 2020
1 change: 1 addition & 0 deletions CommonTools/MVAUtils/BuildFile.xml
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
<use name="CommonTools/Utils"/>
<use name="CondFormats/EgammaObjects"/>
<use name="FWCore/ParameterSet"/>
<use name="FWCore/Utilities"/>
Expand Down
4 changes: 4 additions & 0 deletions CommonTools/MVAUtils/bin/BuildFile.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
<use name="CommonTools/MVAUtils"/>

<bin name="convertXMLToGBRForestROOT" file="convertXMLToGBRForestROOT.cc">
</bin>
34 changes: 34 additions & 0 deletions CommonTools/MVAUtils/bin/convertXMLToGBRForestROOT.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
#include "CommonTools/MVAUtils/interface/GBRForestTools.h"

#include "TFile.h"

#include <filesystem>
#include <iostream>

int main(int argc, char **argv) {
if (argc != 3) {
std::cout << "Please pass a (gzipped) BDT weight file and a name for the output ROOT file." << std::endl;
return 1;
}

char *inputFileName = argv[1];
char *outputFileName = argv[2];

if (!std::filesystem::exists(inputFileName)) {
std::cout << "Input file " << inputFileName << " does not exists." << std::endl;
return 1;
}

if (std::filesystem::exists(outputFileName)) {
std::cout << "Output file " << outputFileName << " already exists." << std::endl;
return 1;
}

auto gbrForest = createGBRForest(inputFileName);
std::cout << "Read GBRForest " << inputFileName << " successfully." << std::endl;

TFile{outputFileName, "RECREATE"}.WriteObject(gbrForest.get(), "gbrForest");
std::cout << "GBRForest written to " << outputFileName << " successfully." << std::endl;

return 0;
}
13 changes: 13 additions & 0 deletions CommonTools/MVAUtils/src/GBRForestTools.cc
Original file line number Diff line number Diff line change
Expand Up @@ -3,11 +3,14 @@
#include "FWCore/ParameterSet/interface/FileInPath.h"
#include "FWCore/Utilities/interface/Exception.h"

#include "TFile.h"

#include <cstdio>
#include <cstdlib>
#include <RVersion.h>
#include <cmath>
#include <tinyxml2.h>
#include <filesystem>

namespace {

Expand Down Expand Up @@ -117,6 +120,16 @@ namespace {
}

std::unique_ptr<GBRForest> init(const std::string& weightsFileFullPath, std::vector<std::string>& varNames) {
//
// Load weights file, for ROOT file
//
if (reco::details::hasEnding(weightsFileFullPath, ".root")) {
TFile gbrForestFile(weightsFileFullPath.c_str());
std::unique_ptr<GBRForest> up(gbrForestFile.Get<GBRForest>("gbrForest"));
gbrForestFile.Close("nodelete");
return up;
}

//
// Load weights file, for gzipped or raw xml file
//
Expand Down
2 changes: 2 additions & 0 deletions DataFormats/PatCandidates/src/classes_def_objects.xml
Original file line number Diff line number Diff line change
Expand Up @@ -484,6 +484,8 @@
<class name="std::vector<edm::Ptr<pat::PackedCandidate> >" />
<class name="edm::Wrapper<edm::Ptr<pat::PackedCandidate> >" />
<class name="edm::Wrapper<std::vector<edm::Ptr<pat::PackedCandidate> > >" />
<class name="edm::ValueMap<edm::Ptr<pat::PackedCandidate> >" />
<class name="edm::Wrapper<edm::ValueMap<edm::Ptr<pat::PackedCandidate> > >" />

<class name="std::vector<edm::Ptr<pat::PackedGenParticle> >" />
<class name="edm::Wrapper<edm::Ptr<pat::PackedGenParticle> >" />
Expand Down
1 change: 1 addition & 0 deletions PhysicsTools/PatAlgos/plugins/BuildFile.xml
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
<use name="HLTrigger/HLTcore"/>
<use name="DataFormats/PatCandidates"/>
<use name="DataFormats/BTauReco"/>
<use name="DataFormats/Common"/>
<use name="DataFormats/JetReco"/>
<use name="DataFormats/MuonReco"/>
<use name="DataFormats/TrackReco"/>
Expand Down
104 changes: 66 additions & 38 deletions PhysicsTools/PatAlgos/plugins/LowPtGSFToPackedCandidateLinker.cc
Original file line number Diff line number Diff line change
@@ -1,10 +1,11 @@
#include <string>

#include "DataFormats/Candidate/interface/Candidate.h"
#include "DataFormats/Common/interface/RefToPtr.h"
#include "DataFormats/ParticleFlowCandidate/interface/PFCandidate.h"
#include "DataFormats/ParticleFlowCandidate/interface/PFCandidateFwd.h"
#include "DataFormats/GsfTrackReco/interface/GsfTrack.h"
#include "DataFormats/GsfTrackReco/interface/GsfTrackFwd.h"
#include "DataFormats/PatCandidates/interface/Electron.h"
#include "DataFormats/PatCandidates/interface/PackedCandidate.h"
#include "DataFormats/Common/interface/Association.h"
#include "FWCore/Framework/interface/global/EDProducer.h"
Expand All @@ -19,6 +20,9 @@
#include "DataFormats/EgammaReco/interface/ElectronSeedFwd.h"
#include "FWCore/Framework/interface/MakerMacros.h"

typedef edm::Ptr<pat::PackedCandidate> PackedCandidatePtr;
typedef std::vector<PackedCandidatePtr> PackedCandidatePtrCollection;

class LowPtGSFToPackedCandidateLinker : public edm::global::EDProducer<> {
public:
explicit LowPtGSFToPackedCandidateLinker(const edm::ParameterSet&);
Expand All @@ -36,6 +40,7 @@ class LowPtGSFToPackedCandidateLinker : public edm::global::EDProducer<> {
const edm::EDGetTokenT<edm::Association<pat::PackedCandidateCollection> > lost2trk_;
const edm::EDGetTokenT<edm::Association<reco::TrackCollection> > gsf2trk_;
const edm::EDGetTokenT<std::vector<reco::GsfTrack> > gsftracks_;
const edm::EDGetTokenT<std::vector<pat::Electron> > electrons_;
};

LowPtGSFToPackedCandidateLinker::LowPtGSFToPackedCandidateLinker(const edm::ParameterSet& iConfig)
Expand All @@ -48,44 +53,34 @@ LowPtGSFToPackedCandidateLinker::LowPtGSFToPackedCandidateLinker(const edm::Para
lost2trk_{consumes<edm::Association<pat::PackedCandidateCollection> >(
iConfig.getParameter<edm::InputTag>("lostTracks"))},
gsf2trk_{consumes<edm::Association<reco::TrackCollection> >(iConfig.getParameter<edm::InputTag>("gsfToTrack"))},
gsftracks_{consumes<std::vector<reco::GsfTrack> >(iConfig.getParameter<edm::InputTag>("gsfTracks"))} {
produces<edm::Association<pat::PackedCandidateCollection> >("packedCandidates");
produces<edm::Association<pat::PackedCandidateCollection> >("lostTracks");
gsftracks_{consumes<std::vector<reco::GsfTrack> >(iConfig.getParameter<edm::InputTag>("gsfTracks"))},
electrons_{consumes<std::vector<pat::Electron> >(iConfig.getParameter<edm::InputTag>("electrons"))} {
produces<edm::Association<pat::PackedCandidateCollection> >("gsf2packed");
produces<edm::Association<pat::PackedCandidateCollection> >("gsf2lost");
produces<edm::ValueMap<PackedCandidatePtr> >("ele2packed");
produces<edm::ValueMap<PackedCandidatePtr> >("ele2lost");
}

LowPtGSFToPackedCandidateLinker::~LowPtGSFToPackedCandidateLinker() {}

void LowPtGSFToPackedCandidateLinker::produce(edm::StreamID, edm::Event& iEvent, const edm::EventSetup& iSetup) const {
edm::Handle<reco::PFCandidateCollection> pfcands;
iEvent.getByToken(pfcands_, pfcands);

edm::Handle<pat::PackedCandidateCollection> packed;
iEvent.getByToken(packed_, packed);

edm::Handle<pat::PackedCandidateCollection> lost_tracks;
iEvent.getByToken(lost_tracks_, lost_tracks);

edm::Handle<edm::Association<pat::PackedCandidateCollection> > pf2packed;
iEvent.getByToken(pf2packed_, pf2packed);

edm::Handle<edm::Association<pat::PackedCandidateCollection> > lost2trk_assoc;
iEvent.getByToken(lost2trk_, lost2trk_assoc);

edm::Handle<std::vector<reco::GsfTrack> > gsftracks;
iEvent.getByToken(gsftracks_, gsftracks);

edm::Handle<reco::TrackCollection> tracks;
iEvent.getByToken(tracks_, tracks);

edm::Handle<edm::Association<reco::TrackCollection> > gsf2trk;
iEvent.getByToken(gsf2trk_, gsf2trk);
auto pfcands = iEvent.getHandle(pfcands_);
auto packed = iEvent.getHandle(packed_);
auto lost_tracks = iEvent.getHandle(lost_tracks_);
auto pf2packed = iEvent.getHandle(pf2packed_);
auto lost2trk_assoc = iEvent.getHandle(lost2trk_);
auto gsftracks = iEvent.getHandle(gsftracks_);
auto tracks = iEvent.getHandle(tracks_);
auto gsf2trk = iEvent.getHandle(gsf2trk_);
auto electrons = iEvent.getHandle(electrons_);

// collection sizes, for reference
const size_t npf = pfcands->size();
const size_t npacked = packed->size();
const size_t nlost = lost_tracks->size();
const size_t ntracks = tracks->size();
const size_t ngsf = gsftracks->size();
const size_t nele = electrons->size();

//store index mapping in vectors for easy and fast access
std::vector<size_t> trk2packed(ntracks, npacked);
Expand All @@ -94,6 +89,8 @@ void LowPtGSFToPackedCandidateLinker::produce(edm::StreamID, edm::Event& iEvent,
//store auxiliary mappings for association
std::vector<int> gsf2pack(ngsf, -1);
std::vector<int> gsf2lost(ngsf, -1);
PackedCandidatePtrCollection ele2packedptr(nele, PackedCandidatePtr(packed, -1));
PackedCandidatePtrCollection ele2lostptr(nele, PackedCandidatePtr(lost_tracks, -1));

//electrons will never store their track (they store the Gsf track)
//map PackedPF <--> Track
Expand All @@ -103,8 +100,7 @@ void LowPtGSFToPackedCandidateLinker::produce(edm::StreamID, edm::Event& iEvent,
auto packed_ref = (*pf2packed)[pf_ref];
if (cand.charge() && packed_ref.isNonnull() && cand.trackRef().isNonnull() && cand.trackRef().id() == tracks.id()) {
size_t trkid = cand.trackRef().index();
size_t packid = packed_ref.index();
trk2packed[trkid] = packid;
trk2packed[trkid] = packed_ref.index();
}
}

Expand All @@ -113,22 +109,20 @@ void LowPtGSFToPackedCandidateLinker::produce(edm::StreamID, edm::Event& iEvent,
reco::TrackRef key(tracks, itrk);
pat::PackedCandidateRef lostTrack = (*lost2trk_assoc)[key];
if (lostTrack.isNonnull()) {
size_t ilost = lostTrack.index(); //assumes that LostTracks are all made from the same track collection
trk2lost[itrk] = ilost;
trk2lost[itrk] = lostTrack.index(); // assumes that LostTracks are all made from the same track collection
}
}

//map Track --> GSF and fill GSF --> PackedCandidates and GSF --> Lost associations
for (unsigned int igsf = 0; igsf < ngsf; ++igsf) {
reco::GsfTrackRef gref(gsftracks, igsf);
reco::TrackRef trk = (*gsf2trk)[gref];
if (trk.id() != tracks.id()) {
reco::GsfTrackRef gsf_ref(gsftracks, igsf);
reco::TrackRef trk_ref = (*gsf2trk)[gsf_ref];
if (trk_ref.id() != tracks.id()) {
throw cms::Exception(
"WrongCollection",
"The reco::Track collection used to match against the GSF Tracks was not used to produce such tracks");
}
size_t trkid = trk.index();

size_t trkid = trk_ref.index();
if (trk2packed[trkid] != npacked) {
gsf2pack[igsf] = trk2packed[trkid];
}
Expand All @@ -137,18 +131,51 @@ void LowPtGSFToPackedCandidateLinker::produce(edm::StreamID, edm::Event& iEvent,
}
}

//map Electron-->pat::PFCandidatePtr via Electron-->GsfTrack-->Track and Track-->pat::PFCandidatePtr
for (unsigned int iele = 0; iele < nele; ++iele) {
auto const& ele = (*electrons)[iele];
reco::GsfTrackRef gsf_ref = ele.core()->gsfTrack();
reco::TrackRef trk_ref = (*gsf2trk)[gsf_ref];
if (trk_ref.id() != tracks.id()) {
throw cms::Exception(
"WrongCollection",
"The reco::Track collection used to match against the GSF Tracks was not used to produce such tracks");
}
size_t trkid = trk_ref.index();
auto packedIdx = trk2packed[trkid];
if (packedIdx != npacked) {
ele2packedptr[iele] = PackedCandidatePtr(packed, packedIdx);
}
auto lostIdx = trk2lost[trkid];
if (lostIdx != nlost) {
ele2lostptr[iele] = PackedCandidatePtr(lost_tracks, lostIdx);
}
}

// create output collections from the mappings
auto assoc_gsf2pack = std::make_unique<edm::Association<pat::PackedCandidateCollection> >(packed);
edm::Association<pat::PackedCandidateCollection>::Filler gsf2pack_filler(*assoc_gsf2pack);
gsf2pack_filler.insert(gsftracks, gsf2pack.begin(), gsf2pack.end());
gsf2pack_filler.fill();
iEvent.put(std::move(assoc_gsf2pack), "packedCandidates");
iEvent.put(std::move(assoc_gsf2pack), "gsf2packed");

auto assoc_gsf2lost = std::make_unique<edm::Association<pat::PackedCandidateCollection> >(lost_tracks);
edm::Association<pat::PackedCandidateCollection>::Filler gsf2lost_filler(*assoc_gsf2lost);
gsf2lost_filler.insert(gsftracks, gsf2lost.begin(), gsf2lost.end());
gsf2lost_filler.fill();
iEvent.put(std::move(assoc_gsf2lost), "lostTracks");
iEvent.put(std::move(assoc_gsf2lost), "gsf2lost");

auto map_ele2packedptr = std::make_unique<edm::ValueMap<PackedCandidatePtr> >();
edm::ValueMap<PackedCandidatePtr>::Filler ele2packedptr_filler(*map_ele2packedptr);
ele2packedptr_filler.insert(electrons, ele2packedptr.begin(), ele2packedptr.end());
ele2packedptr_filler.fill();
iEvent.put(std::move(map_ele2packedptr), "ele2packed");

auto map_ele2lostptr = std::make_unique<edm::ValueMap<PackedCandidatePtr> >();
edm::ValueMap<PackedCandidatePtr>::Filler ele2lostptr_filler(*map_ele2lostptr);
ele2lostptr_filler.insert(electrons, ele2lostptr.begin(), ele2lostptr.end());
ele2lostptr_filler.fill();
iEvent.put(std::move(map_ele2lostptr), "ele2lost");
}

void LowPtGSFToPackedCandidateLinker::fillDescriptions(edm::ConfigurationDescriptions& descriptions) {
Expand All @@ -159,6 +186,7 @@ void LowPtGSFToPackedCandidateLinker::fillDescriptions(edm::ConfigurationDescrip
desc.add<edm::InputTag>("tracks", edm::InputTag("generalTracks"));
desc.add<edm::InputTag>("gsfToTrack", edm::InputTag("lowPtGsfToTrackLinks"));
desc.add<edm::InputTag>("gsfTracks", edm::InputTag("lowPtGsfEleGsfTracks"));
desc.add<edm::InputTag>("electrons", edm::InputTag("selectedPatLowPtElectrons"));
descriptions.add("lowPtGsfLinksDefault", desc);
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,42 +11,49 @@
)

patLowPtElectrons = patElectrons.clone(
# input collections

# Input collection
electronSource = sourceElectrons,
genParticleMatch = cms.InputTag("lowPtElectronMatch"),
# overrides
addElectronID = cms.bool(False),
addGenMatch = cms.bool(True),
addMVAVariables = cms.bool(False),
addPFClusterIso = cms.bool(False),
electronIDSources = cms.PSet(),
computeMiniIso = cms.bool(False),
isoDeposits = cms.PSet(),
isolationValues = cms.PSet(),
isolationValuesNoPFId = cms.PSet(),
miniIsoParamsB = cms.vdouble(),
miniIsoParamsE = cms.vdouble(),
usePfCandidateMultiMap = cms.bool(False),
# embedding
embedBasicClusters = cms.bool(False),
embedGenMatch = cms.bool(False),
embedGsfElectronCore = cms.bool(False),
embedGsfTrack = cms.bool(False),
embedHighLevelSelection = cms.bool(False),
embedPFCandidate = cms.bool(False),
embedPflowBasicClusters = cms.bool(False),
embedPflowPreshowerClusters = cms.bool(False),
embedPflowSuperCluster = cms.bool(False),
embedPreshowerClusters = cms.bool(False),
embedRecHits = cms.bool(False),
embedSeedCluster = cms.bool(False),
embedSuperCluster = cms.bool(False),
embedTrack = cms.bool(True),
)

# MC matching
genParticleMatch = "lowPtElectronMatch",

# Electron ID
addElectronID = True,
electronIDSources = dict(
unbiased = cms.InputTag("rekeyLowPtGsfElectronSeedValueMaps:unbiased"),
ptbiased = cms.InputTag("rekeyLowPtGsfElectronSeedValueMaps:ptbiased"),
ID = cms.InputTag("lowPtGsfElectronID"),
),

# Embedding of RECO/AOD items

# Embedding of RECO/AOD items
embedTrack = True,
embedGsfElectronCore = True,
embedGsfTrack = True,
embedSuperCluster = True,
embedSeedCluster = True,
embedBasicClusters = True,
embedPreshowerClusters = False,
embedRecHits = False,
embedPflowSuperCluster = False,
embedPflowBasicClusters = False,
embedPflowPreshowerClusters = False,
embedPFCandidate = False,

# Miscellaneous flags
addMVAVariables = False,
embedHighLevelSelection = False,
isoDeposits = cms.PSet(),
isolationValues = cms.PSet(),
isolationValuesNoPFId = cms.PSet(),
Comment on lines +49 to +50
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

these do not exist in the origin, is there really need to add empty sets?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

They are set in the code, here?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

the point is to keep some consistency at the python layer.
So, if this is a clone from patElectrons, keep things defined consistently.
I'd still prefer to only add/modify elements that are different from patElectrons, but if more things need to be added for cosmetic reasons, please add them in both files.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If I do edmConfigDump, I see this for my patLowPtElectron:

    isoDeposits = cms.PSet(
        pfChargedAll = cms.InputTag("elPFIsoDepositChargedAllPAT"),
        pfChargedHadrons = cms.InputTag("elPFIsoDepositChargedPAT"),
        pfNeutralHadrons = cms.InputTag("elPFIsoDepositNeutralPAT"),
        pfPUChargedHadrons = cms.InputTag("elPFIsoDepositPUPAT"),
        pfPhotons = cms.InputTag("elPFIsoDepositGammaPAT")
    ),
    isolationValues = cms.PSet(
        pfChargedAll = cms.InputTag("elPFIsoValueChargedAll04PFIdPAT"),
        pfChargedHadrons = cms.InputTag("elPFIsoValueCharged04PFIdPAT"),
        pfNeutralHadrons = cms.InputTag("elPFIsoValueNeutral04PFIdPAT"),
        pfPUChargedHadrons = cms.InputTag("elPFIsoValuePU04PFIdPAT"),
        pfPhotons = cms.InputTag("elPFIsoValueGamma04PFIdPAT")
    ),
    isolationValuesNoPFId = cms.PSet(
        pfChargedAll = cms.InputTag("elPFIsoValueChargedAll04NoPFIdPAT"),
        pfChargedHadrons = cms.InputTag("elPFIsoValueCharged04NoPFIdPAT"),
        pfNeutralHadrons = cms.InputTag("elPFIsoValueNeutral04NoPFIdPAT"),
        pfPUChargedHadrons = cms.InputTag("elPFIsoValuePU04NoPFIdPAT"),
        pfPhotons = cms.InputTag("elPFIsoValueGamma04NoPFIdPAT")
    ),

which means these PSets are being set outside of the cfi file (either in c++ or in other python files).

This is not my desired configuration, hence my explicit override (in python).
Acceptable?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

OK, it is clear this way.
Thanks.


)

makePatLowPtElectronsTask = cms.Task(
lowPtElectronMatch,
patLowPtElectrons
patLowPtElectrons,
)

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
from Configuration.ProcessModifiers.run2_miniAOD_UL_cff import run2_miniAOD_UL
run2_miniAOD_UL.toReplaceWith(makePatLowPtElectronsTask, cms.Task(makePatLowPtElectronsTask.copy()+lowPtGsfElectronID))

this would enable re-running of lowPtGsfElectronID only for specific inputs

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm not sure what you mean by this comment.

I want to rerun both the ID and the rekey modules for run2_miniAOD_UL.
Is this not correct?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

the new version seems OK, the comment was made before rekeying was in the config

makePatLowPtElectrons = cms.Sequence(makePatLowPtElectronsTask)
Expand All @@ -61,3 +68,12 @@
electronSource = "gedGsfElectrons",
genParticleMatch = "electronMatch"
)

# Schedule rekeying of seed BDT ValueMaps by reco::GsfElectron for run2_miniAOD_UL
from Configuration.ProcessModifiers.run2_miniAOD_UL_cff import run2_miniAOD_UL
from RecoEgamma.EgammaElectronProducers.lowPtGsfElectronSeedValueMaps_cff import rekeyLowPtGsfElectronSeedValueMaps
from RecoEgamma.EgammaElectronProducers.lowPtGsfElectronID_cfi import lowPtGsfElectronID
_makePatLowPtElectronsTask = makePatLowPtElectronsTask.copy()
_makePatLowPtElectronsTask.add(rekeyLowPtGsfElectronSeedValueMaps)
_makePatLowPtElectronsTask.add(lowPtGsfElectronID)
run2_miniAOD_UL.toReplaceWith(makePatLowPtElectronsTask,_makePatLowPtElectronsTask)
Loading