From ce69cf522976e7a940cae8ca3d414d7f1e44e498 Mon Sep 17 00:00:00 2001 From: Andrew Gilbert Date: Sun, 4 Oct 2015 09:31:09 +0200 Subject: [PATCH] added version of MorphingMSSM that doesn't build the mA-tanb model and instead just leaves the h, H and A masses floating. To be used with the first version of a PhysicsModel that does the model building instead. --- CombinePdfs/bin/BuildFile.xml | 1 + CombinePdfs/bin/MorphingMSSM-NoModel.cpp | 145 +++++++++++++++++++++++ CombinePdfs/bin/MorphingMSSM.cpp | 2 +- CombinePdfs/python/MSSM.py | 131 ++++++++++++++++++++ docs/Main.md | 6 +- 5 files changed, 281 insertions(+), 4 deletions(-) create mode 100644 CombinePdfs/bin/MorphingMSSM-NoModel.cpp create mode 100644 CombinePdfs/python/MSSM.py diff --git a/CombinePdfs/bin/BuildFile.xml b/CombinePdfs/bin/BuildFile.xml index 489b06a9886..27911fc1161 100644 --- a/CombinePdfs/bin/BuildFile.xml +++ b/CombinePdfs/bin/BuildFile.xml @@ -1,6 +1,7 @@ + diff --git a/CombinePdfs/bin/MorphingMSSM-NoModel.cpp b/CombinePdfs/bin/MorphingMSSM-NoModel.cpp new file mode 100644 index 00000000000..c48a1ac9c58 --- /dev/null +++ b/CombinePdfs/bin/MorphingMSSM-NoModel.cpp @@ -0,0 +1,145 @@ +#include +#include +#include +#include "CombineHarvester/CombineTools/interface/CombineHarvester.h" +#include "CombineHarvester/CombineTools/interface/Utilities.h" +#include "CombineHarvester/CombineTools/interface/TFileIO.h" +#include "CombineHarvester/CombineTools/interface/HttSystematics.h" +#include "CombineHarvester/CombinePdfs/interface/MorphFunctions.h" + +#include "RooWorkspace.h" +#include "RooRealVar.h" +#include "TH2.h" + +using namespace std; + +int main() { + typedef vector> Categories; + typedef vector VString; + + // We will need to source some inputs from the "auxiliaries" repo + string auxiliaries = string(getenv("CMSSW_BASE")) + "/src/auxiliaries/"; + string aux_shapes = auxiliaries +"shapes/"; + + // RooFit will be quite noisy if we don't set this + // 
RooMsgService::instance().setGlobalKillBelow(RooFit::WARNING); + + RooRealVar mA("mA", "mA", 90., 1000.); + RooRealVar mH("mH", "mH", 90., 1000.); + RooRealVar mh("mh", "mh", 90., 1000.); + + + TH1::AddDirectory(false); + ch::CombineHarvester cb; + + Categories mt_cats = { + make_pair(8, "muTau_nobtag"), + make_pair(9, "muTau_btag")}; + + auto masses = ch::MassesFromRange( + "90,100,120-140:10,140-200:20,200-500:50,600-1000:100"); + + cout << "Adding observations..."; + cb.AddObservations({"*"}, {"htt"}, {"8TeV"}, {"mt"}, mt_cats); + cout << " done\n"; + + cout << "Adding background processes..."; + cb.AddProcesses({"*"}, {"htt"}, {"8TeV"}, {"mt"}, + {"ZTT", "W", "QCD", "ZL", "ZJ", "TT", "VV"}, mt_cats, false); + cout << " done\n"; + + cout << "Adding signal processes..."; + // Unlike in previous MSSM H->tautau analyses we will create a separate + // process for each Higgs in the datacards + map signal_types = { + {"ggH", {"ggh_htautau", "ggH_Htautau", "ggA_Atautau"}}, + {"bbH", {"bbh_htautau", "bbH_Htautau", "bbA_Atautau"}} + }; + cb.AddProcesses(masses, {"htt"}, {"8TeV"}, {"mt"}, + signal_types["ggH"], {mt_cats[0]}, true); + cb.AddProcesses(masses, {"htt"}, {"8TeV"}, {"mt"}, + signal_types["bbH"], {mt_cats[1]}, true); + cb.AddProcesses(masses, {"htt"}, {"8TeV"}, {"mt"}, + signal_types["bbH"], {mt_cats[0]}, true); + cb.AddProcesses(masses, {"htt"}, {"8TeV"}, {"mt"}, + signal_types["ggH"], {mt_cats[1]}, true); + cout << " done\n"; + + cout << "Adding systematic uncertainties..."; + ch::AddMSSMSystematics(cb); + cout << " done\n"; + + string file = aux_shapes + "Imperial/htt_mt.inputs-mssm-8TeV-0.root"; + + cout << "Extracting histograms from input root files..."; + cb.cp().era({"8TeV"}).backgrounds().ExtractShapes( + file, "$CHANNEL/$PROCESS", "$CHANNEL/$PROCESS_$SYSTEMATIC"); + + // We have to map each Higgs signal process to the same histogram, i.e: + // {ggh, ggH, ggA} --> ggH + // {bbh, bbH, bbA} --> bbH + 
cb.cp().era({"8TeV"}).process(signal_types["ggH"]).ExtractShapes( + file, "$CHANNEL/ggH$MASS", + "$CHANNEL/ggH$MASS_$SYSTEMATIC"); + cb.cp().era({"8TeV"}).process(signal_types["bbH"]).ExtractShapes( + file, "$CHANNEL/bbH$MASS", + "$CHANNEL/bbH$MASS_$SYSTEMATIC"); + cout << " done\n"; + + cout << "Scaling signal process rates for acceptance...\n"; + for (string e : {"8TeV"}) { + for (string p : {"ggH", "bbH"}) { + cout << "Scaling for process " << p << " and era " << e << "\n"; + auto gr = ch::TGraphFromTable( + "input/xsecs_brs/mssm_" + p + "_" + e + "_accept.txt", "mPhi", + "accept"); + cb.cp().process(signal_types[p]).era({e}).ForEachProc([&](ch::Process *proc) { + double m = boost::lexical_cast(proc->mass()); + proc->set_rate(proc->rate() * gr.Eval(m)); + }); + } + } + + cout << "Setting standardised bin names..."; + ch::SetStandardBinNames(cb); + cout << " done\n"; + + RooWorkspace ws("htt", "htt"); + + TFile demo("htt_mt_mssm_demo.root", "RECREATE"); + + bool do_morphing = true; + map mass_var = { + {"ggh_htautau", &mh}, {"ggH_Htautau", &mH}, {"ggA_Atautau", &mA}, + {"bbh_htautau", &mh}, {"bbH_Htautau", &mH}, {"bbA_Atautau", &mA} + }; + if (do_morphing) { + auto bins = cb.bin_set(); + for (auto b : bins) { + auto procs = cb.cp().bin({b}).signals().process_set(); + for (auto p : procs) { + ch::BuildRooMorphing(ws, cb, b, p, *(mass_var[p]), + "norm", true, true, &demo); + } + } + } + demo.Close(); + cb.AddWorkspace(ws); + cb.cp().signals().ExtractPdfs(cb, "htt", "$BIN_$PROCESS_morph"); + cb.PrintAll(); + + string folder = "output/mssm_nomodel"; + boost::filesystem::create_directories(folder); + + TFile output((folder + "/htt_mt.input.mssm.root").c_str(), "RECREATE"); + + cb.cp().mass({"*"}).WriteDatacard(folder + "/htt_mt_mssm.txt", output); + auto bins = cb.bin_set(); + for (auto b : bins) { + cb.cp().bin({b}).mass({"*"}).WriteDatacard( + folder + "/" + b + ".txt", output); + } + output.Close(); +} + + diff --git a/CombinePdfs/bin/MorphingMSSM.cpp 
b/CombinePdfs/bin/MorphingMSSM.cpp index 2824a1281dd..026b3c999ec 100644 --- a/CombinePdfs/bin/MorphingMSSM.cpp +++ b/CombinePdfs/bin/MorphingMSSM.cpp @@ -119,7 +119,7 @@ int main() { TH2F h_bbH4f_xsec_A = ch::OpenFromTFile(&inputs, "h_bbH4f_xsec_A"); TH2F h_bbH_xsec_h = SantanderMatching(h_bbH4f_xsec_h, h_bbH5f_xsec_h, &h_mh); TH2F h_bbH_xsec_H = SantanderMatching(h_bbH4f_xsec_H, h_bbH5f_xsec_H, &h_mH); - TH2F h_bbH_xsec_A = SantanderMatching(h_bbH4f_xsec_H, h_bbH5f_xsec_H, nullptr); + TH2F h_bbH_xsec_A = SantanderMatching(h_bbH4f_xsec_A, h_bbH5f_xsec_A, nullptr); RooDataHist dh_bbH_xsec_h("dh_bbH_xsec_h", "dh_bbH_xsec_h", RooArgList(mA, tanb), RooFit::Import(h_bbH_xsec_h)); RooDataHist dh_bbH_xsec_H("dh_bbH_xsec_H", "dh_bbH_xsec_H", diff --git a/CombinePdfs/python/MSSM.py b/CombinePdfs/python/MSSM.py new file mode 100644 index 00000000000..ff16d0d9e9b --- /dev/null +++ b/CombinePdfs/python/MSSM.py @@ -0,0 +1,131 @@ +from HiggsAnalysis.CombinedLimit.PhysicsModel import * +import os +import ROOT +import math +import itertools + +class MSSMHiggsModel(PhysicsModel): + def __init__(self): + PhysicsModel.__init__(self) + # Define the known production and decay processes + # These are strings we will look for in the process names to + # determine the correct normalisation scaling + self.ERAS = ['7TeV', '8TeV', '13TeV'] + # We can't add anything here that we're not actually going to have + # loaded from the model files. Should integrate this into the options + # somehow. + eras = ['8TeV'] + self.PROC_SETS = [] + self.PROC_SETS.append( + ([ 'ggh', 'bbh' ], [ 'htautau' ], eras) + ) + self.PROC_SETS.append( + ([ 'ggH', 'bbH' ], [ 'Htautau' ], eras) + ) + self.PROC_SETS.append( + ([ 'ggA', 'bbA' ], [ 'Atautau' ], eras) + ) + + def setModelBuilder(self, modelBuilder): + """We're not supposed to overload this method, but we have to because + this is our only chance to import things into the workspace while it + is completely empty. 
This is primarily so we can define some of our MSSM + Higgs boson masses as functions instead of the free variables that would + be imported later as dependents of the normalisation terms.""" + # First call the parent class implementation + PhysicsModel.setModelBuilder(self, modelBuilder) + self.buildModel(os.environ['CMSSW_BASE']+'/src/auxiliaries/models/out.mhmax-mu+200-8TeV-tanbHigh-nnlo.root') + + def doHistFunc(self, name, hist, varlist, interpolate=1): + "method to conveniently create a RooHistFunc from a TH1/TH2 input" + dh = ROOT.RooDataHist('dh_%s'%name, 'dh_%s'%name, ROOT.RooArgList(*varlist), ROOT.RooFit.Import(hist)) + hfunc = ROOT.RooHistFunc(name, name, ROOT.RooArgSet(*varlist), dh) + hfunc.setInterpolationOrder(interpolate) + self.modelBuilder.out._import(hfunc, ROOT.RooFit.RecycleConflictNodes()) + return self.modelBuilder.out.function(name) + + def santanderMatching(self, h4f, h5f, mass = None): + res = h4f.Clone() + for x in xrange(1, h4f.GetNbinsX() + 1): + for y in xrange(1, h4f.GetNbinsY() +1): + mh = h4f.GetXaxis().GetBinCenter(x) if mass is None else mass.GetBinContent(x, y) + t = math.log(mh / 4.75) - 2. + fourflav = h4f.GetBinContent(x, y) + fiveflav = h5f.GetBinContent(x, y) + sigma = (1. / (1. + t)) * (fourflav + t * fiveflav) + res.SetBinContent(x, y, sigma) + return res + + + def buildModel(self, filename): + mA = ROOT.RooRealVar('mA', 'mA', 344., 90., 1000.) + tanb = ROOT.RooRealVar('tanb', 'tanb', 9., 1., 60.)
+ f = ROOT.TFile(filename) + mH = self.doHistFunc('mH', f.Get('h_mH'), [mA, tanb]) + mh = self.doHistFunc('mh', f.Get('h_mh'), [mA, tanb]) + ggF_xsec_h = self.doHistFunc('xsec_ggh_8TeV', f.Get('h_ggF_xsec_h'), [mA, tanb]) + ggF_xsec_H = self.doHistFunc('xsec_ggH_8TeV', f.Get('h_ggF_xsec_H'), [mA, tanb]) + ggF_xsec_A = self.doHistFunc('xsec_ggA_8TeV', f.Get('h_ggF_xsec_A'), [mA, tanb]) + bbH_xsec_h = self.doHistFunc('xsec_bbh_8TeV', self.santanderMatching(f.Get('h_bbH4f_xsec_h'), f.Get('h_bbH_xsec_h'), f.Get('h_mh')), [mA, tanb]) + bbH_xsec_H = self.doHistFunc('xsec_bbH_8TeV', self.santanderMatching(f.Get('h_bbH4f_xsec_H'), f.Get('h_bbH_xsec_H'), f.Get('h_mH')), [mA, tanb]) + bbH_xsec_A = self.doHistFunc('xsec_bbA_8TeV', self.santanderMatching(f.Get('h_bbH4f_xsec_A'), f.Get('h_bbH_xsec_A')), [mA, tanb]) + brtautau_h = self.doHistFunc('br_htautau', f.Get('h_brtautau_h'), [mA, tanb]) + brtautau_H = self.doHistFunc('br_Htautau', f.Get('h_brtautau_H'), [mA, tanb]) + brtautau_A = self.doHistFunc('br_Atautau', f.Get('h_brtautau_A'), [mA, tanb]) + # Next step: creating theory uncertainties + # 1) for each syst source build kappaHi and kappaLo TH1s by doing a *careful* divide + # between nominal and shifted TH2s => "kappa_hi_xsec_ggh_8TeV_scale" + # 2) create nuisance parameter (QCD_scale_ggH?) and constraint.
Like: + # def preProcessNuisances(self,nuisances): + # if self.add_bbH and not any(row for row in nuisances if row[0] == "QCDscale_bbH"): + # nuisances.append(("QCDscale_bbH",False, "param", [ "0", "1"], [] ) ) + # --> probably have to create the param ourself first, preProcessNuisances doesn't + # happen until later (after doParametersOfInterest) + # 3) create AsymPow and add to the norm product + + def doParametersOfInterest(self): + """Create POI and other parameters, and define the POI set.""" + # print '>> In doParametersOfInterest, workspace contents:' + self.modelBuilder.doVar("r[1,0,20]") + self.modelBuilder.out.var('mA').setConstant(True) + self.modelBuilder.out.var('tanb').setConstant(True) + for proc_set in self.PROC_SETS: + for (P, D, E) in itertools.product(*proc_set): + # print (P, D, E) + terms = ['xsec_%s_%s' % (P, E), 'br_%s'%D] + terms += ['r'] + self.modelBuilder.factory_('prod::scaling_%s_%s_%s(%s)' % (P,D,E,','.join(terms))) + self.modelBuilder.out.function('scaling_%s_%s_%s' % (P,D,E)).Print('') + + # self.modelBuilder.out.Print() + self.modelBuilder.doSet('POI', 'r') + + def getHiggsProdDecMode(self, bin, process): + """Return a triple of (production, decay, energy)""" + P = '' + D = '' + if "_" in process: + (P, D) = process.split("_") + else: + raise RuntimeError, 'Expected signal process %s to be of the form PROD_DECAY' % process + E = None + for era in self.ERAS: + if era in bin: + if E: raise RuntimeError, "Validation Error: bin string %s contains multiple known energies" % bin + E = era + if not E: + raise RuntimeError, 'Did not find a valid energy in bin string %s' % bin + return (P, D, E) + + def getYieldScale(self,bin,process): + if self.DC.isSignal[process]: + (P, D, E) = self.getHiggsProdDecMode(bin, process) + scaling = 'scaling_%s_%s_%s' % (P, D, E) + print 'Scaling %s/%s as %s' % (bin, process, scaling) + return scaling + else: + return 1 + + +MSSM = MSSMHiggsModel() + diff --git a/docs/Main.md b/docs/Main.md index 
3414077987a..cf51ef59a22 100644 --- a/docs/Main.md +++ b/docs/Main.md @@ -3,7 +3,7 @@ Introduction {#mainpage} [TOC] -This page documents the CombineHarvester framework for the production and analysis of datacards for use with the CMS [combine](https://github.com/cms-analysis/HiggsAnalysis-CombinedLimit) statistical analysis tool. The central part of this framework is the [CombineHarvester](\ref ch::CombineHarvester) class, which provides a representation of the text-format datacards and the associated shape input. +This page documents the CombineHarvester framework for the production and analysis of datacards for use with the CMS [combine](https://github.com/cms-analysis/HiggsAnalysis-CombinedLimit) tool. The central part of this framework is the [CombineHarvester](\ref ch::CombineHarvester) class, which provides a representation of the text-format datacards and the associated shape input. The production of new datacards typically requires several steps, for example: @@ -22,11 +22,11 @@ Other functions include extracting information about the fit model: * Evaluating the expected rates of signal and background processes, both pre- and post-fit, and for the latter taking into account the nuisance parameter correlations * Similar evaluation of the background shapes, including the ability to scale, modify and re-bin the shapes in-place -As well as histogram-based templates, the production of datacards with arbitrary RooFit PDFs and datasets is also supported. Though not typically required by the \f$H\rightarrow\tau\tau\f$ group, such datacards are often used in other CMS Higgs groups, and may be useful in future \f$H\rightarrow\tau\tau\f$ analyses. +As well as histogram-based templates, the production of datacards with arbitrary RooFit PDFs and datasets is also supported. 
Getting started {#getting-started} ================================== -The CombineHarvester code is part of the official HiggsToTauTau CMSSW package for limit-setting tools: https://github.com/cms-analysis/HiggsAnalysis-HiggsToTauTau. All the framework code is located in the `CombineHarvester` directory, within which it is organised, via sub-directories, into packages. These are not seen or compiled by scram, but rather by a separate makefile-based build system. See further details on the page [Build System](\ref build). +The CombineHarvester code (https://github.com/cms-analysis/CombineHarvester) is contained within a "top-level" CMSSW package: `$CMSSW_BASE/src/CombineHarvester` with subpackages for the core framework (`CombineHarvester/CombineTools`) and custom PDF extensions (`CombineHarvester/CombinePdfs`). Further analysis-specific sub-packages will be added in the future. You should check out the HiggsAnalysis/HiggsToTauTau CMSSW package alongside the HiggsAnalysis/CombinedLimit package, using the release recommended by the combine developers [here](https://twiki.cern.ch/twiki/bin/viewauth/CMS/SWGuideHiggsAnalysisCombinedLimit#SLC6_release). Note that the CombineHarvester framework is only compatible with the CMSSW 7_X_Y series releases. A new release area can be set up and compiled in the following steps: