Skip to content

Commit

Permalink
Added XGboost reader and TopMVA IDs
Browse files Browse the repository at this point in the history
  • Loading branch information
sgnoohc committed Oct 14, 2023
1 parent a8d83c4 commit 15ad668
Show file tree
Hide file tree
Showing 25 changed files with 378 additions and 2 deletions.
107 changes: 107 additions & 0 deletions NanoCORE/ElectronSelections.cc
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,10 @@

using namespace tas;

namespace WWZ {
    // Shared handle to the electron TopMVA XGBoost reader. It starts out empty;
    // WWZ::electronLoadMVA() fills it and WWZ::computeElectronTopMVAScore()
    // evaluates through it.
    std::shared_ptr<XGBoostInterface> electron_mvareader_map{};
}

bool SS::electronID(int idx, SS::IDLevel id_level, int year) {
// Common (across years and ID levels) checks
if (Electron_pt().at(idx) < 7.) { return false; }
Expand Down Expand Up @@ -349,6 +353,109 @@ bool ttH::isTriggerSafeNoIso(int idx) {
return true;
}

// Builds the electron TopLeptonMVA XGBoost reader and stores it in
// WWZ::electron_mvareader_map.
//
// year  : data-taking year; 2016, 2017 and 2018 select the matching "ULxx" tag,
//         any other value adds no tag to the file name.
// isAPV : only consulted for 2016, where it appends "APV" to the tag.
//
// The weight file is looked up under data/TopLeptonMVA/ relative to the
// directory of this source file (taken from __FILE__). Calling this function
// again once a reader exists only prints a warning.
void WWZ::electronLoadMVA(int year, bool isAPV)
{
    if (electron_mvareader_map)
    {
        std::cout << "WARNING: XGBoost already loaded, but is trying to load again!" << std::endl;
        return;
    }

    // Era tag that is spliced into the weight file name.
    std::string era_tag;
    switch (year)
    {
    case 2018:
        era_tag = "UL18";
        break;
    case 2017:
        era_tag = "UL17";
        break;
    case 2016:
        era_tag = isAPV ? "UL16APV" : "UL16";
        break;
    default:
        break;
    }

    // Directory part of this source file's path, i.e. everything before the last '/'.
    const std::string this_file = __FILE__;
    const std::string source_dir = this_file.substr(0, this_file.rfind("/"));

    const std::string fname = source_dir + "/data/TopLeptonMVA/" + "el_TOP" + era_tag + "_XGB.weights.bin";

    std::cout << "electronLoadMVA(): Loading XGBoost binary file = " << fname << std::endl;

    // Input features, in the order handed to the reader.
    const std::vector<std::string> varnames{
        "pt",
        "eta",
        "jetNDauCharged",
        "miniPFRelIso_chg",
        "miniPFRelIso_diff_all_chg", // = miniPFRelIso_all - miniPFRelIso_chg
        "jetPtRelv2",
        "jetPtRatio", // = 1/(jetRelIso+1)
        "pfRelIso03_all",
        "ak4jet:btagDeepFlavB", // B tagging discriminant score
        "sip3d",
        "log_abs_dxy",
        "log_abs_dz",
        "mvaFall17V2noIso", // electron-only input
    };

    // NaN is passed to the reader as the value marking a missing entry.
    const float missing_entry_val = std::numeric_limits<float>::quiet_NaN();

    electron_mvareader_map = std::make_shared<XGBoostInterface>();
    electron_mvareader_map->build(fname, varnames, missing_entry_val);
}

// Evaluates the electron TopMVA XGBoost model for the electron at index `idx`.
//
// For every variable name reported by the reader, the matching Electron_* (or
// Jet_*) branch value is collected into a name -> value map, which is then
// handed to the reader's eval().
//
// Returns the value written by eval(); the result starts at -999 and that value
// is also what is returned if the reader has not been loaded and asserts are
// compiled out.
//
// WWZ::electronLoadMVA() must have been called beforehand.
float WWZ::computeElectronTopMVAScore(unsigned int idx)
{
    float res = -999;

    // Check the reader before touching it: dereferencing an empty shared_ptr is
    // undefined behaviour, and this is the situation the "have you loaded" hint
    // is meant for.
    if (!electron_mvareader_map)
    {
        std::cerr << "WWZ::computeElectronTopMVAScore: The XGBoost reader is not loaded." << std::endl;
        std::cerr << "Have you loaded the XGBoost binary? i.e. did you call electronLoadMVA()?" << std::endl;
        assert(0);
        return res;
    }

    auto const &vnames = electron_mvareader_map->getVariableNames();

    std::unordered_map<std::string, float> input_vars;
    input_vars.reserve(vnames.size());

    for (auto const &vname : vnames)
    {
        if (vname == "pt")
            input_vars[vname] = static_cast<float>(tas::Electron_pt().at(idx));
        else if (vname == "eta")
            input_vars[vname] = static_cast<float>(tas::Electron_eta().at(idx));
        else if (vname == "miniPFRelIso_diff_all_chg")
            input_vars[vname] = static_cast<float>(tas::Electron_miniPFRelIso_all().at(idx) - tas::Electron_miniPFRelIso_chg().at(idx));
        else if (vname == "jetPtRatio")
            input_vars[vname] = static_cast<float>(1. / (tas::Electron_jetRelIso().at(idx) + 1.));
        else if (vname == "log_abs_dxy")
            input_vars[vname] = static_cast<float>(std::log(std::abs(tas::Electron_dxy().at(idx))));
        else if (vname == "log_abs_dz")
            input_vars[vname] = static_cast<float>(std::log(std::abs(tas::Electron_dz().at(idx))));
        else if (vname == "sip3d")
            input_vars[vname] = static_cast<float>(tas::Electron_sip3d().at(idx));
        else if (vname == "miniPFRelIso_chg")
            input_vars[vname] = static_cast<float>(tas::Electron_miniPFRelIso_chg().at(idx));
        else if (vname == "jetPtRelv2")
            input_vars[vname] = static_cast<float>(tas::Electron_jetPtRelv2().at(idx));
        else if (vname == "jetNDauCharged")
            input_vars[vname] = static_cast<float>(tas::Electron_jetNDauCharged().at(idx));
        else if (vname == "pfRelIso03_all")
            input_vars[vname] = static_cast<float>(tas::Electron_pfRelIso03_all().at(idx));
        else if (vname == "mvaFall17V2noIso")
            input_vars[vname] = static_cast<float>(tas::Electron_mvaFall17V2noIso().at(idx));
        else if (vname == "ak4jet:btagDeepFlavB")
        {
            // A negative jet index means no jet is associated with the electron;
            // the b-tag input is then 0.
            auto const jet_idx = tas::Electron_jetIdx().at(idx);
            input_vars[vname] = (jet_idx >= 0) ? static_cast<float>(tas::Jet_btagDeepFlavB().at(jet_idx)) : 0.f;
        }
        else
        {
            // The reader asked for a variable this function has no mapping for.
            std::cerr << "WWZ::computeElectronTopMVAScore: Input variable name " << vname << " does not match to a corresponding variable" << std::endl;
            assert(0);
        }
    }

    electron_mvareader_map->eval(input_vars, res);

    return res;
}

bool WWZ::electronID(int idx, WWZ::IDLevel id_level, int year) {
// Year-specific checks
switch (year) {
Expand Down
4 changes: 4 additions & 0 deletions NanoCORE/ElectronSelections.h
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
#define ELECTRONSELECTIONS_H
#include "Base.h"
#include "Nano.h"
#include "Tools/xgboost/XGBoostInterface.h"

namespace SS {
enum ElectronMVAIDLevel {
Expand Down Expand Up @@ -31,6 +32,9 @@ namespace ttH {
}

namespace WWZ {
extern std::shared_ptr<XGBoostInterface> electron_mvareader_map;
void electronLoadMVA(int year, bool isAPV);
float computeElectronTopMVAScore(unsigned int idx);
bool electronID(int idx, WWZ::IDLevel id_level, int year);
bool electron2016ID(int idx, WWZ::IDLevel id_level);
bool electron2017ID(int idx, WWZ::IDLevel id_level);
Expand Down
11 changes: 9 additions & 2 deletions NanoCORE/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -4,12 +4,19 @@ SOURCES=$(wildcard *.cc) $(wildcard Tools/*.cc) $(wildcard Tools/btagsf/*.cc) $(
OBJECTS=$(SOURCES:.cc=.o)
LIB=NANO_CORE.so

# XGBOOST essentials
# Every path below is derived from XGBOOST_PATH, which is not assigned in the
# visible part of this Makefile (presumably it comes from the environment or the
# make command line -- TODO confirm).
# Library directory passed to the linker with -L.
XGBOOSTLIBDIR = ${XGBOOST_PATH}/lib/
# Header directories, both expressed relative to the library directory.
XGBOOSTINCDIR = ${XGBOOSTLIBDIR}../include/
RABITINCDIR = ${XGBOOSTLIBDIR}../rabit/include/
# Flags added to the per-object compile rule.
XGBOOSTCXXFLAGS = -I$(XGBOOSTINCDIR) -I$(RABITINCDIR) -L$(XGBOOSTLIBDIR)
# Flags added to the shared-library link rule.
XGBOOSTLIBS = -lxgboost -L$(XGBOOSTLIBDIR)

$(LIB): $(OBJECTS)
$(LD) $(LDFLAGS) $(SOFLAGS) $(OBJECTS) $(ROOTLIBS) -lTMVA -lEG -lGenVector -lXMLIO -lMLP -lTreePlayer -o $@
$(LD) $(LDFLAGS) $(SOFLAGS) $(OBJECTS) $(XGBOOSTLIBS) $(ROOTLIBS) -lTMVA -lEG -lGenVector -lXMLIO -lMLP -lTreePlayer -o $@
ln -sf $(LIB) lib$(LIB)

%.o: %.cc
$(CXX) $(CXXFLAGS) -I${CMSSW_BASE}/../../../external/boost/1.67.0/include -I${CMSSW_BASE}/src -c $< -o $@ -fno-var-tracking
$(CXX) $(CXXFLAGS) $(XGBOOSTCXXFLAGS) -I${CMSSW_BASE}/../../../external/boost/1.67.0/include -I${CMSSW_BASE}/src -c $< -o $@ -fno-var-tracking

test: all
python Tools/unit_tests/tests.py
Expand Down
106 changes: 106 additions & 0 deletions NanoCORE/MuonSelections.cc
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,10 @@

using namespace tas;

namespace WWZ {
    // Shared handle to the muon TopMVA XGBoost reader. It starts out empty;
    // WWZ::muonLoadMVA() fills it and WWZ::computeMuonTopMVAScore() evaluates
    // through it.
    std::shared_ptr<XGBoostInterface> muon_mvareader_map{};
}

bool SS::muonID(unsigned int idx, SS::IDLevel id_level, int year) {
// Common (across years and ID levels) checks
if (Muon_pt().at(idx) < 5.) { return false; }
Expand Down Expand Up @@ -131,6 +135,108 @@ bool ttH::muonID(unsigned int idx, ttH::IDLevel id_level, int year) {
return true;
}

// Builds the muon TopLeptonMVA XGBoost reader and stores it in
// WWZ::muon_mvareader_map.
//
// year  : data-taking year; 2016, 2017 and 2018 select the matching "ULxx" tag,
//         any other value adds no tag to the file name.
// isAPV : only consulted for 2016, where it appends "APV" to the tag.
//
// The weight file is looked up under data/TopLeptonMVA/ relative to the
// directory of this source file (taken from __FILE__). Calling this function
// again once a reader exists only prints a warning.
void WWZ::muonLoadMVA(int year, bool isAPV)
{
    if (muon_mvareader_map)
    {
        std::cout << "WARNING: XGBoost already loaded, but is trying to load again!" << std::endl;
        return;
    }

    // Era tag that is spliced into the weight file name.
    std::string era_tag;
    switch (year)
    {
    case 2018:
        era_tag = "UL18";
        break;
    case 2017:
        era_tag = "UL17";
        break;
    case 2016:
        era_tag = isAPV ? "UL16APV" : "UL16";
        break;
    default:
        break;
    }

    // Directory part of this source file's path, i.e. everything before the last '/'.
    const std::string this_file = __FILE__;
    const std::string source_dir = this_file.substr(0, this_file.rfind("/"));

    const std::string fname = source_dir + "/data/TopLeptonMVA/" + "mu_TOP" + era_tag + "_XGB.weights.bin";

    std::cout << "muonLoadMVA(): Loading XGBoost binary file = " << fname << std::endl;

    // Input features, in the order handed to the reader.
    const std::vector<std::string> varnames{
        "pt",
        "eta",
        "jetNDauCharged",
        "miniPFRelIso_chg",
        "miniPFRelIso_diff_all_chg", // = miniPFRelIso_all - miniPFRelIso_chg
        "jetPtRelv2",
        "jetPtRatio", // = 1/(jetRelIso+1)
        "pfRelIso03_all",
        "ak4jet:btagDeepFlavB", // B tagging discriminant score
        "sip3d",
        "log_abs_dxy",
        "log_abs_dz",
        "segmentComp", // muon-only input
    };

    // NaN is passed to the reader as the value marking a missing entry.
    const float missing_entry_val = std::numeric_limits<float>::quiet_NaN();

    muon_mvareader_map = std::make_shared<XGBoostInterface>();
    muon_mvareader_map->build(fname, varnames, missing_entry_val);
}

// Evaluates the muon TopMVA XGBoost model for the muon at index `idx`.
//
// For every variable name reported by the reader, the matching Muon_* (or
// Jet_*) branch value is collected into a name -> value map, which is then
// handed to the reader's eval().
//
// Returns the value written by eval(); the result starts at -999 and that value
// is also what is returned if the reader has not been loaded and asserts are
// compiled out.
//
// WWZ::muonLoadMVA() must have been called beforehand.
float WWZ::computeMuonTopMVAScore(unsigned int idx)
{
    float res = -999;

    // Check the reader before touching it: dereferencing an empty shared_ptr is
    // undefined behaviour, and this is the situation the "have you loaded" hint
    // is meant for.
    if (!muon_mvareader_map)
    {
        std::cerr << "WWZ::computeMuonTopMVAScore: The XGBoost reader is not loaded." << std::endl;
        std::cerr << "Have you loaded the XGBoost binary? i.e. did you call muonLoadMVA()?" << std::endl;
        assert(0);
        return res;
    }

    auto const &vnames = muon_mvareader_map->getVariableNames();

    std::unordered_map<std::string, float> input_vars;
    input_vars.reserve(vnames.size());

    for (auto const &vname : vnames)
    {
        if (vname == "pt")
            input_vars[vname] = static_cast<float>(tas::Muon_pt().at(idx));
        else if (vname == "eta")
            input_vars[vname] = static_cast<float>(tas::Muon_eta().at(idx));
        else if (vname == "miniPFRelIso_diff_all_chg")
            input_vars[vname] = static_cast<float>(tas::Muon_miniPFRelIso_all().at(idx) - tas::Muon_miniPFRelIso_chg().at(idx));
        else if (vname == "jetPtRatio")
            input_vars[vname] = static_cast<float>(1. / (tas::Muon_jetRelIso().at(idx) + 1.));
        else if (vname == "log_abs_dxy")
            input_vars[vname] = static_cast<float>(std::log(std::abs(tas::Muon_dxy().at(idx))));
        else if (vname == "log_abs_dz")
            input_vars[vname] = static_cast<float>(std::log(std::abs(tas::Muon_dz().at(idx))));
        else if (vname == "sip3d")
            input_vars[vname] = static_cast<float>(tas::Muon_sip3d().at(idx));
        else if (vname == "segmentComp")
            input_vars[vname] = static_cast<float>(tas::Muon_segmentComp().at(idx));
        else if (vname == "miniPFRelIso_chg")
            input_vars[vname] = static_cast<float>(tas::Muon_miniPFRelIso_chg().at(idx));
        else if (vname == "jetPtRelv2")
            input_vars[vname] = static_cast<float>(tas::Muon_jetPtRelv2().at(idx));
        else if (vname == "jetNDauCharged")
            input_vars[vname] = static_cast<float>(tas::Muon_jetNDauCharged().at(idx));
        else if (vname == "pfRelIso03_all")
            input_vars[vname] = static_cast<float>(tas::Muon_pfRelIso03_all().at(idx));
        else if (vname == "ak4jet:btagDeepFlavB")
        {
            // A negative jet index means no jet is associated with the muon;
            // the b-tag input is then 0.
            auto const jet_idx = tas::Muon_jetIdx().at(idx);
            input_vars[vname] = (jet_idx >= 0) ? static_cast<float>(tas::Jet_btagDeepFlavB().at(jet_idx)) : 0.f;
        }
        else
        {
            // The reader asked for a variable this function has no mapping for.
            std::cerr << "WWZ::computeMuonTopMVAScore: Input variable name " << vname << " does not match to a corresponding variable" << std::endl;
            assert(0);
        }
    }

    muon_mvareader_map->eval(input_vars, res);

    return res;
}

bool WWZ::muonID(int idx, WWZ::IDLevel id_level, int year) {
// Year-specific checks
switch (year) {
Expand Down
4 changes: 4 additions & 0 deletions NanoCORE/MuonSelections.h
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
#define MUONSELECTIONS_H
#include "Nano.h"
#include "Base.h"
#include "Tools/xgboost/XGBoostInterface.h"

namespace SS {
bool muonID(unsigned int idx, SS::IDLevel id_level, int year);
Expand All @@ -15,6 +16,9 @@ namespace ttH {
}

namespace WWZ {
extern std::shared_ptr<XGBoostInterface> muon_mvareader_map;
void muonLoadMVA(int year, bool isAPV);
float computeMuonTopMVAScore(unsigned int idx);
bool muonID(int idx, WWZ::IDLevel id_level, int year);
bool muon2016ID(unsigned int idx, WWZ::IDLevel id_level);
bool muon2017ID(unsigned int idx, WWZ::IDLevel id_level);
Expand Down
17 changes: 17 additions & 0 deletions NanoCORE/Tools/xgboost/MLWrapper.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
#ifndef MLWRAPPER_H
#define MLWRAPPER_H

#include <string>
#include <unordered_map>
#include <vector>

// Abstract base for model readers: a concrete wrapper implements build() to
// set itself up from a model file.
class MLWrapper
{
public:
    MLWrapper() = default;
    virtual ~MLWrapper() = default;

    // fname             : model file to load.
    // varnames          : names of the input variables.
    // missing_entry_val : value used to mark a missing input.
    // The meaning of the returned flag is defined by the implementing class.
    virtual bool build(std::string fname, std::vector<std::string> const& varnames, float missing_entry_val) = 0;
};

#endif
41 changes: 41 additions & 0 deletions NanoCORE/Tools/xgboost/XGBoostInterface.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
#include <iostream>
#include <limits>
#include <ostream>
//#include <xgboost/c_api.h>
//#include "/cvmfs/cms.cern.ch/slc7_amd64_gcc900/external/py3-xgboost/0.90-ghbfee2/lib/python3.8/site-packages/xgboost/include/xgboost/c_api.h"
#include "XGBoostInterface.hpp"

// Creates an interface with no booster attached and a missing-entry value of 0;
// build() replaces both.
XGBoostInterface::XGBoostInterface()
    : MLWrapper(),
      booster(nullptr),
      defval(0)
{
}

// Releases the XGBoost booster and the heap slot that holds its handle.
XGBoostInterface::~XGBoostInterface()
{
    // `booster` stays nullptr until build() has run, so an interface that was
    // never built must not be dereferenced here.
    if (booster)
    {
        SAFE_XGBOOST(XGBoosterFree(*booster));
        delete booster;
        booster = nullptr;
    }
}

bool XGBoostInterface::build(std::string fname, std::vector<std::string> const &varnames, float missing_entry_val)
{

if (booster)
{
std::cerr << "XGBoostInterface::build: The booster is already built." << endl;
return false;
}
if (fname == "")
{
std::cerr << "XGBoostInterface::build: The file name is an empty string. This function should be called to load models from a file." << endl;
assert(0);
}

defval = missing_entry_val;
variable_names = varnames;

booster = new BoosterHandle;
SAFE_XGBOOST(XGBoosterCreate(nullptr, 0, booster));

// std::cout << "XGBoostInterface::build: A new xgboost is created. Loading the model in " << fname << "..." << endl;

SAFE_XGBOOST(XGBoosterLoadModel(*booster, fname.data()));

return true;
}
31 changes: 31 additions & 0 deletions NanoCORE/Tools/xgboost/XGBoostInterface.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
#ifndef XGBOOSTINTERFACE_H
#define XGBOOSTINTERFACE_H

#include <xgboost/c_api.h>
//#include "${XGBOOST_PATH}/include/xgboost/c_api.h"
//#include "/cvmfs/cms.cern.ch/slc7_amd64_gcc900/external/py3-xgboost/0.90-ghbfee2/lib/python3.8/site-packages/xgboost/include/xgboost/c_api.h"
#include "MLWrapper.h"

class XGBoostInterface : public MLWrapper
{
protected:
BoosterHandle *booster;
float defval;
std::vector<std::string> variable_names;

public:
XGBoostInterface();
virtual ~XGBoostInterface();

bool build(std::string fname, std::vector<std::string> const &varnames, float missing_entry_val);

std::vector<std::string> const &getVariableNames() const { return variable_names; }

BoosterHandle *const &getBooster() const { return booster; }

template <typename T> bool eval(std::unordered_map<std::string, float> const &vars, std::vector<T> &res);
template <typename T> bool eval(std::unordered_map<std::string, float> const &vars, T &res);
};

#endif

Loading

0 comments on commit 15ad668

Please sign in to comment.