From a5d6f3d0aa8e5e8092afb27896545298697cb252 Mon Sep 17 00:00:00 2001 From: Ramkrishna Sharma Date: Fri, 29 Sep 2023 12:49:36 +0200 Subject: [PATCH] Hzz analysis 2l2q v2 dev r (#5) * Add golden json & fix submodule utils (#16) * - Removed the submodule utils * - Minor update in condor submission script. - Added summary of additional script information in the README file * Added golden json information to post-proc * - replaced string "nanoAOD_vvVBS" to "nanoAOD_skim" in few files - Fix the json bug (For MC we don't need to provide) - Added modules createJMECorrector, btagSFProducer, etc * - Still issue with the btagSF - Updated the method for modules to run - Added PUWeightProducer * Added a text file having list of MC samples * Added additional cuts for 2l2q channel * - Improved post-proc for the condor jobs submission * Fix a bug in root file name --- H4LCppModule.py | 19 ++- Input_2018.yml | 10 ++ condor_setup_lxplus.py | 15 +- input_data_Files/sample_list_v9_2018.dat | 6 + interface/H4LTools.h | 14 +- keep_and_drop.txt | 1 + post_proc.py | 170 ++++++++++++++--------- src/H4LTools.cc | 6 + 8 files changed, 164 insertions(+), 77 deletions(-) create mode 100644 input_data_Files/sample_list_v9_2018.dat diff --git a/H4LCppModule.py b/H4LCppModule.py index 80307db..f73310a 100644 --- a/H4LCppModule.py +++ b/H4LCppModule.py @@ -50,6 +50,8 @@ def __init__(self,year,cfgFile,isMC,isFSR): self.worker.InitializeJetcut(cfg['Jet']['pTcut'],cfg['Jet']['Etacut']) self.worker.InitializeEvtCut(cfg['MZ1cut'],cfg['MZZcut'],cfg['Higgscut']['down'],cfg['Higgscut']['up'],cfg['Zmass'],cfg['MZcut']['down'],cfg['MZcut']['up']) + self.worker.Initialize2l2qEvtCut(cfg['HZZ2l2q']['Leading_Lep_pT'], cfg['HZZ2l2q']['SubLeading_Lep_pT'], cfg['HZZ2l2q']['Lep_eta'], cfg['HZZ2l2q']['MZLepcut']['down'], cfg['HZZ2l2q']['MZLepcut']['up']) + self.passtrigEvts = 0 self.noCutsEvts = 0 self.passZZEvts = 0 @@ -105,6 +107,10 @@ def beginFile(self, inputFile, outputFile, inputTree, wrappedOutputTree): self.out = wrappedOutputTree # common branches for 4l, 2l2q, 2l2nu channels + # boolean branch for 4l, 2l2q, 2l2nu channels + self.out.branch("passZZ4lSelection", "O") + self.out.branch("passZZ2l2qSelection", "O") + self.out.branch("passZZ2l2nuSelection", "O") self.out.branch("mass4l", "F") self.out.branch("pT4l", "F") self.out.branch("eta4l", "F") @@ -236,7 +242,7 @@ def analyze(self, event): self.worker.MuonPtCorrection(self.isMC) self.worker.LeptonSelection() - foundZZCandidate = False # for 4l + foundZZCandidate_4l = False # for 4l foundZZCandidate_2l2q = False # for 2l2q foundZZCandidate_2l2nu = False # for 2l2nu @@ -255,9 +261,12 @@ def analyze(self, event): pass elif (self.worker.nTightEle + self.worker.nTightMu >= 4): # This event should belong to 4l; nTightEle + nTightMu >= 4 - foundZZCandidate = self.worker.ZZSelection_4l() + foundZZCandidate_4l = self.worker.ZZSelection_4l() if (foundZZCandidate_2l2q): + + passZZ2l2qSelection = True + self.out.fillBranch("passZZ2l2qSelection",passZZ2l2qSelection) keepIt = True self.passZZEvts += 1 # FatJet_PNZvsQCD = self.worker.FatJet_PNZvsQCD @@ -273,7 +282,7 @@ def analyze(self, event): self.out.fillBranch("pTZ2_2j",pTZ2_2j) self.out.fillBranch("EneZ2_2j",EneZ2_2j) - if (foundZZCandidate or foundZZCandidate_2l2q): + if (foundZZCandidate_4l or foundZZCandidate_2l2q): keepIt = True self.passZZEvts += 1 @@ -295,8 +304,10 @@ def analyze(self, event): self.out.fillBranch("phiZ2",phiZ2) self.out.fillBranch("massZ2",massZ2) - if (foundZZCandidate): + if (foundZZCandidate_4l): keepIt = True + passZZ4lSelection = True + self.out.fillBranch("passZZ4lSelection",passZZ4lSelection) # self.passZZEvts += 1 D_CP = self.worker.D_CP D_0m = self.worker.D_0m diff --git a/Input_2018.yml b/Input_2018.yml index d49ac87..b7037c5 100644 --- a/Input_2018.yml +++ b/Input_2018.yml @@ -103,3 +103,13 @@ Jet: 30 Etacut: 4.7 + +HZZ2l2q: + Leading_Lep_pT: 40 + SubLeading_Lep_pT: 24 + Lep_eta: 2.4 + MZLepcut: + down: + 60 + up: + 120 diff --git a/condor_setup_lxplus.py b/condor_setup_lxplus.py index 4eb2f6b..4929dec 100644 --- a/condor_setup_lxplus.py +++ b/condor_setup_lxplus.py @@ -21,6 +21,7 @@ def main(args): condor_file_name = args.condor_file_name condor_queue = args.condor_queue condor_log_path = args.condor_log_path + DontCreateTarFile = args.DontCreateTarFile # Get top-level directory name from PWD TOP_LEVEL_DIR_NAME = os.path.basename(os.getcwd()) @@ -51,9 +52,9 @@ def main(args): dirName = dirsToCreate.dir_name # create tarball of present working CMSSW base directory - os.system('rm -f CMSSW*.tgz') + if not DontCreateTarFile: os.system('rm -f CMSSW*.tgz') import makeTarFile - makeTarFile.make_tarfile(cmsswDirPath, CMSSWRel+".tgz") + if not DontCreateTarFile: makeTarFile.make_tarfile(cmsswDirPath, CMSSWRel+".tgz") print("copying the "+CMSSWRel+".tgz file to eos path: "+storeDir+"\n") os.system('cp ' + CMSSWRel+".tgz" + ' '+storeDir+'/' + CMSSWRel+".tgz") @@ -125,9 +126,9 @@ def main(args): count_root_files+=1 count_jobs += 1 outjdl_file.write("Output = "+output_log_path+"/"+sample_name+"_$(Process).stdout\n") - outjdl_file.write("Error = "+output_log_path+"/"+sample_name+"_$(Process).stdout\n") + outjdl_file.write("Error = "+output_log_path+"/"+sample_name+"_$(Process).err\n") outjdl_file.write("Log = "+output_log_path+"/"+sample_name+"_$(Process).log\n") - outjdl_file.write("Arguments = "+(xrd_redirector+root_file).replace('/','\/')+" "+output_path+" "+EOS_Output_path+"\n") + outjdl_file.write("Arguments = "+(xrd_redirector+root_file)+" "+output_path+" "+EOS_Output_path+"\n") outjdl_file.write("Queue \n") print("Number of files: ",count_root_files) print("Number of jobs (till now): ",count_jobs) @@ -150,14 +151,15 @@ def main(args): outScript.write("\n"+'rm *.root'); outScript.write("\n"+'scramv1 b ProjectRename'); outScript.write("\n"+'eval `scram runtime -sh`'); - outScript.write("\n"+'sed -i "s/testfile = .*/testfile = \\"${1}\\"/g" '+post_proc_to_run); + # outScript.write("\n"+'sed -i "s/ifRunningOnCondor = .*/ifRunningOnCondor = True/g" '+post_proc_to_run); + # outScript.write("\n"+'sed -i "s/testfile = .*/testfile = \\"${1}\\"/g" '+post_proc_to_run); outScript.write("\n"+'echo "========================================="'); outScript.write("\n"+'echo "cat post_proc.py"'); outScript.write("\n"+'echo "..."'); outScript.write("\n"+'cat post_proc.py'); outScript.write("\n"+'echo "..."'); outScript.write("\n"+'echo "========================================="'); - outScript.write("\n"+command); + outScript.write("\n"+command + " --entriesToRun 0 --inputFile ${1} "); outScript.write("\n"+'echo "====> List root files : " '); outScript.write("\n"+'ls *.root'); outScript.write("\n"+'echo "====> copying *.root file to stores area..." '); @@ -194,6 +196,7 @@ class PreserveWhitespaceFormatter(argparse.RawTextHelpFormatter, argparse.Argume parser = argparse.ArgumentParser(description="Condor Job Submission", formatter_class=PreserveWhitespaceFormatter) parser.add_argument("--submission_name", default="Run2018_v9", help="String to be changed by user.") parser.add_argument("--use_custom_eos", default=False, action='store_true', help="Use custom EOS.") + parser.add_argument("--DontCreateTarFile", default=False, action='store_true', help="Create tar file of CMSSW directory.") parser.add_argument("--use_custom_eos_cmd", default='eos root://cmseos.fnal.gov find -name "*.root" /store/group/lnujj/VVjj_aQGC/custom_nanoAOD', help="Custom EOS command.") # input_file mandatory parser.add_argument("--input_file", default='', required=True, help="Input file from where to read DAS names.") diff --git a/input_data_Files/sample_list_v9_2018.dat b/input_data_Files/sample_list_v9_2018.dat new file mode 100644 index 0000000..6497534 --- /dev/null +++ b/input_data_Files/sample_list_v9_2018.dat @@ -0,0 +1,6 @@ +# Data +/SingleMuon/Run2018B-UL2018_MiniAODv2_NanoAODv9_GT36-v1/NANOAOD +# +# MC +/GluGluHToZZTo4L_M125_TuneCP5_13TeV_powheg2_JHUGenV7011_pythia8/RunIISummer20UL18NanoAODv9-106X_upgrade2018_realistic_v16_L1v1-v1/NANOAODSIM +/GluGluHToZZTo2L2Q_M1000_TuneCP5_13TeV_powheg2_JHUGenV7011_pythia8/RunIISummer20UL18NanoAODv9-106X_upgrade2018_realistic_v16_L1v1-v1/NANOAODSIM diff --git a/interface/H4LTools.h b/interface/H4LTools.h index ce87d46..82270c0 100644 --- a/interface/H4LTools.h +++ b/interface/H4LTools.h @@ -18,6 +18,8 @@ class H4LTools { float eleLoosedxycut,eleLoosedzcut,MuLoosedxycut,MuLoosedzcut,MuTightdxycut,MuTightdzcut,MuTightTrackerLayercut,MuTightpTErrorcut,MuHighPtBound,eleIsocut,MuIsocut; float fsrphotonPtcut,fsrphotonEtacut,fsrphotonIsocut,fsrphotondRlcut,fsrphotondRlOverPtcut, JetPtcut,JetEtacut; float eleBDTWPLELP,eleBDTWPMELP,eleBDTWPHELP,eleBDTWPLEHP,eleBDTWPMEHP,eleBDTWPHEHP; + float HZZ2l2q_Leading_Lep_pT, HZZ2l2q_SubLeading_Lep_pT, HZZ2l2q_Lep_eta, HZZ2l2q_MZLepcutdown, HZZ2l2q_MZLepcutup; + void InitializeElecut(float elePtcut_,float eleEtacut_,float elesip3dCut_,float eleLoosedxycut_,float eleLoosedzcut_,float eleIsocut_,float eleBDTWPLELP_,float eleBDTWPMELP_, float eleBDTWPHELP_,float eleBDTWPLEHP_,float eleBDTWPMEHP_,float eleBDTWPHEHP_){ elePtcut = elePtcut_; eleEtacut = eleEtacut_; @@ -32,7 +34,17 @@ class H4LTools { eleBDTWPMEHP = eleBDTWPMEHP_; eleBDTWPHEHP = eleBDTWPHEHP_; } - void InitializeMucut(float MuPtcut_,float MuEtacut_,float Musip3dCut_,float MuLoosedxycut_,float MuLoosedzcut_,float MuIsocut_,float MuTightdxycut_,float MuTightdzcut_,float MuTightTrackerLayercut_,float MuTightpTErrorcut_,float MuHighPtBound_){ + + void Initialize2l2qEvtCut(float HZZ2l2q_Leading_Lep_pT_, float HZZ2l2q_SubLeading_Lep_pT_, float HZZ2l2q_Lep_eta_, float HZZ2l2q_MZLepcutdown_, float HZZ2l2q_MZLepcutup_) { + HZZ2l2q_Leading_Lep_pT = HZZ2l2q_Leading_Lep_pT_; + HZZ2l2q_SubLeading_Lep_pT = HZZ2l2q_SubLeading_Lep_pT_; + HZZ2l2q_Lep_eta = HZZ2l2q_Lep_eta_; + HZZ2l2q_MZLepcutdown = HZZ2l2q_MZLepcutdown_; + HZZ2l2q_MZLepcutup = HZZ2l2q_MZLepcutup_; + } + + void InitializeMucut(float MuPtcut_, float MuEtacut_, float Musip3dCut_, float MuLoosedxycut_, float MuLoosedzcut_, float MuIsocut_, float MuTightdxycut_, float MuTightdzcut_, float MuTightTrackerLayercut_, float MuTightpTErrorcut_, float MuHighPtBound_) + { MuPtcut = MuPtcut_; MuEtacut = MuEtacut_; Musip3dCut = Musip3dCut_; diff --git a/keep_and_drop.txt b/keep_and_drop.txt index a4def62..700a3be 100644 --- a/keep_and_drop.txt +++ b/keep_and_drop.txt @@ -87,3 +87,4 @@ keep FatJet_* keep Gen* keep Jet_* keep Muon* +keep pass* diff --git a/post_proc.py b/post_proc.py index f0b8483..736355c 100755 --- a/post_proc.py +++ b/post_proc.py @@ -1,77 +1,115 @@ #!/usr/bin/env python -import os,sys +import os +import sys +import argparse from PhysicsTools.NanoAODTools.postprocessing.framework.postprocessor import PostProcessor +from PhysicsTools.NanoAODTools.postprocessing.modules.jme.jetmetHelperRun2 import createJMECorrector +from PhysicsTools.NanoAODTools.postprocessing.modules.btv.btagSFProducer import btagSFProducer +from PhysicsTools.NanoAODTools.postprocessing.modules.common.puWeightProducer import * + +# Custom module imports from H4Lmodule import * from H4LCppModule import * -from PhysicsTools.NanoAODTools.postprocessing.modules.jme.jetmetHelperRun2 import * -from PhysicsTools.NanoAODTools.postprocessing.modules.btv.btagSFProducer import * from JetSFMaker import * -ifRunningOnCondor = False +def parse_arguments(): + """Parse command line arguments.""" + parser = argparse.ArgumentParser() + parser.add_argument("-i", "--inputFile", default="", type=str, help="Input file name") + parser.add_argument("-n", "--entriesToRun", default=100, type=int, help="Set to 0 if need to run over all entries else put number of entries to run") + parser.add_argument("-d", "--DownloadFileToLocalThenRun", default=True, type=bool, help="Download file to local then run") + return parser.parse_args() + + +def getListFromFile(filename): + """Read file list from a text file.""" + with open(filename, "r") as file: + return ["root://cms-xrd-global.cern.ch/" + line.strip() for line in file] + + +def main(): + args = parse_arguments() + + # Initial setup + testfilelist = [] + modulesToRun = [] + isMC = True + isFSR = False + year = None + cfgFile = None + jsonFileName = None + sfFileName = None + + entriesToRun = int(args.entriesToRun) + DownloadFileToLocalThenRun = args.DownloadFileToLocalThenRun + + # Determine list of files to process + if args.inputFile.endswith(".txt"): + testfilelist = getListFromFile(args.inputFile) + elif args.inputFile.endswith(".root"): + testfilelist.append(args.inputFile) + else: + print("INFO: No input file specified. Using default file list.") + testfilelist = getListFromFile("ExampleInputFileList.txt") + print("DEBUG: Input file list: {}".format(testfilelist)) + if len(testfilelist) == 0: + print("ERROR: No input files found. Exiting.") + exit(1) + + # Determine the year and type (MC or Data) + first_file = testfilelist[0] + isMC = "/data/" not in first_file + + if "UL18" in first_file or "UL2018" in first_file: + """UL2018 for identification of 2018 UL data and UL18 for identification of 2018 UL MC + """ + year = 2018 + cfgFile = "Input_2018.yml" + jsonFileName = "golden_Json/Cert_314472-325175_13TeV_Legacy2018_Collisions18_JSON.txt" + sfFileName = "DeepCSV_102XSF_V2.csv" + + if "UL17" in first_file or "UL2017" in first_file: + year = 2017 + cfgFile = "Input_2017.yml" + jsonFileName="golden_Json/Cert_294927-306462_13TeV_UL2017_Collisions17_GoldenJSON.txt" + sfFileName = "DeepCSV_102XSF_V2.csv" + + if "UL16" in first_file or "UL2016" in first_file: + year = 2016 + jsonFileName = "golden_Json/Cert_271036-284044_13TeV_Legacy2016_Collisions16_JSON.txt" + sfFileName = "DeepCSV_102XSF_V2.csv" + + H4LCppModule = lambda: HZZAnalysisCppProducer(year,cfgFile, isMC, isFSR) + modulesToRun.extend([H4LCppModule()]) -testfilelist = [] + print("Input json file: {}".format(jsonFileName)) + print("Input cfg file: {}".format(cfgFile)) + print("isMC: {}".format(isMC)) + print("isFSR: {}".format(isFSR)) -if ifRunningOnCondor: - testfile = "root://cms-xrd-global.cern.ch//store/mc/RunIISummer20UL18NanoAODv9/GluGluHToZZTo4L_M125_TuneCP5_13TeV_powheg2_JHUGenV7011_pythia8/NANOAODSIM/106X_upgrade2018_realistic_v16_L1v1-v1/120000/3ED05633-EBB7-4A44-8F9D-CD956490BCFD.root" - testfilelist.append(testfile) + if isMC: + jetmetCorrector = createJMECorrector(isMC=isMC, dataYear=year, jesUncert="All", jetType = "AK4PFchs") + fatJetCorrector = createJMECorrector(isMC=isMC, dataYear=year, jesUncert="All", jetType = "AK8PFPuppi") + # btagSF = lambda: btagSFProducer("UL"+str(year), algo="deepjet",selectedWPs=['L','M','T','shape_corr'], sfFileName=sfFileName) + btagSF = lambda: btagSFProducer(era = "UL"+str(year), algo = "deepcsv") + puidSF = lambda: JetSFMaker("%s" % year) + modulesToRun.extend([jetmetCorrector(), fatJetCorrector(), puidSF()]) + # # modulesToRun.extend([jetmetCorrector(), fatJetCorrector(), btagSF(), puidSF()]) -else: - if len(sys.argv) > 1: - InputFileList = sys.argv[1] + if year == 2018: modulesToRun.extend([puAutoWeight_2018()]) + if year == 2017: modulesToRun.extend([puAutoWeight_2017()]) + if year == 2016: modulesToRun.extend([puAutoWeight_2016()]) + + p=PostProcessor(".",testfilelist, None, None,modules = modulesToRun, provenance=True,fwkJobReport=False,haddFileName="skimmed_nano_mc.root", maxEntries=entriesToRun, prefetch=DownloadFileToLocalThenRun, outputbranchsel="keep_and_drop.txt") else: - InputFileList = "ExampleInputFileList.txt" - with open(InputFileList, 'r') as file: - for line in file: - # Remove newline characters - line = line.strip() - # Append the line to the list with the "root://cms-xrd-global.cern.ch//" prefix - testfilelist.append("root://cms-xrd-global.cern.ch/" + line) - -# Set entriesToRun = 0 if need to run over all entries else put number of entries to run -entriesToRun = 0 if ifRunningOnCondor else 100 - -isMC = True -isFSR = False -jsonFileName = "" -# Keep DownloadFileToLocalThenRun=True this should reduce the file read error from eos. -DownloadFileToLocalThenRun=True - -if testfilelist[0].find("/data/") != -1: - isMC = False - -if testfilelist[0].find("UL18") != -1 or testfilelist[0].find("UL2018") != -1: # UL2018 for identification of 2018 UL data and UL18 for identification of 2018 UL MC - year = 2018 - cfgFile = 'Input_2018.yml' - jsonFileName="golden_Json/Cert_314472-325175_13TeV_Legacy2018_Collisions18_JSON.txt" - sfFileName="DeepCSV_102XSF_V2.csv" - -if testfilelist[0].find("UL17") != -1 or testfilelist[0].find("UL2017") != -1: - year = 2017 - cfgFile = 'Input_2017.yml' - jsonFileName="golden_Json/Cert_294927-306462_13TeV_UL2017_Collisions17_GoldenJSON.txt" - sfFileName="DeepCSV_94XSF_V5_B_F.csv" - -if testfilelist[0].find("UL16") != -1 or testfilelist[0].find("UL2016") != -1: - sfFileName="DeepCSV_2016LegacySF_V1.csv" - -H4LCppModule = lambda: HZZAnalysisCppProducer(year,cfgFile, isMC, isFSR) -print("Input json file: {}".format(jsonFileName)) -print("Input cfg file: {}".format(cfgFile)) -print("isMC: {}".format(isMC)) -print("isFSR: {}".format(isFSR)) - -if isMC: - jetmetCorrector = createJMECorrector(isMC=isMC, dataYear=year, jesUncert="All", jetType = "AK4PFchs") - fatJetCorrector = createJMECorrector(isMC=isMC, dataYear=year, jesUncert="All", jetType = "AK8PFPuppi") - btagSF = lambda: btagSFProducer("UL"+str(year), algo="deepjet",selectedWPs=['L','M','T','shape_corr'], sfFileName=sfFileName) - puidSF = lambda: JetSFMaker("%s" % year) - # p=PostProcessor(".",testfilelist, None, None,[H4LCppModule(), jetmetCorrector(), fatJetCorrector(), btagSF(), puidSF()], provenance=True,fwkJobReport=False,haddFileName="nano_M125_cpp.root", maxEntries=entriesToRun, prefetch=DownloadFileToLocalThenRun, outputbranchsel="keep_and_drop.txt") - p=PostProcessor(".",testfilelist, None, None,[H4LCppModule(), jetmetCorrector(), fatJetCorrector(), puidSF()], provenance=True,fwkJobReport=False,haddFileName="nano_M125_cpp.root", maxEntries=entriesToRun, prefetch=DownloadFileToLocalThenRun, outputbranchsel="keep_and_drop.txt") -else: - jetmetCorrector = createJMECorrector(isMC=isMC, dataYear=year, jesUncert="All", jetType = "AK4PFchs") - fatJetCorrector = createJMECorrector(isMC=isMC, dataYear=year, jesUncert="All", jetType = "AK8PFPuppi") - p=PostProcessor(".",testfilelist, None, None,[H4LCppModule(), jetmetCorrector(), fatJetCorrector()], provenance=False, fwkJobReport=False,haddFileName="nano_M125_cpp.root", jsonInput=jsonFileName, maxEntries=entriesToRun, prefetch=DownloadFileToLocalThenRun, outputbranchsel="keep_and_drop_data.txt") - -p.run() -print "DONE" + jetmetCorrector = createJMECorrector(isMC=isMC, dataYear=year, jesUncert="All", jetType = "AK4PFchs") + fatJetCorrector = createJMECorrector(isMC=isMC, dataYear=year, jesUncert="All", jetType = "AK8PFPuppi") + modulesToRun.extend([jetmetCorrector(), fatJetCorrector()]) + + p=PostProcessor(".",testfilelist, None, None, modules = modulesToRun, provenance=True, fwkJobReport=False,haddFileName="skimmed_nano_data.root", jsonInput=jsonFileName, maxEntries=entriesToRun, prefetch=DownloadFileToLocalThenRun, outputbranchsel="keep_and_drop_data.txt") + + p.run() + +if __name__ == "__main__": + main() diff --git a/src/H4LTools.cc b/src/H4LTools.cc index d277d63..6d98abb 100644 --- a/src/H4LTools.cc +++ b/src/H4LTools.cc @@ -959,6 +959,12 @@ bool H4LTools::ZZSelection_2l2q(){ return foundZZCandidate; } + // Add tighter lepton pT cut as required by the 2l2q analysis + if ( Zlep1pt[0] < HZZ2l2q_Leading_Lep_pT || Zlep2pt[0] < HZZ2l2q_SubLeading_Lep_pT ) + { + return foundZZCandidate; + } + //Find ZZ candidate std::vector Z1CanIndex; std::vector Z2CanIndex;