Skip to content

Commit

Permalink
Merge pull request #9 from akapoorcern/master
Browse files Browse the repository at this point in the history
Add scalers and correct rwt bug
  • Loading branch information
a-kapoor authored Apr 19, 2021
2 parents d22da64 + 9d04867 commit e652bb8
Show file tree
Hide file tree
Showing 5 changed files with 275 additions and 98 deletions.
6 changes: 5 additions & 1 deletion Tools/TrainConfig.py
Original file line number Diff line number Diff line change
Expand Up @@ -102,6 +102,10 @@
"XGB_1": {'learning_rate':[0.1, 0.01, 0.001]},
"XGB_2": {'gamma':[0.5, 1],'learning_rate':[0.1, 0.01]},
}

Scaler = {"XGB_1":"MinMaxScaler",
"XGB_2":"StandardScaler",
"DNN_1":"MinMaxScaler" }
#
#To fix a parameter to a single value, still give that value inside a list,
#e.g. "XGB_1":{'gamma':[0.5],'learning_rate':[0.1, 0.01]}
Expand Down Expand Up @@ -137,7 +141,7 @@
SigEffWPs=["80%","90%"] # Example for 80% and 90% Signal Efficiency Working Points

######### Reweighting scheme #Feature not available but planned
Reweighting = 'ptetaSig'
Reweighing = 'ptetaSig'
ptbins = [10,30,40,50,100,5000]
etabins = [-1.6,-1.0,1.0,1.2,1.6]
'''
Expand Down
49 changes: 36 additions & 13 deletions Tools/ptetaRwt.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
{
"cell_type": "code",
"execution_count": 1,
"id": "occupational-madrid",
"id": "surrounded-nursery",
"metadata": {},
"outputs": [
{
Expand Down Expand Up @@ -31,15 +31,15 @@
{
"cell_type": "code",
"execution_count": 2,
"id": "joined-chart",
"id": "metropolitan-gossip",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": 3,
"id": "convenient-vancouver",
"id": "signed-trademark",
"metadata": {},
"outputs": [],
"source": [
Expand All @@ -58,6 +58,7 @@
" etabinstext.append(str(etabins[i])+'-'+str(etabins[i+1]))\n",
" import seaborn as sns\n",
" ptbinstext.reverse()\n",
" import pandas as pd\n",
" df = pd.DataFrame(data=data, columns=etabinstext, index=ptbinstext)\n",
" df=df[::-1].reset_index(drop=True)\n",
" sns.heatmap(df, square=False,ax=ax,cmap=\"Blues\",annot=True,cbar=False)\n",
Expand All @@ -71,6 +72,8 @@
" Sdata=[]\n",
" Bdata=[]\n",
" Wtdata=[]\n",
" Sigdf[NWt]=1\n",
" Bkgdf[NWt]=1\n",
" for i in range(len(ptbins)):\n",
" Bdatai=[]\n",
" Sdatai=[]\n",
Expand All @@ -82,46 +85,66 @@
" Bdatai.append(Bsum)\n",
" Ssum=Sigdf.query(sel)[Wt].sum()\n",
" Sdatai.append(Ssum)\n",
" Bkgdf.loc[sel,NWt]=Ssum/Bsum\n",
" if Bsum>0:\n",
" #print(\"BSum \"+str(Bsum))\n",
" #print(\"SSum \"+str(Ssum))\n",
" if Bsum>0 and Ssum>0:\n",
" #print(\"Entering1\")\n",
" Wtdatai.append(Ssum/Bsum)\n",
" Bkgdf.loc[sel,NWt]=Ssum/Bsum\n",
" else:\n",
" #print(\"Entering2\")\n",
" Wtdatai.append(1)\n",
" Bkgdf.loc[sel,NWt]=1\n",
" continue \n",
" if i<(len(ptbins)-1) and j==(len(etabins)-1):\n",
" sel=ele_pt+'>@ptbins[@i] & '+ele_pt+'<=@ptbins[@i+1] & '+scl_eta+'>@etabins[@j]'\n",
" Bsum=Bkgdf.query(sel)[Wt].sum()\n",
" Bdatai.append(Bsum)\n",
" Ssum=Sigdf.query(sel)[Wt].sum()\n",
" Sdatai.append(Ssum)\n",
" Bkgdf.loc[sel,NWt]=Ssum/Bsum\n",
" if Ssum>0:\n",
" #print(\"BSum \"+str(Bsum))\n",
" #print(\"SSum \"+str(Ssum))\n",
" if Bsum>0 and Ssum>0:\n",
" #print(\"Entering1\")\n",
" Wtdatai.append(Ssum/Bsum)\n",
" Bkgdf.loc[sel,NWt]=Ssum/Bsum\n",
" else:\n",
" #print(\"Entering2\")\n",
" Wtdatai.append(1)\n",
" Bkgdf.loc[sel,NWt]=1\n",
" continue \n",
" if i==(len(ptbins)-1) and j==(len(etabins)-1):\n",
" sel=ele_pt+'>@ptbins[@i] & '+scl_eta+'>@etabins[@j]'\n",
" Bsum=Bkgdf.query(sel)[Wt].sum()\n",
" Bdatai.append(Bsum)\n",
" Ssum=Sigdf.query(sel)[Wt].sum()\n",
" Sdatai.append(Ssum)\n",
" Bkgdf.loc[sel,NWt]=Ssum/Bsum\n",
" if Ssum>0:\n",
" #print(\"BSum \"+str(Bsum))\n",
" #print(\"SSum \"+str(Ssum))\n",
" if Bsum>0 and Ssum>0:\n",
" #print(\"Entering1\")\n",
" Wtdatai.append(Ssum/Bsum)\n",
" Bkgdf.loc[sel,NWt]=Ssum/Bsum\n",
" else:\n",
" #print(\"Entering2\")\n",
" Wtdatai.append(1)\n",
" Bkgdf.loc[sel,NWt]=1\n",
" continue \n",
" sel=ele_pt+'>@ptbins[@i] & '+ele_pt+'<=@ptbins[@i+1] & scl_eta>@etabins[@j] & '+scl_eta+'<@etabins[@j+1]'\n",
" Bsum=Bkgdf.query(sel)[Wt].sum()\n",
" Bdatai.append(Bsum)\n",
" Ssum=Sigdf.query(sel)[Wt].sum()\n",
" Sdatai.append(Ssum)\n",
" Bkgdf.loc[sel,NWt]=Ssum/Bsum\n",
" if Ssum>0:\n",
" #print(\"BSum \"+str(Bsum))\n",
" #print(\"SSum \"+str(Ssum))\n",
" if Bsum>0 and Ssum>0:\n",
" #print(\"Entering1\")\n",
" Wtdatai.append(Ssum/Bsum)\n",
" Bkgdf.loc[sel,NWt]=Ssum/Bsum\n",
" else:\n",
" #print(\"Entering2\")\n",
" Wtdatai.append(1)\n",
" Bkgdf.loc[sel,NWt]=1\n",
" Bdata.append(Bdatai)\n",
" Sdata.append(Sdatai)\n",
" Wtdata.append(Wtdatai)\n",
Expand All @@ -143,7 +166,7 @@
{
"cell_type": "code",
"execution_count": 4,
"id": "natural-field",
"id": "twenty-treat",
"metadata": {},
"outputs": [
{
Expand Down Expand Up @@ -183,7 +206,7 @@
{
"cell_type": "code",
"execution_count": 5,
"id": "checked-protest",
"id": "abandoned-peeing",
"metadata": {},
"outputs": [],
"source": [
Expand Down
65 changes: 57 additions & 8 deletions Tools/ptetaRwt.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,21 @@
#!/usr/bin/env python
# coding: utf-8

# In[1]:


import matplotlib.pyplot as plt


# In[2]:





# In[3]:


def ptetaplot(ptbins,etabins,data,ax,title):
etabinstext=[]
ptbinstext=[]
Expand Down Expand Up @@ -29,6 +45,8 @@ def ptetaRwtTested(Sigdf,Bkgdf,ptbins,etabins,Wt,NWt,ele_pt='ele_pt',scl_eta='sc
Sdata=[]
Bdata=[]
Wtdata=[]
Sigdf[NWt]=1
Bkgdf[NWt]=1
for i in range(len(ptbins)):
Bdatai=[]
Sdatai=[]
Expand All @@ -40,46 +58,66 @@ def ptetaRwtTested(Sigdf,Bkgdf,ptbins,etabins,Wt,NWt,ele_pt='ele_pt',scl_eta='sc
Bdatai.append(Bsum)
Ssum=Sigdf.query(sel)[Wt].sum()
Sdatai.append(Ssum)
Bkgdf.loc[sel,NWt]=Ssum/Bsum
if Bsum>0:
#print("BSum "+str(Bsum))
#print("SSum "+str(Ssum))
if Bsum>0 and Ssum>0:
#print("Entering1")
Wtdatai.append(Ssum/Bsum)
Bkgdf.loc[sel,NWt]=Ssum/Bsum
else:
#print("Entering2")
Wtdatai.append(1)
Bkgdf.loc[sel,NWt]=1
continue
if i<(len(ptbins)-1) and j==(len(etabins)-1):
sel=ele_pt+'>@ptbins[@i] & '+ele_pt+'<=@ptbins[@i+1] & '+scl_eta+'>@etabins[@j]'
Bsum=Bkgdf.query(sel)[Wt].sum()
Bdatai.append(Bsum)
Ssum=Sigdf.query(sel)[Wt].sum()
Sdatai.append(Ssum)
Bkgdf.loc[sel,NWt]=Ssum/Bsum
if Ssum>0:
#print("BSum "+str(Bsum))
#print("SSum "+str(Ssum))
if Bsum>0 and Ssum>0:
#print("Entering1")
Wtdatai.append(Ssum/Bsum)
Bkgdf.loc[sel,NWt]=Ssum/Bsum
else:
#print("Entering2")
Wtdatai.append(1)
Bkgdf.loc[sel,NWt]=1
continue
if i==(len(ptbins)-1) and j==(len(etabins)-1):
sel=ele_pt+'>@ptbins[@i] & '+scl_eta+'>@etabins[@j]'
Bsum=Bkgdf.query(sel)[Wt].sum()
Bdatai.append(Bsum)
Ssum=Sigdf.query(sel)[Wt].sum()
Sdatai.append(Ssum)
Bkgdf.loc[sel,NWt]=Ssum/Bsum
if Ssum>0:
#print("BSum "+str(Bsum))
#print("SSum "+str(Ssum))
if Bsum>0 and Ssum>0:
#print("Entering1")
Wtdatai.append(Ssum/Bsum)
Bkgdf.loc[sel,NWt]=Ssum/Bsum
else:
#print("Entering2")
Wtdatai.append(1)
Bkgdf.loc[sel,NWt]=1
continue
sel=ele_pt+'>@ptbins[@i] & '+ele_pt+'<=@ptbins[@i+1] & scl_eta>@etabins[@j] & '+scl_eta+'<@etabins[@j+1]'
Bsum=Bkgdf.query(sel)[Wt].sum()
Bdatai.append(Bsum)
Ssum=Sigdf.query(sel)[Wt].sum()
Sdatai.append(Ssum)
Bkgdf.loc[sel,NWt]=Ssum/Bsum
if Ssum>0:
#print("BSum "+str(Bsum))
#print("SSum "+str(Ssum))
if Bsum>0 and Ssum>0:
#print("Entering1")
Wtdatai.append(Ssum/Bsum)
Bkgdf.loc[sel,NWt]=Ssum/Bsum
else:
#print("Entering2")
Wtdatai.append(1)
Bkgdf.loc[sel,NWt]=1
Bdata.append(Bdatai)
Sdata.append(Sdatai)
Wtdata.append(Wtdatai)
Expand All @@ -97,6 +135,16 @@ def ptetaRwtTested(Sigdf,Bkgdf,ptbins,etabins,Wt,NWt,ele_pt='ele_pt',scl_eta='sc
plt.savefig(od+"/ReweightingPlot.pdf")
return Sigdf[NWt],Bkgdf[NWt]


# In[4]:





# In[5]:


def dataptetastrip(data1):
data=data1
for ptlist in data:
Expand All @@ -105,3 +153,4 @@ def dataptetastrip(data1):
data[-2] = [sum(i) for i in zip(data[-2], data[-1])]
data.pop(-1)
return data

Loading

0 comments on commit e652bb8

Please sign in to comment.