Skip to content

Commit

Permalink
improve benchmarking
Browse files Browse the repository at this point in the history
  • Loading branch information
jcosborn committed Nov 17, 2024
1 parent 17401d1 commit d2032ec
Show file tree
Hide file tree
Showing 7 changed files with 53 additions and 62 deletions.
10 changes: 10 additions & 0 deletions src/base/globals.nim
Original file line number Diff line number Diff line change
Expand Up @@ -44,3 +44,13 @@ macro setDefaultNc*(n: static[int]): untyped =
result = newEmptyNode()
macro getDefaultNc*(): untyped =
  ## Expands to a literal node built from `defaultNc` as evaluated when the
  ## macro runs.
  result = newLit(defaultNc)

# Yields the default-precision tag as a string: the value of the `defPrec`
# string define when one is supplied, otherwise "D".
# NOTE(review): the const is declared inside a template body; confirm that a
# `-d:defPrec=...` override is still picked up here (template-local symbols
# may be renamed on instantiation).
template getDefPrecStr:string =
# The const's name doubles as the name of the compile-time define.
const defPrec {.strdefine.} = "D"
defPrec

# Exported compile-time variable holding the default-precision tag; it is
# initialised from getDefPrecStr().
var defPrec* {.compiletime.} = getDefPrecStr()
# Switches the default-precision tag to "S" and reports the new value.
# NOTE(review): indentation is not visible in this view; the assignment and
# the first echo below are presumably the macro body -- confirm.
macro setDefaultSingle* =
defPrec = "S"
echo "Default precision: ", defPrec
# Prints the precision tag while this module is being compiled.
static: echo "Default precision: ", defPrec
57 changes: 12 additions & 45 deletions src/base/profile.nim
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import threading
export threading
import comms/comms, stdUtils, base/basicOps
import comms/comms, stdUtils, base/[basicOps,params]
import os, strutils, sequtils, std/monotimes, std/tables, std/algorithm, strformat
export monotimes
getOptimPragmas()
Expand All @@ -20,7 +20,7 @@ var

##[
Each Tic starts a local timer. Each Toc records the time difference
Each tic() starts a local timer. Each toc() records the time difference
of the current time with the one in the local timer visible in the
scope, and then updates the timer with the current time.
Expand Down Expand Up @@ -516,49 +516,10 @@ template tocI(f: SomeNumber; s:SString = ""; n = -1) =
localCode {.global.} = newList[CodePoint]()
thisCode = CodePoint(-1)
if threadNum==0:
when false:
#echo "==== begin toc ",s," ",ii
#echo " rtiStack: ",indent($rtiStack,5)
#echo " cpHeap: ",indent($cpHeap,5)
if unlikely VerboseTimer: echoToc(s,ii)
if prevRTI.int32 >= 0:
if restartTimer:
thawTimers()
restartTimer = false
if not timersFrozen():
let theTime = getTics()
when not cname:
for c in items(localCode):
if cpHeap[c.int].name.equal(s):
thisCode = c
break
if thisCode.isNil:
thisCode = newCodePoint(ii.addr, s)
when not cname:
localCode.add thisCode
let
ns = theTime-localTimer
thisRTI = record(localTic, prevRTI, thisCode, ns, float(f))
var oh = rtiStack[thisRTI.int].childrenOverhead
let c = rtiStack[thisRTI.int].children
for i in 0..<c.len:
if toDropTimer(c[i].prev):
oh -= c[i].childrenOverhead
if oh.float / ns.float > DropWasteTimerRatio:
# Signal stop if the overhead is too large.
dropTimer(prevRTI)
if toDropTimer(thisCode):
freezeTimers()
restartTimer = true
localTimer = getTics()
rtiStack[thisRTI.int].overhead = nsec(localTimer-theTime)
prevRTI = thisRTI
#echo "==== end toc ",s," ",ii
when cname:
tocSet(localTimer,prevRTI,restartTimer,thisCode,f,s,ii.addr,localTic,false)
else:
when cname:
tocSet(localTimer,prevRTI,restartTimer,thisCode,f,s,ii.addr,localTic,false)
else:
tocSet(localTimer,prevRTI,restartTimer,thisCode,f,s,ii.addr,localTic,addr localCode)
tocSet(localTimer,prevRTI,restartTimer,thisCode,f,s,ii.addr,localTic,addr localCode)

when noTicToc:
template toc*() = discard
Expand Down Expand Up @@ -822,7 +783,7 @@ proc makeHotspotTable(lrti: List[RTInfoObj]): tuple[ns:int64,oh:int64] =
t.children.add ri.children[i]
do: # loc not found
hs[loc] = ri
#let tot = makeHotSpotTable(List[RTInfoObj](ri.children))
let tot = makeHotSpotTable(List[RTInfoObj](ri.children))
return (nstot, ohtot)

proc echoHotspots* =
Expand Down Expand Up @@ -879,6 +840,12 @@ proc echoHotspots* =
let tsnspct = 100.0 * tsns / nstot
echo &"{pct:6.3f} {tsnspct:7.3f} {count} {mf} {nc} S {lc} {nm}"

proc echoProf*(def = 0) =
  ## Prints profiling output selected by the integer parameter "prof",
  ## which is read through `intParam` with `def` as its second argument
  ## (presumably the fallback value):
  ##   1 -> `echoHotspots()`
  ##   2 -> `echoTimers()`
  ##   anything else -> prints nothing
  ## NOTE(review): the inline dispatch formerly used in stagag.nim mapped
  ## 1 to echoTimers and 2 to echoHotspots; confirm the reversed mapping
  ## here is intended.
  let mode = intParam("prof", def)
  if mode == 1:
    echoHotspots()
  elif mode == 2:
    echoTimers()

when isMainModule:
import os
proc test =
Expand Down
12 changes: 10 additions & 2 deletions src/bench/benchLinalg.nim
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@ proc checkMem =
# fps: flops per site
# bps: bytes moved (load+store) per site
# mm: memory footprint (bytes) per site
var minNrep = int.high
template bench(fps,bps,mm,eqn: untyped) {.dirty.} =
block:
let vol = lo.nSites.float
Expand Down Expand Up @@ -73,12 +74,19 @@ template bench(fps,bps,mm,eqn: untyped) {.dirty.} =
inc nbench
echo "bench: ",nbench|(-6), "secs: ", dt|(6,3), " mf: ", mf|7, " mb: ", mb|7, " mem: ", mem, " nrep: ", nrep
echo exp2string(eqn), "\n"
minNrep = min(minNrep, nrep)
template bench(fps,bps,eqn: untyped) =
bench(fps,bps,0,eqn)

proc test(lat:auto) =
let maxl = intParam("maxl",16)
if lat[0] > maxl: return
let minl = intParam("minl",0)
if lat[0] < minl: return
let maxl = intParam("maxl",0)
if maxl>0:
if lat[0] > maxl: return
else:
if minNrep == 1: return
minNrep = int.high
var lo = newLayout(lat)
template newCV: untyped = lo.ColorVector()
template newCM: untyped = lo.ColorMatrix()
Expand Down
4 changes: 2 additions & 2 deletions src/bench/benchStagProp.nim
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@ threads:
threadBarrier()
echo r.norm2
#echo v2
echoTimers()
echoProf()

var g3:array[8,type(g[0])]
for i in 0..3:
Expand All @@ -76,6 +76,6 @@ var s3 = newStag3(g3)
s3.solve(v2, v1, mass, sp)
resetTimers()
s3.solve(v2, v1, mass, sp)
echoTimers()
echoProf()

qexFinalize()
5 changes: 1 addition & 4 deletions src/experimental/stagag.nim
Original file line number Diff line number Diff line change
Expand Up @@ -2103,8 +2103,5 @@ if outfn != "":
echo "Saving gauge field to file: ", outfn
let err = g.saveGauge outfn

case intParam("prof",0)
of 1: echoTimers()
of 2: echoHotspots()
else: discard
echoProf()
qexFinalize()
1 change: 0 additions & 1 deletion src/physics/qcdTypes.nim
Original file line number Diff line number Diff line change
Expand Up @@ -416,7 +416,6 @@ macro makeConstructors(x: untyped): untyped =
result = newStmtList()
result.add getAst mp(ident(f&"S"), ident("S"&r&"V"), ident"result")
result.add getAst mp(ident(f&"D"), ident("D"&r&"V"), ident"result")
const defPrec {.strdefine.} = "D"
result.add getAst mp(ident(f), ident(defPrec&r&"V"), ident"result")
# non-Simd versions
result.add getAst mp(ident(f&"S1"), ident("S"&r), ident"result")
Expand Down
26 changes: 18 additions & 8 deletions tests/base/tshift.nim
Original file line number Diff line number Diff line change
Expand Up @@ -126,18 +126,20 @@ template testS16(Smd: typedesc) =
qexInit()

template makeSimdArrayX(T,N,B: untyped) {.dirty.} =
makeSimdArray(`T X`, N, B)
type T = Simd[`T X`]
template toDoubleImpl(x: `T X`): untyped = x # always already double
#makeSimdArray(`T X`, N, B)
#type T = Simd[`T X`]
#template toDoubleImpl(x: `T X`): untyped = x # always already double
type T = Simd[SimdArrayObj[N,B]]

#testS1(float)

makeSimdArrayX(SD1, 1, float)
testS1(SD1)
when declared(SimdD1):
testS1(SimdD1)

#makeSimdArrayX(SS1, 1, float32)
#testS1(SS1)
makeSimdArrayX(SS1, 1, float32)
testS1(SS1)
when declared(SimdS1):
testS1(SimdS1)

Expand All @@ -146,31 +148,39 @@ testS2(SD2)
when declared(SimdD2):
testS2(SimdD2)

#makeSimdArrayX(SS2, 2, float32)
#testS2(SS2)
makeSimdArrayX(SS2, 2, float32)
testS2(SS2)
when declared(SimdS2):
testS2(SimdS2)

makeSimdArrayX(SD4, 4, float)
testS4(SD4)
when declared(SimdD4):
testS4(SimdD4)

makeSimdArrayX(SS4, 4, float32)
testS4(SS4)
when declared(SimdS4):
testS4(SimdS4)

makeSimdArrayX(SD8, 8, float)
testS8(SD8)
when declared(SimdD8):
testS8(SimdD8)

makeSimdArrayX(SS8, 8, float32)
testS8(SS8)
when declared(SimdS8):
testS8(SimdS8)

makeSimdArrayX(SD16, 16, float)
testS16(SD16)
when declared(SimdD16):
testS16(SimdD16)

makeSimdArrayX(SS16, 16, float32)
testS16(SS16)
when declared(SimdS16):
testS16(SimdS16)


qexFinalize()

0 comments on commit d2032ec

Please sign in to comment.