From cfa2c8cee31b81fe76feb44a423813e6e4146329 Mon Sep 17 00:00:00 2001 From: James Osborn Date: Sun, 10 Mar 2024 20:11:42 -0500 Subject: [PATCH 01/60] small additions --- src/base/stdUtils.nim | 6 ++++++ src/eigens/linalgFuncs.nim | 2 +- src/field/fieldET.nim | 5 ++++- src/maths/complexProxy.nim | 16 ++++++++-------- src/physics/stagD.nim | 18 ++++++++++++++++++ src/physics/stagSolve.nim | 29 +++++++++++++++++++++++++---- src/solvers/cg.nim | 4 ++-- 7 files changed, 64 insertions(+), 16 deletions(-) diff --git a/src/base/stdUtils.nim b/src/base/stdUtils.nim index 401c088..e3f97ae 100644 --- a/src/base/stdUtils.nim +++ b/src/base/stdUtils.nim @@ -184,6 +184,12 @@ proc `+=`*[T](r: var openArray[T], x: openArray[T]) {.inline.} = for i in 0.. Date: Tue, 26 Mar 2024 14:47:49 -0500 Subject: [PATCH 02/60] improve CG preconditioning --- src/physics/color.nim | 2 + src/physics/stagD.nim | 1 + src/physics/stagSolve.nim | 7 +- src/solvers/cg.nim | 264 +++++++++++++++++++++++++++++-------- src/solvers/gcr.nim | 81 +++++++----- src/solvers/solverBase.nim | 4 +- 6 files changed, 270 insertions(+), 89 deletions(-) diff --git a/src/physics/color.nim b/src/physics/color.nim index 8a13f0a..91dc40d 100644 --- a/src/physics/color.nim +++ b/src/physics/color.nim @@ -167,6 +167,8 @@ template iadd*(r: var Color, x: AsComplex) = iadd(r[], x) template iadd*(r: var Color, x: Color2) = iadd(r[], x[]) +template isub*(r: var Color, x: SomeNumber) = + isub(r[], x) template isub*(r: var Color, x: Color2) = isub(r[], x[]) template imul*(r: var Color, x: SomeNumber) = diff --git a/src/physics/stagD.nim b/src/physics/stagD.nim index 4866f4c..5862553 100644 --- a/src/physics/stagD.nim +++ b/src/physics/stagD.nim @@ -582,6 +582,7 @@ proc eoReduce*(s:Staggered; r,b:Field; m:SomeNumber) = proc eoReconstruct*(s:Staggered; r,b:Field; m:SomeNumber) = # r.odd = (b.odd - Doe r.even)/m stagD(s.so, r, s.g, r, 0.0, -1.0/m) + threadBarrier() r.odd += b/m # (d/dg) redot[ (2D)*x, c ] diff --git a/src/physics/stagSolve.nim b/src/physics/stagSolve.nim index 25168a7..34f309d 100644 --- a/src/physics/stagSolve.nim +++ b/src/physics/stagSolve.nim @@ -66,7 +66,7 @@ proc solveXX*(s: Staggered; r,x: Field; m: SomeNumber; sp0: var SolverParams; #var oap = (apply: op, applyPrecon: oppre) #cg.solve(oap, sp) #else: - var oa = (apply: op) + var oa = (apply: op, precon: cpNone) cg.solve(oa, sp) toc("cg.solve") sp.calls = 1 @@ -81,6 +81,7 @@ proc solveXX*(s: Staggered; r,x: Field; m: SomeNumber; sp0: var SolverParams; of sbQuda: tic() if parEven: + #echo x.even.norm2, " ", sp.r2req s.qudaSolveEE(r,x,m,sp) toc("qudaSolveEE") else: @@ -178,6 +179,7 @@ proc solveReconL(s:Staggered; x,b:Field; m:SomeNumber; sp: var SolverParams; toc("setup") s.solveEE(x, d, m, sp) toc("solveEE") + #echo "solveReconL ", d2e, " ", sp.r2req threads: x.even *= 4 threadBarrier() @@ -247,6 +249,7 @@ proc solve*(s:Staggered; x,b:Field; m:SomeNumber; sp0: var SolverParams) = s.D(r, x, m) threadBarrier() r := b - r + threadBarrier() let r2et = r.even.norm2 r2ot = r.odd.norm2 @@ -255,7 +258,7 @@ proc solve*(s:Staggered; x,b:Field; m:SomeNumber; sp0: var SolverParams) = r2o = r2ot r2 = r2e + r2o if sp.verbosity>0: - echo "stagSolve r2: ", r2/b2 + echo "stagSolve r2/b2: ", r2/b2 sp.r2.init r2/b2 sp.calls = 1 diff --git a/src/solvers/cg.nim b/src/solvers/cg.nim index bda13bf..16ca02b 100644 --- a/src/solvers/cg.nim +++ b/src/solvers/cg.nim @@ -5,12 +5,18 @@ import solverBase export solverBase type + CgPrecon* = enum + cpNone, + cpHerm, + cpLeftRight, + #cpRightNonHerm # x = Ry, r = b-ARy, min r'A^-1r, p'r = 0 CgState*[T] = object r*,Ap,b*: T - p,x*,z: T - b2,r2,r2old,r2stop,rz,rzold: float + p,x*: T + z,q,LAp: T + b2,r2,r2old,r2stop,rz,rzold,alpha: float iterations: int - precon: bool + precon: CgPrecon proc reset*(cgs: var CgState) = cgs.b2 = -1 @@ -19,17 +25,29 @@ proc reset*(cgs: var CgState) = cgs.rzold = 1.0 cgs.r2stop = 0.0 -proc newCgState*[T](x,b: T; precon=false): CgState[T] = +proc initPrecon*(state: var CgState) = + case state.precon + of cpNone: + state.z = state.r + state.q = state.p + state.LAp = state.Ap + of cpHerm: + state.z = newOneof state.r + state.q = state.p + state.LAp = state.Ap + of cpLeftRight: + state.z = newOneof state.r + state.q = newOneof state.p + state.LAp = newOneOf state.z + +proc newCgState*[T](x,b: T): CgState[T] = result.r = newOneOf b result.Ap = newOneOf b result.b = b result.p = newOneOf x result.x = x - result.precon = precon - if precon: - result.z = newOneof b - else: - result.z = result.r + result.precon = cpNone + result.initPrecon result.reset # solves: A x = b @@ -37,25 +55,22 @@ proc solve*(state: var CgState; op: auto; sp: var SolverParams) = mixin apply, applyPrecon tic() let vrb = sp.verbosity - template verb(n:int; body:untyped):untyped = + template verb(n:int; body:untyped) = if vrb>=n: body let sub = sp.subset - template subset(body:untyped):untyped = + template subset(body:untyped) = onNoSync(sub): body - template mythreads(body:untyped):untyped = + template mythreads(body:untyped) = threads: onNoSync(sub): body - const precon = compiles(op.applyPrecon(state.z, state.r)) + let precon = op.precon if precon != state.precon: state.precon = precon - if precon: - state.z = newOneOf state.r - else: - state.z = state.r - + state.initPrecon + state.reset let r = state.r p = state.p @@ -63,10 +78,55 @@ proc solve*(state: var CgState; op: auto; sp: var SolverParams) = x = state.x b = state.b z = state.z + q = state.q + LAp = state.LAp var b2 = state.b2 r2 = state.r2 rz = state.rz + #qLAp = state.qLAp + + if precon == cpHerm: + when not compiles(op.applyPrecon(z, r)): + qexError("cg.solve: precon == cpHerm but op.applyPrecon not found") + if precon == cpLeftRight: + when not compiles(op.applyPreconL(z, r)): + qexError("cg.solve: precon == cpLeftRight but op.applyPreconL not found") + when not compiles(op.applyPreconR(p, q)): + qexError("cg.solve: precon == cpLeftRight but op.applyPreconR not found") + + template getRz = + case precon + of cpNone: + rz = r2 + of cpHerm: + subset: + rz = r.redot z + of cpLeftRight: + subset: + rz = z.norm2 # convenient to use rz here for z2 + #of cpRightNonHerm: + # subset: + # rz = Ap.dot z # convenient to use rz here + template preconL(z, r) = + case precon + of cpNone: + discard + of cpHerm: + when compiles(op.applyPrecon(z, r)): + op.applyPrecon(z, r) + of cpLeftRight: + when compiles(op.applyPreconL(z, r)): + op.applyPreconL(z, r) + template preconR(p, q) = + case precon + of cpNone: + discard + of cpHerm: + discard + of cpLeftRight: + when compiles(op.applyPreconR(p, q)): + op.applyPreconR(p, q) if b2<0: # first call mythreads: @@ -78,8 +138,6 @@ proc solve*(state: var CgState; op: auto; sp: var SolverParams) = mythreads: x := 0 r := 0 - if precon: - z := 0 r2 = 0.0 rz = 0.0 else: @@ -99,40 +157,53 @@ proc solve*(state: var CgState; op: auto; sp: var SolverParams) = var itn0 = state.iterations var r2o0 = state.r2old var rzo0 = state.rzold - + var alpha0 = state.alpha toc("cg setup") + if r2 > r2stop: threads: var itn = itn0 var r2o = r2o0 var rzo = rzo0 + var alpha = alpha0 + var qlap = 0.0 verb(1): - #echo(-1, " ", r2) - echo(itn, " ", r2/b2) + echo("CG iteration: ", itn, " r2/b2: ", r2/b2) while itnr2stop: tic() - when precon: - op.applyPrecon(z, r) - subset: - rz = r.redot z + if itn == 0 or precon != cpLeftRight: + preconL(z, r) # z = L r or z = R r for RightNonHerm else: - rz = r2 - let beta = rz/rzo + subset: + z -= alpha * LAp + getRz() # r.z or z.z or Ap.z + var beta = 0.0 + #if precon == cpRightNonHerm: + # beta = -rz / qLAp + #else: + beta = rz/rzo r2o = r2 rzo = rz subset: - p := z + beta*p - toc("p update", flops=2*numNumbers(r[0])*sub.lenOuter) + if itn == 0: + q := z + else: + q := z + beta*q + toc("q update", flops=2*numNumbers(q[0])*sub.lenOuter) verb(3): echo "beta: ", beta + preconR(p, q) # p = R q inc itn op.apply(Ap, p) toc("Ap") + if precon == cpLeftRight: + preconL(LAp, Ap) # LAp = L Ap subset: - let pAp = p.redot(Ap) - toc("pAp", flops=2*numNumbers(p[0])*sub.lenOuter) - let alpha = rz/pAp + #let pAp = p.redot(Ap) + qLAp = q.redot(LAp) + toc("qLAp", flops=2*numNumbers(p[0])*sub.lenOuter) + alpha = rz/qLAp x += alpha*p toc("x", flops=2*numNumbers(p[0])*sub.lenOuter) r -= alpha*Ap @@ -141,43 +212,54 @@ proc solve*(state: var CgState; op: auto; sp: var SolverParams) = toc("r2", flops=2*numNumbers(r[0])*sub.lenOuter) verb(2): #echo(itn, " ", r2) - echo(itn, " ", r2/b2) + echo("CG iteration: ", itn, " r2/b2: ", r2/b2) verb(3): subset: - let pAp = p.redot(Ap) - echo "p2: ", p.norm2 - echo "Ap2: ", Ap.norm2 - echo "pAp: ", pAp - echo "alpha: ", r2o/pAp + #qLAp = q.redot(LAp) + #echo "p2: ", p.norm2 + #echo "Ap2: ", Ap.norm2 + echo "rz: ", rz + echo "qLAp: ", qLAp + echo "alpha: ", alpha echo "x2: ", x.norm2 echo "r2: ", r2 + echo "z2: ", z.norm2 + echo "q2: ", q.norm2 + echo "Ap2: ", Ap.norm2 + echo "LAp2: ", LAp.norm2 op.apply(Ap, x) var fr2: float subset: - fr2 = (b - Ap).norm2 - echo " ", fr2, " ", fr2/b2 + threadBarrier() + Ap -= b + threadBarrier() + fr2 = Ap.norm2 + echo "fr2: ", fr2, " fr2/b2: ", fr2/b2 if itn mod 64 == 0: aggregateTimers() toc("cg iterations") if threadNum==0: itn0 = itn r2o0 = r2o rzo0 = rzo + alpha0 = alpha #var fr2: float #op.apply(Ap, x) #subset: # r := b - Ap # fr2 = r.norm2 #verb(1): - # echo iterations, " acc r2:", r2/b2 - # echo iterations, " tru r2:", fr2/b2 + # echo iterations, " acc r2: ", r2/b2 + # echo iterations, " tru r2: ", fr2/b2 state.iterations = itn0 state.r2old = r2o0 state.r2 = r2 state.rzold = rzo0 state.rz = rz + state.alpha = alpha0 + #state.qLAp = qLAp verb(1): - echo state.iterations, " acc r2:", r2/b2 + echo "CG final iterations: ", state.iterations, " r2/b2: ", r2/b2 #threads: # op.apply(Ap, x) # var fr2: float @@ -213,21 +295,36 @@ when isMainModule: type opArgs = object m: type(m) - var oa = opArgs(m: m) + precon: CgPrecon + var oa = opArgs(m: m, precon: cpNone) proc apply*(oa: opArgs; r: type(v1); x: type(v1)) = r := oa.m*x #mul(r, m, x) + type opArgsP = object m: type(m) - var oap = opArgsP(m: m) + precon: CgPrecon + var oap = opArgsP(m: m, precon: cpHerm) proc apply*(oa: opArgsP; r: type(v1); x: type(v1)) = r := oa.m*x #mul(r, m, x) proc applyPrecon*(oa: opArgsP; r: type(v1); x: type(v1)) = - for e in r: - let t = sqrt(1.0 / m[e][0,0]) - r[e] := t * x[e] - #mul(r, m, x) + for e in r: + let t = sqrt(1.0 / m[e][0,0]) + r[e] := t * x[e] + #mul(r, m, x) + var precL = true + var precR = true + proc applyPreconL*(oa: opArgsP; r: type(v1); x: type(v1)) = + if precL: + applyPrecon(oa, r, x) + else: + r := x + proc applyPreconR*(oa: opArgsP; r: type(v1); x: type(v1)) = + if precR: + applyPrecon(oa, r, x) + else: + r := x var sp:SolverParams sp.r2req = 1e-20 @@ -249,12 +346,20 @@ when isMainModule: template resid(r,b,x,oa: untyped) = oa.apply(r, x) r := b - r + template checkResid(r, b, x, oa: auto) = + resid(r, b, x, oa) + let r2 = r.norm2 + let b2 = b.norm2 + echo "true r2/b2: ", r2/b2 #cgSolve(v2, v1, oa, sp) var cg = newCgState(x=v2, b=v1) + echo "starting cg.solve" cg.solve(oa, sp) - echo sp.finalIterations + checkResid(v3, v1, v2, oa) + echo "end cg.solve iterations: ", sp.finalIterations + echo "starting cg.solve restart test" v2 := 0 cg.reset sp.maxits = 0 @@ -268,7 +373,10 @@ when isMainModule: #cg.r := v3 #cg.r2 = tr2 echo sp.finalIterations, " ", cg.r2, "/", cg.r2stop + checkResid(v3, v1, v2, oa) + echo "end cg.solve restart test" + echo "starting cg.solve restart test 2" v2 := 0 cg.reset sp.maxits = 0 @@ -277,12 +385,64 @@ when isMainModule: cg.solve(oa, sp) let c = cg.x.norm2 echo cg.iterations, ": ", c, " ", cg.r2 + checkResid(v3, v1, v2, oa) + echo "end cg.solve restart test 2" + + echo "starting cg.solve cpHerm restart test" + v2 := 0 + cg.reset + sp.maxits = 0 + sp.verbosity = 0 + while cg.r2 > cg.r2stop: + sp.maxits += 10 + cg.solve(oap, sp) + let c = cg.x.norm2 + echo cg.iterations, ": ", c, " ", cg.r2 + checkResid(v3, v1, v2, oa) + echo "end cg.solve cpHerm restart test" + + echo "starting cg.solve cpLeftRight restart test" + oap.precon = cpLeftRight + v2 := 0 + cg.reset + sp.maxits = 0 + sp.verbosity = 1 + while cg.r2 > cg.r2stop: + sp.maxits += 10 + cg.solve(oap, sp) + let c = cg.x.norm2 + echo cg.iterations, ": ", c, " ", cg.r2 + checkResid(v3, v1, v2, oa) + echo "end cg.solve cpLeftRight restart test" + echo "starting cg.solve cpLeftRight R restart test" + oap.precon = cpLeftRight + precL = false + precR = true v2 := 0 cg.reset sp.maxits = 0 + sp.verbosity = 0 + while cg.r2 > cg.r2stop: + sp.maxits += 10 + cg.solve(oap, sp) + let c = cg.x.norm2 + echo cg.iterations, ": ", c, " ", cg.r2 + checkResid(v3, v1, v2, oa) + echo "end cg.solve cpLeftRight R restart test" + + echo "starting cg.solve cpLeftRight L restart test" + oap.precon = cpLeftRight + precL = true + precR = false + v2 := 0 + cg.reset + sp.maxits = 0 + sp.verbosity = 0 while cg.r2 > cg.r2stop: sp.maxits += 10 cg.solve(oap, sp) let c = cg.x.norm2 echo cg.iterations, ": ", c, " ", cg.r2 + checkResid(v3, v1, v2, oa) + echo "end cg.solve cpLeftRight L restart test" diff --git a/src/solvers/gcr.nim b/src/solvers/gcr.nim index 86d9096..dad25f6 100644 --- a/src/solvers/gcr.nim +++ b/src/solvers/gcr.nim @@ -53,17 +53,21 @@ template `[]`(gs: GcrState, i: int): untyped = gs.vecs[i] #template level(gs: GcrState, i: int): untyped = gs[i].level proc combine*(gs: var GcrState, n: int) = - var c = gs[n-1].alpha / gs[n].alpha - gs[n].Avec += c * gs[n-1].Avec - swap(gs[n-1].Avec, gs[n].Avec) - gs[n-1].Avn = gs[n].Avn + c.norm2 * gs[n-1].Avn - c += gs[n].beta[n-1] - gs[n].vec += c * gs[n-1].vec - swap(gs[n-1].vec, gs[n].vec) - gs[n-1].alpha = gs[n].alpha + let gsp = addr gs + var c = gsp[][n-1].alpha / gsp[][n].alpha + var d = c + gsp[][n].beta[n-1] + threads: + #block: + var c = gsp[][n-1].alpha / gsp[][n].alpha + gsp[][n].Avec += c * gsp[][n-1].Avec + gsp[][n-1].Avn = gsp[][n].Avn + c.norm2 * gsp[][n-1].Avn + gsp[][n].vec += d * gsp[][n-1].vec + gsp[][n-1].alpha = gsp[][n].alpha for i in 0.. rsqstop and total_iterations < max_iterations: + tic() inc(iteration) inc(total_iterations) #echo "begin addvec" gs.addvec + toc("addvec") #echo "end addvec" let nv = gs.nv - 1 if nv == 1: - gs[0].vec := gs[0].alpha * gs[0].vec - op.apply(gs[0].Avec, gs[0].vec) - gs[0].Avn = gs[0].Avec.norm2 - op.apply(Ap, x) - r := b - Ap - let ctmp = dot(gs[0].Avec, r) - gs[0].alpha = ctmp / gs[0].Avn - r -= gs[0].alpha * gs[0].Avec - rsq = r.norm2 - verb(2): - echo iteration, " rsq: ", gs.r2, " -> ", rsq - gs.r2 = rsq - op.preconditioner(gs[nv].vec, gs) - op.apply(gs[nv].Avec, gs[nv].vec) + let gsp = addr gs + threads: + gsp[][0].vec := gsp[][0].alpha * gsp[][0].vec + op.apply(gsp[][0].Avec, gsp[][0].vec) + gsp[][0].Avn = gsp[][0].Avec.norm2 + op.apply(Ap, x) + r := b - Ap + let ctmp = dot(gsp[][0].Avec, r) + gsp[][0].alpha = ctmp / gsp[][0].Avn + r -= gsp[][0].alpha * gsp[][0].Avec + rsq = r.norm2 + verb(2): + echo iteration, " rsq: ", gsp[].r2, " -> ", rsq + gsp[].r2 = rsq + let gsp = addr gs + threads: + op.preconditioner(gsp[][nv].vec, gsp[]) + op.apply(gsp[][nv].Avec, gsp[][nv].vec) gs.orth gs[nv].Avn = gs[nv].Avec.norm2 let ctmp = dot(gs[nv].Avec, r) @@ -239,10 +251,13 @@ proc solve*(gs: var GcrState; opx: var auto; sp: var SolverParams) = # rsq, relnorm2) gs.getx() #gs.fini() - opx = op - #res_arg.final_rsq = rsq div insq - #res_arg.final_rel = relnorm2 - sp.finalIterations = iteration + #opx = op + sp.calls += 1 + sp.iterations += iteration + sp.iterationsMax = max(sp.iterationsMax, iteration) + sp.seconds += getElapsedTime() + sp.flops += 0 + sp.r2.push gs.r2 verb(1): echo "GCR: its: ", iteration, " rsq: ", rsq #return QOP_SUCCESS diff --git a/src/solvers/solverBase.nim b/src/solvers/solverBase.nim index 907b2b1..68924c6 100644 --- a/src/solvers/solverBase.nim +++ b/src/solvers/solverBase.nim @@ -82,11 +82,11 @@ proc addStats*(sp0: var SolverParams, sp1: SolverParams) = sp0.r2 += sp1.r2 proc getStats*(sp: SolverParams, typ0= -1): string = - let c = sp.calls + let c = max(1,sp.calls) let its = sp.iterations let ic = its div c let im = sp.iterationsMax - let s = sp.seconds + let s = max(1e-12, sp.seconds) let sc = s/c.float let f = sp.flops let gf = 1e-9*f/s From 708a386cea510ad490773fe45a11f91aa746643b Mon Sep 17 00:00:00 2001 From: Xiao-Yong Jin Date: Wed, 17 Apr 2024 11:59:03 -0500 Subject: [PATCH 03/60] simdArray: add missing imadd, imsub, divd overloading --- src/simd/simdArray.nim | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/simd/simdArray.nim b/src/simd/simdArray.nim index 15f6729..c2eb28a 100644 --- a/src/simd/simdArray.nim +++ b/src/simd/simdArray.nim @@ -436,6 +436,9 @@ template makeSimdArray2*(L:typed;B,F:typedesc;N0,N:typed,T:untyped) {.dirty.} = template imadd*(r:var T; x:SomeNumber; y:T) = imadd(r, x.to(type(T)), y) template imsub*(r:var T; x:SomeNumber; y:T) = imsub(r, x.to(type(T)), y) template divd*(r:var T; x:SomeNumber; y:T) = divd(r, x.to(type(T)), y) + template imadd*(r:var T; x:T; y:SomeNumber) = imadd(r, x, y.to(type(T))) + template imsub*(r:var T; x:T; y:SomeNumber) = imsub(r, x, y.to(type(T))) + template divd*(r:var T; x:T; y:SomeNumber) = divd(r, x, y.to(type(T))) template imul*(r:var T; x:SomeNumber) = imul(r, x.to(type(T))) template idiv*(r:var T; x:SomeNumber) = idiv(r, x.to(type(T))) template msub*(r:var T; x:SomeNumber; y,z:T) = msub(r, x.to(type(T)), y, z) From ae36753408057a95ffef3cee16f71036f76eb0bd Mon Sep 17 00:00:00 2001 From: Xiao-Yong Jin Date: Wed, 17 Apr 2024 12:38:59 -0500 Subject: [PATCH 04/60] array[N,T]: use comlete generic types to work around Nim devel issues --- src/base/stdUtils.nim | 20 ++++++++++---------- tests/base/tshift.nim | 4 ++-- 2 files changed, 12 insertions(+), 12 deletions(-) diff --git a/src/base/stdUtils.nim b/src/base/stdUtils.nim index e3f97ae..e0430c3 100644 --- a/src/base/stdUtils.nim +++ b/src/base/stdUtils.nim @@ -59,19 +59,19 @@ proc indexOf*[T](x: openArray[T], y: auto): int = let n = x.len while result Date: Tue, 7 May 2024 11:47:30 -0500 Subject: [PATCH 05/60] experimental/symbolic/graph: open for testing and suggestions --- src/experimental/symbolic/graph.nim | 492 ++++++++++++++++++++++++++++ src/graph.nim | 200 ----------- 2 files changed, 492 insertions(+), 200 deletions(-) create mode 100644 src/experimental/symbolic/graph.nim delete mode 100644 src/graph.nim diff --git a/src/experimental/symbolic/graph.nim b/src/experimental/symbolic/graph.nim new file mode 100644 index 0000000..dd63d5b --- /dev/null +++ b/src/experimental/symbolic/graph.nim @@ -0,0 +1,492 @@ +## lazy generic computational graph + +#[ + +requires: `--multimethods:on` + +We want the symbolic graph nodes to be type generic. Therefore we +need dynamic dispatch based on object types at runtime. This +implementation uses Nim's builtin multimethods for this purpose. + +Nim's multimethods may slow down single method dispatch time, which +would affect the performance of the comms module. We need to measure +it to understand the impact. + +The functions, ident and add, are the same as a variable argument +function, in terms of symbolic formulae, but we treat functions +with different number of arguments differently, because the +implementations of the functions would be different. It also avoids +increasing dynamic dispatch overhead. + +Typed values are enclosed in derived types of `SymNodeValueConcrete` +and referenced from nodes, which have a single type. Since Nim +doesn't allow mixing generic functions with methods, we need to +define a method for each and every combination of concrete types +we use, and wrap our existing generic functions in each method. + +Both SymNodeValueConcrete and SymNode are references. In the graph +we build, a shared node means the same variable. We create new +nodes that equal to the existing nodes with the ident function to +create new referenced node objects, in order to avoid false sharing. +We use copySymNodeValue to create new referenced value objects, +such that different nodes refer to distinct value objects. + +We use the tag sntVisited to avoid repeatedly traverse shared nodes. +The recursive graph traversal function all ends with Rec, just to +remind us to call `tagClearRec(z, sntVisited)` in the top level. + +Further optimizations only possible after building all the graphs: +- Remove ident nodes +- Analyze and reuse allocations when possible + +]# + +# +# basic type support +# + +type + SymNodeTag = enum + sntVisited, sntAssigned, sntNeedUpdate, sntNeedGradient, sntFixedGradient + # sntReusable, ... + SymNodeTags = set[SymNodeTag] + SymNodeValue = ref object of RootObj ## Represent unallocated symbolic value + SymNodeValueConcrete = ref object of SymNodeValue ## For any concrete values + SymNodeGradient = object + ## for a particular variable v + dependent: SymNode ## a variable v that depends on this node, x + gradient: SymNode ## dv/dx + SymNode = ref object + # This object can be cyclic, because gradients refer to ancestor nodes + value: SymNodeValue + inputs: seq[SymNode] + forward: proc(z: SymNode) ## runs the actual compute + arg: SymNodeValue ## extra argument forward/backward uses + runCount: int + allocateValue: proc(z: SymNode) + backward: proc(z: SymNode, i: int, dep: SymNode): SymNode ## create graphs + gradients: seq[SymNodeGradient] ## saved gradient graphs + name: string + tag: SymNodeTags + id: int + SymNodeError = object of Defect + SymNodeValueError = object of Defect + +template raiseError(msg: string) = + raise newException(SymNodeError, msg) + +template raiseValueError(msg: string) = + raise newException(SymNodeValueError, msg) + +template raiseErrorBaseMethod(msg: string) = + raise newException( + SymNodeError, + "Base method invoked: " & msg & + "\nMake sure to pass `--multimethods:on` and check there is a custom method for each derived type.") + +method `$`(v: SymNodeValue): string {.base.} = "SymNodeValue" + +func `$`(z: SymNode): string = + z.name & "#" & $z.id + +func nodeRepr(z: SymNode): string +func treeRepr(z: SymNode): string + +method copySymNodeValue(v: SymNodeValue): SymNodeValue {.base.} = + ## nothing to copy + v + +method copySymNodeValue(v: SymNodeValueConcrete): SymNodeValueConcrete = + raiseValueError("Custom method required for concrete value: " & $v) + +proc newSymNode( + value = SymNodeValue(), + inputs: seq[SymNode] = @[], + forward: proc(z: SymNode) = nil, + arg: SymNodeValue = nil, + runCount: int = 0, + allocateValue: proc(z: SymNode) = nil, + backward: proc(z: SymNode, i: int, dep: SymNode): SymNode = nil, + gradients: seq[SymNodeGradient] = @[], + name: string = "", + tag: SymNodeTags = {}): SymNode = + ## Create new SymNode with a unique id. + var id {.global.} = 0 + result = SymNode(value: value, inputs: inputs, forward: forward, arg: arg, runCount: runCount, + allocateValue: allocateValue, backward: backward, gradients: gradients, name: name, tag: tag, id: id) + id.inc + +proc copySymNode(z: SymNode): SymNode = + newSymNode(value = z.value.copySymNodeValue, inputs = z.inputs, forward = z.forward, + arg = if z.arg != nil: z.arg.copySymNodeValue else: nil, runCount = z.runCount, + allocateValue = z.allocateValue, backward = z.backward, gradients = z.gradients, + name = z.name, tag = z.tag) + +proc assignSymNode(z: SymNode, x: SymNode) = + z.value = x.value.copySymNodeValue + z.inputs = x.inputs + z.forward = x.forward + if x.arg != nil: + z.arg = x.arg.copySymNodeValue + z.runCount = x.runCount + z.allocateValue = x.allocateValue + z.backward = x.backward + z.gradients = x.gradients + z.name = x.name + z.tag = x.tag + +proc gradientDependentOrNil(z: SymNode, dep: SymNode): SymNode = + ## May return nil if not exists. + for g in z.gradients: + if dep == g.dependent: + return g.gradient + # We don't have a matching dependent variable. + return nil + +proc gradientDependentAssign(z: SymNode, dep: SymNode, grad: SymNode) = + ## Replace if exists, otherwise add to the list. + for g in z.gradients.mitems: + if dep == g.dependent: + g.gradient = grad + return + z.gradients.add SymNodeGradient(dependent: dep, gradient: grad) + +proc assign(z: SymNode, v: SymNodeValueConcrete) = + z.value = v + z.tag.incl sntAssigned + +# +# generic symbol support +# + +proc newSym(s: string): SymNode = + newSymNode(name = s) + +method identSymNodeValue(z: SymNodeValue, x: SymNodeValue) {.base.} = + raiseErrorBaseMethod("args:\n " & z.repr & "\n " & x.repr) + +method identAllocateSymNodeValue(z: SymNode, x: SymNodeValue) {.base.} = + raiseErrorBaseMethod("args:\n " & z.nodeRepr & "\n " & x.repr) + +method iaddSymNodeValue(z: SymNodeValue, x: SymNodeValue, y: SymNodeValue) {.base.} = + raiseErrorBaseMethod("args:\n " & z.repr & "\n " & x.repr & "\n " & y.repr) + +method iaddAllocateSymNodeValue(z: SymNode, x: SymNodeValue, y: SymNodeValue) {.base.} = + raiseErrorBaseMethod("args:\n " & z.nodeRepr & "\n " & x.repr & "\n " & y.repr) + +# +# float support +# + +type SymNodeValueFloat = ref object of SymNodeValueConcrete + floatValue: float + +method `$`(v: SymNodeValueFloat): string = $v.floatValue + +proc assign(z: SymNode, v: float) = + z.assign SymNodeValueFloat(floatValue: v) + +method copySymNodeValue(v: SymNodeValueFloat): SymNodeValueFloat = + SymNodeValueFloat(floatValue: v.floatValue) + +method identSymNodeValue(z: SymNodeValueFloat, x: SymNodeValueFloat) = + z.floatValue = x.floatValue + +method identAllocateSymNodeValue(z: SymNode, x: SymNodeValueFloat) = + z.value = SymNodeValueFloat() + +method iaddSymNodeValue(z: SymNodeValueFloat, x: SymNodeValueFloat, y: SymNodeValueFloat) = + z.floatValue = x.floatValue + y.floatValue + +method iaddAllocateSymNodeValue(z: SymNode, x: SymNodeValueFloat, y: SymNodeValueFloat) = + z.value = SymNodeValueFloat() + +# +# minimum algebra for the nodes +# + +proc ident(x:SymNode): SymNode +proc add(x: SymNode, y: SymNode): SymNode + +proc identForward(z: SymNode) = + identSymNodeValue(z.value, z.inputs[0].value) + +proc identAllocate(z: SymNode) = + identAllocateSymNodeValue(z, z.inputs[0].value) + +proc identBackward(z: SymNode, i: int, dep: SymNode): SymNode = + let g = z.gradientDependentOrNil dep + if g == nil: + return newSymNode(value = SymNodeValueFloat(floatValue: 1.0), name = "One[ident]") + else: + return g.ident + +proc ident(x:SymNode): SymNode = + newSymNode( + inputs = @[x], + forward = identForward, + allocateValue = identAllocate, + backward = identBackward, + name = "ident") + +proc addForward(z: SymNode) = + iaddSymNodeValue(z.value, z.inputs[0].value, z.inputs[1].value) + +proc addAllocate(z: SymNode) = + iaddAllocateSymNodeValue(z, z.inputs[0].value, z.inputs[1].value) + +proc addBackward(z: SymNode, i: int, dep: SymNode): SymNode = + let g = z.gradientDependentOrNil dep + if g == nil: + return newSymNode(value = SymNodeValueFloat(floatValue: 1.0), name = "One[add]") + else: + return g.ident + +proc add(x: SymNode, y: SymNode): SymNode = + newSymNode( + inputs = @[x, y], + forward = addForward, + allocateValue = addAllocate, + backward = addBackward, + name = "add") + +# +# graph traversal and evaluation +# + +proc tagClearRec(z: SymNode, tag: SymNodeTag) = + ## This does not use sntVisited, so it will repeat on shared nodes. + if tag in z.tag: + z.tag.excl tag + for i in z.inputs: + i.tagClearRec tag + +proc allocateRec(z: SymNode) = + if sntVisited notin z.tag: + z.tag.incl sntVisited + for i in z.inputs: + i.allocateRec + if not (z.value of SymNodeValueConcrete): + if z.allocateValue == nil: + raiseError("undefined allocateValue for node: " & z.nodeRepr) + z.allocateValue z + +proc allocate(z: SymNode) = + z.allocateRec + z.tagClearRec sntVisited + +proc tagUpdateRec(z: SymNode) = + if sntVisited in z.tag: + return + z.tag.incl sntVisited + if sntAssigned in z.tag: + z.tag.excl sntAssigned + else: + var needupdate = false + for i in z.inputs: + needupdate = needupdate or sntAssigned in i.tag + i.tagUpdateRec + needupdate = needupdate or sntNeedUpdate in i.tag + if needupdate: + z.tag.incl sntNeedUpdate + +proc evalRec(z: SymNode) = + if sntVisited in z.tag: + if sntNeedUpdate in z.tag: + raiseError "cycle detected" + elif sntNeedUpdate in z.tag: + z.tag.incl sntVisited + for i in z.inputs: + i.evalRec + if z.forward != nil: + z.forward z + z.runCount.inc + elif z.inputs.len > 0: + raiseError("inputs.len: " & $z.inputs.len & ", but no forward function defined for:\n" & z.nodeRepr) + z.tag.excl sntNeedUpdate + +proc eval(z: SymNode) = + z.tagUpdateRec + z.tagClearRec sntVisited + z.evalRec + z.tagClearRec sntVisited + +proc tagUpdateNeedGradientRec(z: SymNode) = + if sntVisited in z.tag: + return + z.tag.incl sntVisited + var needgradient = false + for i in z.inputs: + i.tagUpdateNeedGradientRec + needgradient = needgradient or sntNeedGradient in i.tag + if needgradient and sntNeedGradient notin z.tag: + z.tag.incl sntNeedGradient + +proc gradientRec(z: SymNode, dep: SymNode) = + ## gradient of dep with respect to z + # We tag newly created nodes from z.backward(z, i, dep), with needUpdate. + for i in 0.. 0: + result &= ", inputs: {" + for i in 0.. 0: + result &= ", " + result &= "[" & $i & "]: " & $z.inputs[i] + result &= "}" + if z.gradients.len > 0: + result &= ", gradients: {" + for i in 0.. 0: + result &= ", " + result &= "[" & $i & "]: " & $z.gradients[i].dependent & " -> " & $z.gradients[i].gradient + result &= "}" + if z.value != nil: + result &= ": " & $z.value + +func toStringRec(z: SymNode, pre: string, shared: seq[SymNode]): string = + result = pre & z.nodeRepr + if sntVisited in z.tag: + result &= " [shared]" + else: + z.tag.incl sntVisited + for zid in 0..1: - result = "#" & $id & "=(" - g.refs = -id - inc id - else: - result = "(" - result &= g.str - for x in g.args: - result &= " " & x.go - result &= ")" - else: - result = g.str - result = g.go - -proc newVar[T](x:T, s="$V"):GraphNode[T] = - GraphNode[T](val:GraphValue[T](v:x), str:s, - initGrad: (proc(g:Graph) = g.grad = GraphValue[T](v:1.T))) - -proc newConst[T](x:T, s="$C"):GraphNode[T] = - GraphNode[T](tag: {gtConst}, val:GraphValue[T](v:x), str:s & "|" & $x & "|", - initGrad: (proc(g:Graph) = g.grad = GraphValue[T](v:1.T))) - -proc eval(g:Graph) = - if g.isop and gtRun notin g.tag: - g.tag.incl gtRun - for x in g.args: - x.eval - g.run(g) - -proc clearGrad(g:Graph) = - g.grad = nil - g.tag.excl gtDF - g.tag.excl gtGrad - if g.isop: - for x in g.args: - x.clearGrad - -proc evalGrad(g:Graph) = - if g.isop and gtRun notin g.tag: - g.eval - if gtDF notin g.tag: - g.clearGrad - g.countRefs - g.tag.incl gtDF - g.initGrad(g) - proc go(g:Graph) = - g.tag.incl gtGrad - if g.isop: - g.refs.dec - if g.refs>0: - return - # Wait until the last reference of the shared nodes. - g.back(g) - for x in g.args: - x.go - g.go - -proc evalGrad[G,X](g:GraphNode[G], x:GraphNode[X]):X = - # TODO only descend in to nodes that contains x. - g.Graph.evalGrad - proc go(g:Graph):Graph = - if g == x: - return x - elif g.isop: - for c in g.args: - if c.go == x: - return x - g - if g.go == x: - GraphValue[X](x.grad).v - else: - X 0 - -proc eval[T](g:GraphNode[T]):T = - g.Graph.eval - GraphValue[T](g.val).v - -proc `+`[X,Y](x:GraphNode[X], y:GraphNode[Y]):auto = - type R = type(GraphValue[X](x.val).v+GraphValue[Y](y.val).v) - GraphNode[R](isop:true, str:"+", args: @[x.Graph,y], - run: (proc(g:Graph) = - echo "# Run: ",g.args[0].str," + ",g.args[1].str - let v = GraphValue[R](v:GraphValue[X](g.args[0].val).v+GraphValue[Y](g.args[1].val).v) - g.val = v - g.str &= "(=" & $v.v & ")"), - initGrad: (proc(g:Graph) = g.grad = GraphValue[R](v:1.R)), - back: (proc(g:Graph) = - echo "# Back: ",x.str," * ",y.str - if gtConst notin g.args[0].tag: - let t = GraphValue[R](g.grad).v.X - if g.args[0].grad != nil: - g.args[0].grad = GraphValue[X](v:GraphValue[X](g.args[0].grad).v+t) - else: - g.args[0].grad = GraphValue[X](v:t) - if gtConst notin g.args[1].tag: - let t = GraphValue[R](g.grad).v.Y - if g.args[1].grad != nil: - g.args[1].grad = GraphValue[Y](v:GraphValue[Y](g.args[1].grad).v+t) - else: - g.args[1].grad = GraphValue[Y](v:t))) -proc `+`[X](x:GraphNode[X], y:SomeNumber):auto = x + newConst(y) -proc `+`[Y](x:SomeNumber, y:GraphNode[Y]):auto = newConst(x) + y - -proc `*`[X,Y](x:GraphNode[X], y:GraphNode[Y]):auto = - type R = type(GraphValue[X](x.val).v*GraphValue[Y](y.val).v) - GraphNode[R](isop:true, str:"*", args: @[x.Graph,y], - run: (proc(g:Graph) = - echo "# Run: ",x.str," * ",y.str - let v = GraphValue[R](v:GraphValue[X](g.args[0].val).v*GraphValue[Y](g.args[1].val).v) - g.val = v - g.str &= "(=" & $v.v & ")"), - initGrad: (proc(g:Graph) = g.grad = GraphValue[R](v:1.R)), - back: (proc(g:Graph) = - echo "# Back: ",x.str," * ",y.str - if gtConst notin g.args[0].tag: - let t = GraphValue[R](g.grad).v*GraphValue[Y](g.args[1].val).v - if g.args[0].grad != nil: - g.args[0].grad = GraphValue[X](v:GraphValue[X](g.args[0].grad).v+t) - else: - g.args[0].grad = GraphValue[X](v:t) - if gtConst notin g.args[1].tag: - let t = GraphValue[X](g.args[0].val).v*GraphValue[R](g.grad).v - if g.args[1].grad != nil: - g.args[1].grad = GraphValue[X](v:GraphValue[X](g.args[1].grad).v+t) - else: - g.args[1].grad = GraphValue[Y](v:t))) -proc `*`[X](x:GraphNode[X], y:SomeNumber):auto = x * newConst(y) -proc `*`[Y](x:SomeNumber, y:GraphNode[Y]):auto = newConst(x) * y - -when isMainModule: - let - x = newVar(2.0, "x") - y = newVar(3.0, "y") - z = x*(y+5.0) - t = x*y*(z+1.0)*z - echo "x: ",x - echo "y: ",y - echo "z: ",z - echo "t: ",t - let rt = t.eval - echo "t: ",t - echo "rt: ",rt - echo "rz: ",z.eval - echo "dtdz: ",t.evalGrad z - echo "dtdx: ",t.evalGrad x - echo "dtdy: ",t.evalGrad y - echo "dzdx: ",z.evalGrad x - echo "dzdy: ",z.evalGrad y - let u = (x+t)*(t+z) - echo "u: ",u - echo "dudy: ",u.evalGrad y From fd5f6116bb2631a4ca47de9fd8607777b344c90b Mon Sep 17 00:00:00 2001 From: Xiao-Yong Jin Date: Fri, 31 May 2024 23:48:51 -0500 Subject: [PATCH 06/60] experimental/symbolic: momentum works for now --- src/experimental/symbolic/gauge.nim | 246 ++++++++++++++++++++++++++++ src/experimental/symbolic/graph.nim | 96 ++++++----- 2 files changed, 300 insertions(+), 42 deletions(-) create mode 100644 src/experimental/symbolic/gauge.nim diff --git a/src/experimental/symbolic/gauge.nim b/src/experimental/symbolic/gauge.nim new file mode 100644 index 0000000..a933a32 --- /dev/null +++ b/src/experimental/symbolic/gauge.nim @@ -0,0 +1,246 @@ +import experimental/symbolic/graph +import ../../gauge, physics/qcdTypes + +# +# gauge support +# + +# TODO: needs more thought +type Gauge = seq[DLatticeColorMatrixV] + +type SymNodeValueGauge = ref object of SymNodeValueConcrete + gaugeValue: Gauge + +method getGauge*(v: SymNodeValue): Gauge {.base.} = + raiseValueError("Custom method required for value: " & $v) + +method getGauge*(v:SymNodeValueGauge): Gauge = v.gaugeValue + +method `$`*(v: SymNodeValueGauge): string = "gaugeValue" + +proc assign*(z: SymNode, v: Gauge) = + z.assign SymNodeValueGauge(gaugeValue: v) + +method copySymNodeValue*(v: SymNodeValueGauge): SymNodeValueGauge = + # TODO: we don't need this, if we don't take gradient after assign + let z = v.gaugeValue[0].l.newGauge + threads: + for mu in 0.. SymNode + +proc `*`(x: float, y: SymNode): SymNode = + return newSymNode(value = SymNodeValueFloat(floatValue: x)).mul y + +proc `*`(x: SymNode, y: float): SymNode = + return x.mul newSymNode(value = SymNodeValueFloat(floatValue: y)) + +when isMainModule: + let p = newSym("p") + let k = 0.5 * p.norm2 + let dkdp = k.gradient p + + import qex + import os, strutils + + qexInit() + type GaugeActType = enum ActWilson, ActAdjoint, ActRect, ActSymanzik, ActIwasaki, ActDBW2 + converter toGaugeActType(s:string):GaugeActType = parseEnum[GaugeActType](s) + letParam: + gaugefile = "" + savefile = "config" + savefreq = 10 + lat = + if fileExists(gaugefile): + getFileLattice gaugefile + else: + if gaugefile.len > 0: + qexWarn "Nonexistent gauge file: ", gaugefile + @[4,4,4,8] + gact:GaugeActType = "ActWilson" + beta = 6.0 + adjFac = -0.25 + rectFac = -1.4088 + seed:uint64 = 4321 + echoParams() + echo "rank ", myRank, "/", nRanks + threads: echo "thread ", threadNum, "/", numThreads + + let + gc = case gact + of ActWilson: GaugeActionCoeffs(plaq: beta) + of ActAdjoint: GaugeActionCoeffs(plaq: beta, adjplaq: beta*adjFac) + of ActRect: gaugeActRect(beta, rectFac) + of ActSymanzik: Symanzik(beta) + of ActIwasaki: Iwasaki(beta) + of ActDBW2: DBW2(beta) + lo = lat.newLayout + vol = lo.physVol + + echo gc + var r = lo.newRNGField(MRG32k3a, seed) + var R:MRG32k3a # global RNG + R.seed(seed, 987654321) + + var mom = lo.newgauge + threads: + mom.randomTAH r + + p.assign mom + optimize(output = @[k,dkdp], variables = @[p]) + + k.allocate + dkdp.allocate + echo k.treerepr + echo dkdp.treerepr + + k.eval + dkdp.eval + + echo k.value + let dp = dkdp.value.getGauge + var d2 = 0.0 + threads: + var d2t = 0.0 + for mu in 0.. Date: Tue, 4 Jun 2024 16:29:29 -0500 Subject: [PATCH 07/60] remove master branch from installNim add norm2diff for fields --- build/installNim | 6 +++--- src/field/fieldET.nim | 33 +++++++++++++++++++++++++++++++++ src/solvers/cg.nim | 1 + 3 files changed, 37 insertions(+), 3 deletions(-) diff --git a/build/installNim b/build/installNim index 769bba4..b4c077f 100755 --- a/build/installNim +++ b/build/installNim @@ -10,7 +10,7 @@ usage() { echo " -h (this help message)" echo " stable (install latest stable version)" echo " (install named version, e.g. 0.16.0)" - echo " master (install master branch tracking version)" + #echo " master (install master branch tracking version)" echo " devel (install devel branch tracking version)" echo " default stable||master|devel" echo " (set default version)" @@ -35,11 +35,11 @@ if [ "X$1" = "Xdefault" ]; then fi ver="stable" -branch="master" +branch="version-2-0" if [ "X$1" != "X" ]; then case "$1" in stable) ver="stable";; - master) ver="master";; + #master) ver="master";; devel) ver="devel"; branch="devel";; -h) usage;; *) ver="$1"; branch="v$1";; diff --git a/src/field/fieldET.nim b/src/field/fieldET.nim index ab81dbd..c8a6057 100644 --- a/src/field/fieldET.nim +++ b/src/field/fieldET.nim @@ -638,6 +638,39 @@ proc norm2subtract*(x: Field, y: float): float = result = s.simdReduce x.l.threadRankSum(result) +proc norm2diffP*(f,g:SomeField):auto = + tic() + mixin norm2, inorm2, simdSum, items, toDouble + #var n2:type(norm2(f[0])) + var n2: evalType(norm2(toDouble(f[0]))) + #echo n2 + #let t = f + for x in items(f): + let t = toDouble(f[x]) - toDouble(g[x]) + inorm2(n2, t) + toc("norm2 local") + #echoAll n2 + result = simdSum(n2) + toc("norm2 simd sum") + #echoAll myRank, ",", threadNum, ": ", result + #threadSum(result) + #toc("norm2 thread sum") + #rankSum(result) + #toc("norm2 rank sum") + f.l.threadRankSum(result) + #echo result + toc("norm2 thread rank sum") +template norm2diff*(f,g:SomeAllField):auto = + when declared(subsetObject): + #echo "subsetObj" & s + norm2diffP(f[subsetObject], g[subsetObject]) + elif declared(subsetString): + #echo "subset norm2" + norm2diffP(f[subsetString], g[subsetString]) + else: + norm2diffP(f, g) +template norm2diff*(f,g:Subsetted):auto = norm2diffP(f,g) + proc dotP*(f1:SomeField; f2:SomeField2):auto = tic() mixin dot, idot, simdSum, items, toDouble, eval diff --git a/src/solvers/cg.nim b/src/solvers/cg.nim index 16ca02b..f3237f3 100644 --- a/src/solvers/cg.nim +++ b/src/solvers/cg.nim @@ -21,6 +21,7 @@ type proc reset*(cgs: var CgState) = cgs.b2 = -1 cgs.iterations = 0 + cgs.r2 = 1.0 cgs.r2old = 1.0 cgs.rzold = 1.0 cgs.r2stop = 0.0 From aab7a7c5839986ef5081de46db7102d1c6655f9f Mon Sep 17 00:00:00 2001 From: James Osborn Date: Wed, 5 Jun 2024 22:24:34 -0500 Subject: [PATCH 08/60] added coupled harmonic oscillator example --- src/examples/harmonic.nim | 96 +++++++++++++++++++++++++++++++++++++++ src/field/fieldET.nim | 5 +- tests/diffnum | 2 +- 3 files changed, 101 insertions(+), 2 deletions(-) create mode 100644 src/examples/harmonic.nim diff --git a/src/examples/harmonic.nim b/src/examples/harmonic.nim new file mode 100644 index 0000000..affae60 --- /dev/null +++ b/src/examples/harmonic.nim @@ -0,0 +1,96 @@ +# coupled harmonic oscillators example (currently only 1d degree of freedom) +# H = (1/2) sum_ (x_i - x_j)^2 +import qex, strformat + +qexInit() + +let + lat = intSeqParam("lat", @[16]) + lo = lat.newLayout + seed = uint64 intParam("seed", 1) + ntraj = intParam("ntraj", 2) + nsteps = intParam("nsteps", 2) + tau = floatParam("tau", 1) + +var + nAccept = 0 + x = lo.Real + p = lo.Real + xSave = lo.Real + #pSave = lo.Real + rng = lo.newRNGField(MRG32k3a, seed) + globalRng: MRG32k3a # global RNG +globalRng.seed(seed, 987654321) + +proc refreshMomentum(p: auto) = + threads: + p.gaussian rng + +proc action(p,x: auto): float = + var sp, sx: float + threads: + sp := 0.5*p.norm2 + var t = newShifters(x, 1) + let nd = lo.nDim + for mu in 0..2: rel = 1e-12 if len(sys.argv)>3: rel = float(sys.argv[3]) -spchars = " \t[\]," +spchars = " \t[\\]," if len(sys.argv)>4: spchars = sys.argv[4] #print(fn1, fn2, rel, repr(spchars)) From cca9f02ab3c366602af4ebd4595f6d6083b29de5 Mon Sep 17 00:00:00 2001 From: James Osborn Date: Wed, 12 Jun 2024 16:14:00 -0500 Subject: [PATCH 09/60] add some output --- src/examples/harmonic.nim | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/src/examples/harmonic.nim b/src/examples/harmonic.nim index affae60..8d5bbbb 100644 --- a/src/examples/harmonic.nim +++ b/src/examples/harmonic.nim @@ -22,6 +22,10 @@ var globalRng: MRG32k3a # global RNG globalRng.seed(seed, 987654321) +echo "ntraj: ", ntraj +echo "nsteps: ", nsteps +echo "tau: ", tau + proc refreshMomentum(p: auto) = threads: p.gaussian rng @@ -70,6 +74,7 @@ proc printObservables(x: auto) = threads: x := 0 +var ds2 = 0.0 for traj in 1..ntraj: refreshMomentum(p) threads: @@ -79,6 +84,7 @@ for traj in 1..ntraj: evolve(p, x) let s1 = action(p, x) let ds = s1 - s0 + ds2 += ds*ds let pacc = exp(-ds) let r = globalRng.uniform if r <= pacc: # accept @@ -93,4 +99,5 @@ for traj in 1..ntraj: printObservables(x) echo "Acceptance ratio: ", nAccept/ntraj +echo "ds2: ", ds2/ntraj qexFinalize() From 34c4aecf08f0a9ed9a2d4858e77302b74e245f23 Mon Sep 17 00:00:00 2001 From: James Osborn Date: Thu, 13 Jun 2024 13:48:42 -0500 Subject: [PATCH 10/60] Fix noOpenmp build --- src/base/omp.nim | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/src/base/omp.nim b/src/base/omp.nim index 12c17b0..80b803b 100644 --- a/src/base/omp.nim +++ b/src/base/omp.nim @@ -3,11 +3,11 @@ import os when defined(noOpenmp): static: echo "OpenMP disabled" template omp_set_num_threads*(x: cint) = discard - template omp_get_num_threads*(): cint = 1 - template omp_get_max_threads*(): cint = 1 - template omp_get_thread_num*(): cint = 0 - template ompPragma(p:string):untyped = discard - template ompBlock*(p:string; body:untyped):untyped = + template omp_get_num_threads*(): cint = cint 1 + template omp_get_max_threads*(): cint = cint 1 + template omp_get_thread_num*(): cint = cint 0 + template ompPragma(p:string) = discard + template ompBlock*(p:string; body:untyped) = block: body else: @@ -24,11 +24,11 @@ else: proc omp_get_max_threads*(): cint {.omp.} proc omp_get_thread_num*(): cint {.omp.} #proc forceOmpOn() {.omp.} - template ompPragma(p:string):untyped = + template ompPragma(p:string) = #forceOmpOn() #{. emit:["#pragma omp ", p] .} {. emit:["_Pragma(\"omp ", p, "\")"] .} - template ompBlock*(p:string; body:untyped):untyped = + template ompBlock*(p:string; body:untyped) = #{. emit:"#pragma omp " & p .} #{. emit:"{ /* Inserted by ompBlock " & p & " */".} #{. emit:["#pragma omp ", p] .} @@ -39,14 +39,14 @@ else: template ompBarrier* = ompPragma("barrier") -template ompParallel*(body:untyped):untyped = +template ompParallel*(body:untyped) = ompBlock("parallel"): if(omp_get_thread_num()!=0): setupForeignThreadGc() body -template ompMaster*(body:untyped):untyped = ompBlock("master", body) -template ompSingle*(body:untyped):untyped = ompBlock("single", body) -template ompCritical*(body:untyped):untyped = ompBlock("critical", body) +template ompMaster*(body:untyped) = ompBlock("master", body) +template ompSingle*(body:untyped) = ompBlock("single", body) +template ompCritical*(body:untyped) = ompBlock("critical", body) when isMainModule: proc test = From e466c732820b31dd83b48dc2c4996c7db3f0cadd Mon Sep 17 00:00:00 2001 From: James Osborn Date: Mon, 17 Jun 2024 15:43:18 -0500 Subject: [PATCH 11/60] replace wget with curl --- bootstrap-travis | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/bootstrap-travis b/bootstrap-travis index c8554b7..41b23a2 100755 --- a/bootstrap-travis +++ b/bootstrap-travis @@ -3,7 +3,7 @@ qmpv="qmp-2.5.4" qmp="$qmpv.tar.gz" if [ ! -f $qmp ]; then - wget "http://usqcd-software.github.io/downloads/qmp/$qmp" + curl -LO "http://usqcd-software.github.io/downloads/qmp/$qmp" fi tar zxvf $qmp mkdir qmp @@ -19,7 +19,7 @@ cd .. qiov="qio-3.0.0" qio="$qiov.tar.gz" if [ ! -f $qio ]; then - wget "http://usqcd-software.github.io/downloads/qio/$qio" + curl -LO "http://usqcd-software.github.io/downloads/qio/$qio" fi tar zxvf $qio mkdir qio From 85e88112fcbcfc9f617bd569bacb29d58e1aa4ef Mon Sep 17 00:00:00 2001 From: James Osborn Date: Mon, 17 Jun 2024 15:57:28 -0500 Subject: [PATCH 12/60] allow bootstrap without MPI --- bootstrap-travis | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/bootstrap-travis b/bootstrap-travis index 41b23a2..a6f89c4 100755 --- a/bootstrap-travis +++ b/bootstrap-travis @@ -1,5 +1,12 @@ #!/bin/sh +comms="mpi" +cc="mpicc" +if [ "X$1" = "Xsingle" ]; then + comms="single" + cc="gcc" +fi + qmpv="qmp-2.5.4" qmp="$qmpv.tar.gz" if [ ! -f $qmp ]; then @@ -10,8 +17,8 @@ mkdir qmp cd $qmpv ./configure \ --prefix="$PWD/../qmp" \ - --with-qmp-comms-type=mpi \ - CC=mpicc \ + --with-qmp-comms-type=$comms \ + CC=$cc \ CFLAGS="-Wall -O3 -std=gnu99 -g -fPIC" make && make install cd .. @@ -27,7 +34,7 @@ cd $qiov ./configure \ --prefix="$PWD/../qio" \ --with-qmp="$PWD/../qmp" \ - CC=mpicc \ + CC=$cc \ CFLAGS="-Wall -O3 -std=gnu99 -g -fPIC" make && make install cd .. From 969e3edd43aaaaf649e386c8b3bedeb8faf68447 Mon Sep 17 00:00:00 2001 From: James Osborn Date: Tue, 18 Jun 2024 10:26:37 -0500 Subject: [PATCH 13/60] add missing file --- src/observables/sources.nim | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) create mode 100644 src/observables/sources.nim diff --git a/src/observables/sources.nim b/src/observables/sources.nim new file mode 100644 index 0000000..ed22ff2 --- /dev/null +++ b/src/observables/sources.nim @@ -0,0 +1,18 @@ +import field, comms/comms + +proc wallSource*(f: Field, t0: int, v: auto) = + for i in f.l.sites: + let t = f.l.coords[^1][i] + if t == t0: + f{i} := v + +proc norm2slice*(f: SomeField, s: int): seq[float] = + #for i in l: + # let t = l.vcoords[3][i][0] + let ns = f.l.physGeom[s] + var c = newSeq[float](ns) + for i in f.sites: + let k = f.l.coords[s][i] + c[k] += f{i}.norm2 + threadRankSum c + c From 41f9a9d7a0577f4650990d1cc2446eb95ee4da26 Mon Sep 17 00:00:00 2001 From: James Osborn Date: Wed, 19 Jun 2024 11:52:12 -0500 Subject: [PATCH 14/60] start adding Hasenbusch to staggered autograd --- src/experimental/stagag.nim | 164 +++++++++++++++++++++++++++--------- src/hmc/agradOps.nim | 25 +++++- 2 files changed, 148 insertions(+), 41 deletions(-) diff --git a/src/experimental/stagag.nim b/src/experimental/stagag.nim index 982ee36..e27b4ce 100644 --- a/src/experimental/stagag.nim +++ b/src/experimental/stagag.nim @@ -25,8 +25,9 @@ let warmmd = (intParam("warmmd", 1) != 0) ntrain = intParam("ntrain", 10) trajs = intParam("trajs", 10) - nf = floatParam("nf", 1) + nf = intParam("nf", 1) mass = floatParam("mass", 0.1) + hmasses = floatSeqParam("hmasses") # Hasenbusch masses arsq = floatParam("arsq", 1e-20) frsq = floatParam("frsq", 1e-12) seed0 = defaultComm.broadcast(int(1000*epochTime())) @@ -77,6 +78,7 @@ echoparam(ntrain) echoparam(trajs) echoparam(nf) echoparam(mass) +echoparam(hmasses) echoparam(arsq) echoparam(frsq) echoparam(seed) @@ -112,12 +114,16 @@ var p = lo.newgauge f = lo.newgauge g0 = lo.newgauge - phi = lo.ColorVector() - psi = lo.ColorVector() + phi = newseq[typeof(lo.ColorVector())](1+hmasses.len) + psi = newseq[typeof(lo.ColorVector())](1+hmasses.len) + ftmp = lo.ColorVector() +for i in 0.. Date: Sun, 23 Jun 2024 11:18:45 -0500 Subject: [PATCH 15/60] Don't allow nested threads Start fixing hmass gradient --- src/base/profile.nim | 21 ++++++----- src/base/threading.nim | 1 + src/experimental/stagag.nim | 70 ++++++++++++++++++++++++++----------- src/hmc/agradOps.nim | 19 ++++++++++ 4 files changed, 82 insertions(+), 29 deletions(-) diff --git a/src/base/profile.nim b/src/base/profile.nim index 2f9a366..eef708d 100644 --- a/src/base/profile.nim +++ b/src/base/profile.nim @@ -171,7 +171,8 @@ const var rtiStack = newListOfCap[RTInfoObj](defaultRTICap) - cpHeap = newSeqOfCap[CodePointObj](defaultRTICap) + #cpHeap = newSeqOfCap[CodePointObj](defaultRTICap) + cpHeap = newListOfCap[CodePointObj](defaultRTICap) frozenTimers = false proc timersFrozen*:bool = frozenTimers @@ -355,10 +356,12 @@ template ticI(n = -1; s:SString = "") = localTic {.inject, used.} = prevRTI when noTicToc: - template tic*() = discard - template tic*(n: int) = discard - template tic*(s: SString) = discard - template tic*(n: int; s: SString) = discard + template tic0 = + var localTimerStart {.inject,used.} = getTics() + template tic*() = tic0 + template tic*(n: int) = tic0 + template tic*(s: SString) = tic0 + template tic*(n: int; s: SString) = tic0 else: template tic*(n = -1; s:SString = "") = ticI(n-1,s) template tic*(s:SString = "") = ticI(-2,s) @@ -483,10 +486,10 @@ else: template toc*(n:int) = tocI(0, "", n-1) template toc*() = tocI(0, "", -2) -when noTicToc: - template getElapsedTime*: float = 0.0 -else: - template getElapsedTime*: float = ticDiffSecs(getTics(), localTimerStart) +#when noTicToc: +# template getElapsedTime*: float = 0.0 +#else: +template getElapsedTime*: float = ticDiffSecs(getTics(), localTimerStart) proc reset(x:var RTInfoObj) = x.nsec = 0 diff --git a/src/base/threading.nim b/src/base/threading.nim index 803f691..631ef61 100644 --- a/src/base/threading.nim +++ b/src/base/threading.nim @@ -56,6 +56,7 @@ template emitStackTrace: untyped = template threads*(body:untyped):untyped = checkInit() + doAssert(numThreads==1) let tidOld = threadNum let nidOld = numThreads let tlOld = threadLocals diff --git a/src/experimental/stagag.nim b/src/experimental/stagag.nim index e27b4ce..db1ac9a 100644 --- a/src/experimental/stagag.nim +++ b/src/experimental/stagag.nim @@ -15,6 +15,7 @@ let tau = floatParam("tau", 0.04) fixtau = (intParam("fixtau",0) != 0) fixparams = (intParam("fixparams",0) != 0) + fixhmasses = (intParam("fixhmasses",0) != 0) md = stringParam("md", "aba") upit = intParam("upit", 1) anneal = floatParam("anneal", 0.99) @@ -114,12 +115,17 @@ var p = lo.newgauge f = lo.newgauge g0 = lo.newgauge - phi = newseq[typeof(lo.ColorVector())](1+hmasses.len) - psi = newseq[typeof(lo.ColorVector())](1+hmasses.len) + eta = newseq[typeof(lo.ColorVector())](1+hmasses.len) # random gaussian + phi = newseq[typeof(lo.ColorVector())](1+hmasses.len) # pseudofermion + psi = newseq[typeof(lo.ColorVector())](1+hmasses.len) # intermediate + ptmp = newseq[typeof(lo.ColorVector())](hmasses.len) # intermediate ftmp = lo.ColorVector() for i in 0..0: costg = 0 + if fixhmasses and i>0 and i<=hmasses.len: costg = 0 + if fixparams and i>hmasses.len: costg = 0 #if i > 0: # costg = (m.hOld-m.hNew)*params[i].grad result.add costg @@ -1503,6 +1527,7 @@ block: src.wallSource(0, v) #echo src.norm2slice(3) +var gfStats: RunningStat proc measure = #threads: # g.rephase @@ -1521,7 +1546,12 @@ proc measure = #for i in 0.. Date: Mon, 24 Jun 2024 21:40:25 -0500 Subject: [PATCH 16/60] fix hmass gradient --- src/comms/comms.nim | 7 ++ src/experimental/stagag.nim | 230 +++++++++++++++++++++++------------- src/hmc/agradOps.nim | 40 +++++++ 3 files changed, 197 insertions(+), 80 deletions(-) diff --git a/src/comms/comms.nim b/src/comms/comms.nim index 6b6e69b..9e4be9a 100644 --- a/src/comms/comms.nim +++ b/src/comms/comms.nim @@ -70,8 +70,13 @@ template allReduce*(c: Comm, x: var UncheckedArray[float64], n: int) = template pushSend*(c: Comm, rank: int, xx: SomeNumber) = var x = xx pushSend(c, rank, &&x, sizeof(x)) +template pushSend*(c: Comm, rank: int, x: object) = + pushSend(c, rank, &&x, sizeof(x)) template pushSend*(c: Comm, rank: int, x: seq) = pushSend(c, rank, &&x[0], x.len*sizeof(x[0])) +template waitSend*(c: Comm) = + let n = c.nsends - 1 + c.waitSends(n, n) template waitSends*(c: Comm) = c.waitSends(0, c.nsends-1) template waitSends*(c: Comm, k: int) = @@ -81,6 +86,8 @@ template waitSends*(c: Comm, k: int) = template pushRecv*(c: Comm, rank: int, x: SomeNumber) = pushRecv(c, rank, &&x, sizeof(x)) +template pushRecv*(c: Comm, rank: int, x: object) = + pushRecv(c, rank, &&x, sizeof(x)) template pushRecv*(c: Comm, rank: int, x: seq) = pushRecv(c, rank, &&x[0], x.len*sizeof(x[0])) template freeRecvs*(c: Comm; k: int) = diff --git a/src/experimental/stagag.nim b/src/experimental/stagag.nim index db1ac9a..6cb9031 100644 --- a/src/experimental/stagag.nim +++ b/src/experimental/stagag.nim @@ -1,5 +1,5 @@ import qex, gauge, physics/[qcdTypes,stagSolve] -import times, macros, algorithm +import times, macros, algorithm, sequtils import hmc/metropolis import observables/sources import quda/qudaWrapper @@ -118,14 +118,11 @@ var eta = newseq[typeof(lo.ColorVector())](1+hmasses.len) # random gaussian phi = newseq[typeof(lo.ColorVector())](1+hmasses.len) # pseudofermion psi = newseq[typeof(lo.ColorVector())](1+hmasses.len) # intermediate - ptmp = newseq[typeof(lo.ColorVector())](hmasses.len) # intermediate ftmp = lo.ColorVector() for i in 0..=0 and i0 and i<=hmasses.len: costg = 0 @@ -1377,16 +1415,24 @@ proc checkGrad(m: Met) = gx := g #let tg = vtau.grad #let f0 = vtau.obj * exp(m.hOld-m.hNew) + #let h0 = m.hOld let eps = 1e-6 var gs = newSeq[float](0) for i in 0.. 0: echo "Starting warmups" @@ -1617,12 +1684,12 @@ block: tic() m.update getGrad(m) + let tup = getElapsedTime() + measure() if upit > 0: if n mod upit == 0: updateParams(sqrt(float upit)*lrate) lrate *= anneal - let tup = getElapsedTime() - measure() let ttot = getElapsedTime() echo "End trajectory update: ", tup, " measure: ", ttot-tup, " total: ", ttot let et = getElapsedTime() @@ -1631,6 +1698,7 @@ block: #let at = gutime + gftime + fftime #echo &"gu: {gutime} gf: {gftime} ff: {fftime} ot: {et-at} tt: {et}" +resetMeasure() if trajs > 0: m.clearStats pacc.clear @@ -1645,7 +1713,9 @@ if trajs > 0: #echo "cost: ", nff/(vtau.obj*vtau.obj*m.avgPAccept) echo "cost: ", getCost0(m) let tup = getElapsedTime() - echo "End inference: ", tup + measure() + let ttot = getElapsedTime() + echo "End inference update: ", tup, " measure: ", ttot-tup, " total: ", ttot let et = getElapsedTime() toc() echo "Inference time: ", et diff --git a/src/hmc/agradOps.nim b/src/hmc/agradOps.nim index c57edbb..4514fb9 100644 --- a/src/hmc/agradOps.nim +++ b/src/hmc/agradOps.nim @@ -53,6 +53,26 @@ proc peqmul[G:GaugeF](r: G, x: float, y: G) = for s in r[mu]: r[mu][s] += x * y[mu][s] +proc assigngrad(c: float, r: var float) = + r += c +proc assignfwd[I,O](op: AgOp[I,O]) {.nimcall.} = + #mixin peq + op.outputs.obj := op.inputs.maybeObj + when op.inputs is AgVar: + zero op.inputs.grad +proc assignbck[I,O](op: AgOp[I,O]) {.nimcall.} = + #mixin peq + when op.inputs is AgVar: + if op.inputs.doGrad: + assigngrad(op.outputs.grad, op.inputs.grad) +proc assign(c: var AgTape, r: AgVar, x: auto) = + var op = newAgOp(x, r, assignfwd, assignbck) + c.add op +template assign*(r: AgVar, x: auto) = + r.ctx.assign(r, x) +template `:=`*(r: AgVar, x: auto) = + r.ctx.assign(r, x) + proc addgrad1(c: float, r: var float, y: float) = r += c proc addgrad2(c: float, x: float, r: var float) = @@ -229,6 +249,26 @@ proc divd(c: var AgTape, r: AgVar, x: auto, y: auto) = template divd*(r: AgVar, x: auto, y: auto) = r.ctx.divd(r, x, y) +proc peqgrad(c: float, r: var float) = + r += c +proc peqfwd[I,O](op: AgOp[I,O]) {.nimcall.} = + #mixin peq + op.outputs.obj += op.inputs.maybeObj + when op.inputs is AgVar: + zero op.inputs.grad +proc peqbck[I,O](op: AgOp[I,O]) {.nimcall.} = + #mixin peq + when op.inputs is AgVar: + if op.inputs.doGrad: + peqgrad(op.outputs.grad, op.inputs.grad) +proc peq(c: var AgTape, r: AgVar, x: auto) = + var op = newAgOp(x, r, peqfwd, peqbck) + c.add op +template peq*(r: AgVar, x: auto) = + r.ctx.peq(r, x) +template `+=`*(r: AgVar, x: auto) = + r.ctx.peq(r, x) + proc mulna[G:GaugeF](r: G, x: G, y: G) = threads: for mu in 0.. Date: Tue, 25 Jun 2024 09:36:01 -0500 Subject: [PATCH 17/60] fix warning echo fixhmasses --- src/comms/commsUtils.nim | 4 ++-- src/experimental/stagag.nim | 1 + 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/src/comms/commsUtils.nim b/src/comms/commsUtils.nim index 647c0ad..1d02719 100644 --- a/src/comms/commsUtils.nim +++ b/src/comms/commsUtils.nim @@ -342,8 +342,8 @@ macro rankMax*(a:varargs[untyped]):auto = qmpMax(`a0`) else: error("rankMax not imlemented for multiple arguments.") - result = newCall(ident("rankMaxN")) - for v in a: result.add v + #result = newCall(ident("rankMaxN")) + #for v in a: result.add v template threadRankMax1*(a:untyped):untyped = var ta{.global.}:type(a) diff --git a/src/experimental/stagag.nim b/src/experimental/stagag.nim index 6cb9031..54ca6c1 100644 --- a/src/experimental/stagag.nim +++ b/src/experimental/stagag.nim @@ -66,6 +66,7 @@ echoparam(beta) echoparam(tau) echoparam(fixtau) echoparam(fixparams) +echoparam(fixhmasses) echoparam(md) echoparam(upit) echoparam(lrate) From 59ede49ba537ccdb4f9d98b38ee1c77dc2710acb Mon Sep 17 00:00:00 2001 From: James Osborn Date: Fri, 28 Jun 2024 13:04:13 -0500 Subject: [PATCH 18/60] force tau gradient to be negative for large deltaH --- src/experimental/stagag.nim | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/experimental/stagag.nim b/src/experimental/stagag.nim index 54ca6c1..d6aee0a 100644 --- a/src/experimental/stagag.nim +++ b/src/experimental/stagag.nim @@ -1394,7 +1394,7 @@ proc getCost(m: Met): seq[float] = # grad (p = min(1,exp(ho-hn))) -> 0 or - p grad(hn) var pg = 0.0 if m.hNew > m.hOld: - pg = - m.pAccept * params[i].grad + pg = - pm * params[i].grad paccg[i].push pg #pg = paccg[i].mean var costg = (if i==0: 2.0*nsteps*ct*pm else: 0.0) @@ -1404,6 +1404,9 @@ proc getCost(m: Met): seq[float] = #costg += alp*d*(d*pg + 2*(pm-1)*params[i].grad) costg = costg/fc - (ct*ct*pm/(fc*fc))*forceCostGrad(i) #costg = nff*costg*(cost*cost) # extra - to make it minimize + if i==0: + if m.deltaH > 10: + costg = -0.1 if fixtau and i==0: costg = 0 if fixhmasses and i>0 and i<=hmasses.len: costg = 0 if fixparams and i>hmasses.len: costg = 0 From 9ebe1ced5e303c93a768e6afc966061bcdfcce9e Mon Sep 17 00:00:00 2001 From: James Osborn Date: Sat, 29 Jun 2024 21:02:12 -0500 Subject: [PATCH 19/60] add separate learning rate for hmasses --- src/experimental/stagag.nim | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/src/experimental/stagag.nim b/src/experimental/stagag.nim index d6aee0a..0fd20e8 100644 --- a/src/experimental/stagag.nim +++ b/src/experimental/stagag.nim @@ -37,6 +37,7 @@ let outfn = stringParam("outfn", "") var lrate = floatParam("lrate", 0.001) + lrateh = floatParam("lrateh", lrate) pt0 = floatParam("t0", 0) pg0 = floatParam("g0", 0) pf0 = floatParam("f0", 0) @@ -70,6 +71,7 @@ echoparam(fixhmasses) echoparam(md) echoparam(upit) echoparam(lrate) +echoparam(lrateh) echoparam(anneal) echoparam(alpha) echoparam(checkg) @@ -1475,7 +1477,7 @@ proc getGrad(m: Met) = #echo "costg: ", cgstat[i].mean, " ", cst[i+1] echo &"costg: {m/v:8.6f} {m:10.6f} {cst[i+1]}" -proc updateParams(rate: float) = +proc updateParams(ratefac: float) = let eps = 1e-8 let n = cgstat.len #var s = 0.0 @@ -1492,6 +1494,9 @@ proc updateParams(rate: float) = echo "Params: ", p #s = rate*sqrt(n/s) for i in 0..0 and i<=hmasses.len: + rate = ratefac * lrateh #let m = cgstat[i].mean #let d = s*g[i] let d = rate*g[i] @@ -1692,8 +1697,9 @@ block: measure() if upit > 0: if n mod upit == 0: - updateParams(sqrt(float upit)*lrate) + updateParams(sqrt(float upit)) lrate *= anneal + lrateh *= anneal let ttot = getElapsedTime() echo "End trajectory update: ", tup, " measure: ", ttot-tup, " total: ", ttot let et = getElapsedTime() From f8f61d057b331879e85429b5f1172baef425660a Mon Sep 17 00:00:00 2001 From: Xiao-Yong Jin Date: Tue, 2 Jul 2024 11:31:15 -0500 Subject: [PATCH 20/60] experimental/stagag: add g10f2 --- src/experimental/stagag.nim | 61 +++++++++++++++++++++++++++++++++++++ 1 file changed, 61 insertions(+) diff --git a/src/experimental/stagag.nim b/src/experimental/stagag.nim index 0fd20e8..13fd1bf 100644 --- a/src/experimental/stagag.nim +++ b/src/experimental/stagag.nim @@ -53,6 +53,16 @@ var pf2 = floatParam("f2", 0) pgf2 = floatParam("gf2", 0) pff2 = floatParam("ff2", 0) + pt3 = floatParam("t3", 0) + pg3 = floatParam("g3", 0) + pf3 = floatParam("f3", 0) + pgf3 = floatParam("gf3", 0) + pff3 = floatParam("ff3", 0) + pt4 = floatParam("t4", 0) + pg4 = floatParam("g4", 0) + pf4 = floatParam("f4", 0) + pgf4 = floatParam("gf4", 0) + pff4 = floatParam("ff4", 0) macro echoparam(x: auto): auto = let n = x.repr @@ -447,6 +457,56 @@ proc setupMDababa = addG(g0) addT(t0) +proc setupMDg10f2 = + if pt0 == 0: pt0 = 0.075 + if pt1 == 0: pt1 = 0.07 + if pt2 == 0: pt2 = 0.06 + if pt3 == 0: pt3 = 0.1 + if pt4 == 0: pt4 = 0.1 + if pg0 == 0: pg0 = 0.095 + if pg1 == 0: pg1 = 0.095 + if pg2 == 0: pg2 = 0.09 + if pg3 == 0: pg3 = 0.09 + #let eps = vtau / nsteps + let t0 = vtau * pushParam(pt0) + let t02 = 2 * t0 + let t1 = vtau * pushParam(pt1) + let t2 = vtau * pushParam(pt2) + let t3 = vtau * pushParam(pt3) + let t4 = vtau * pushParam(pt4) + let t5 = vtau - t02 - 2 * (t1 + t2 + t3 + t4) + let g0 = vtau * pushParam(pg0) + let g1 = vtau * pushParam(pg1) + let g2 = vtau * pushParam(pg2) + let g3 = vtau * pushParam(pg3) + let g4 = 0.5 * vtau - (g0 + g1 + g2 + g3) + let f0 = 0.5 * vtau + addT(t0) + for i in 0.. Date: Tue, 2 Jul 2024 17:21:47 -0500 Subject: [PATCH 21/60] add g5f2 integrator --- src/experimental/stagag.nim | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) diff --git a/src/experimental/stagag.nim b/src/experimental/stagag.nim index 13fd1bf..d0ef67a 100644 --- a/src/experimental/stagag.nim +++ b/src/experimental/stagag.nim @@ -457,6 +457,36 @@ proc setupMDababa = addG(g0) addT(t0) +#proc setupMDagabagabaga = +proc setupMDg5f2 = + if pt0 == 0: pt0 = 0.1 + if pt1 == 0: pt1 = 0.1 + if pg0 == 0: pg0 = 0.1 + if pg1 == 0: pg1 = 0.2 + let t0 = vtau * pushParam(pt0) + let t02 = 2 * t0 + let t1 = vtau * pushParam(pt1) + let t2 = 0.5*vtau - t0 - t1 + let g0 = vtau * pushParam(pg0) + let g1 = vtau * pushParam(pg1) + let g2 = vtau - 2*( g0 + g1 ) + let f0 = 0.5*vtau + addT(t0) + for i in 0.. Date: Tue, 2 Jul 2024 21:20:27 -0500 Subject: [PATCH 22/60] change force metrics --- src/experimental/stagag.nim | 28 +++++++++++++++++++++------- 1 file changed, 21 insertions(+), 7 deletions(-) diff --git a/src/experimental/stagag.nim b/src/experimental/stagag.nim index d0ef67a..f8b9d09 100644 --- a/src/experimental/stagag.nim +++ b/src/experimental/stagag.nim @@ -1684,6 +1684,11 @@ proc resetMeasure = for i in 0.. Date: Tue, 2 Jul 2024 22:24:36 -0500 Subject: [PATCH 23/60] change force statistics scale --- src/experimental/stagag.nim | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/src/experimental/stagag.nim b/src/experimental/stagag.nim index f8b9d09..71bc7f2 100644 --- a/src/experimental/stagag.nim +++ b/src/experimental/stagag.nim @@ -1684,6 +1684,11 @@ proc resetMeasure = for i in 0.. Date: Wed, 3 Jul 2024 12:04:04 -0500 Subject: [PATCH 24/60] add g5f23 integrator --- src/experimental/stagag.nim | 58 +++++++++++++++++++++++++++++++++---- 1 file changed, 52 insertions(+), 6 deletions(-) diff --git a/src/experimental/stagag.nim b/src/experimental/stagag.nim index 71bc7f2..5613c75 100644 --- a/src/experimental/stagag.nim +++ b/src/experimental/stagag.nim @@ -352,10 +352,10 @@ proc addFf(g: GaugeV, i = 0) = var ffList = newSeqWith(1+hmasses.len, newSeq[int](0)) var ffeList = newSeqWith(1+hmasses.len, newSeq[FloatV](0)) -proc addFx(veps: FloatV, p0,g: GaugeV) = +proc addFx(veps: FloatV; p0,g: GaugeV; i0,i1: int) = var p = p0 if nf == 0: return - for i in 0..hmasses.len: + for i in i0..i1: addFf(g, i) pushMom() var va: FloatV @@ -376,7 +376,15 @@ proc addFx(veps: FloatV, p0,g: GaugeV) = proc addF(veps: FloatV) = if nf == 0: return - addFx(veps, momvs[^1], gaugevs[^1]) + addFx(veps, momvs[^1], gaugevs[^1], 0, hmasses.len) + +proc addF(veps: FloatV, i: int) = + if nf == 0: return + addFx(veps, momvs[^1], gaugevs[^1], i, i) + +proc addF(veps: FloatV; i0,i1: int) = + if nf == 0: return + addFx(veps, momvs[^1], gaugevs[^1], i0, i1) proc addFF(va, vb: FloatV) = if nf == 0: return @@ -387,7 +395,7 @@ proc addFF(va, vb: FloatV) = exp(momvs[^1], vbx, momvs[^2]) pushMom() mul(momvs[^1], momvs[^2], gaugevs[^1]) - addFx(va, p, momvs[^1]) + addFx(va, p, momvs[^1], 0, hmasses.len) proc setupMDx = pushTemp() @@ -461,8 +469,8 @@ proc setupMDababa = proc setupMDg5f2 = if pt0 == 0: pt0 = 0.1 if pt1 == 0: pt1 = 0.1 - if pg0 == 0: pg0 = 0.1 - if pg1 == 0: pg1 = 0.2 + if pg0 == 0: pg0 = 0.14 + if pg1 == 0: pg1 = 0.24 let t0 = vtau * pushParam(pt0) let t02 = 2 * t0 let t1 = vtau * pushParam(pt1) @@ -487,6 +495,43 @@ proc setupMDg5f2 = addG(g0) addT(t0) +proc setupMDg5f23 = + if pt0 == 0: pt0 = 0.09 + if pt1 == 0: pt1 = 0.13 + if pg0 == 0: pg0 = 0.13 + if pg1 == 0: pg1 = 0.23 + if pf0 == 0: pf0 = 0.26 + let t0 = vtau * pushParam(pt0) + let t02 = 2 * t0 + let t1 = vtau * pushParam(pt1) + let t2 = 0.5*vtau - t0 - t1 + let g0 = vtau * pushParam(pg0) + let g1 = vtau * pushParam(pg1) + let g2 = vtau - 2*( g0 + g1 ) + let f00 = vtau * pushParam(pf0) + let f01 = vtau - 2*f00 + let f10 = 0.5*vtau + let i0 = hmasses.len + let i1 = hmasses.len - 1 + addT(t0) + for i in 0.. Date: Wed, 3 Jul 2024 18:23:26 -0500 Subject: [PATCH 25/60] add g6f24 integrator make metropolis use comms echo --- src/experimental/stagag.nim | 43 +++++++++++++++++++++++++++++++++++++ src/hmc/metropolis.nim | 2 +- 2 files changed, 44 insertions(+), 1 deletion(-) diff --git a/src/experimental/stagag.nim b/src/experimental/stagag.nim index 5613c75..cbc85ef 100644 --- a/src/experimental/stagag.nim +++ b/src/experimental/stagag.nim @@ -532,6 +532,48 @@ proc setupMDg5f23 = addG(g0) addT(t0) +proc setupMDg6f24 = + if pt0 == 0: pt0 = 0.1 + if pt1 == 0: pt1 = 0.11 + if pt2 == 0: pt2 = 0.13 + if pg0 == 0: pg0 = 0.12 + if pg1 == 0: pg1 = 0.22 + if pf0 == 0: pf0 = 0.22 + let t0 = vtau * pushParam(pt0) + let t02 = 2 * t0 + let t1 = vtau * pushParam(pt1) + let t2 = vtau * pushParam(pt2) + let t3 = vtau - 2*(t0 + t1 + t2) + let g0 = vtau * pushParam(pg0) + let g1 = vtau * pushParam(pg1) + let g2 = 0.5*vtau - g0 - g1 + let f00 = vtau * pushParam(pf0) + let f01 = 0.5*vtau - f00 + let f10 = 0.5*vtau + let i0 = hmasses.len + let i1 = hmasses.len - 1 + addT(t0) + for i in 0.. Date: Thu, 4 Jul 2024 17:52:10 -0500 Subject: [PATCH 26/60] add g10f28 integrator --- src/experimental/stagag.nim | 76 ++++++++++++++++++++++++++++++++++++- 1 file changed, 74 insertions(+), 2 deletions(-) diff --git a/src/experimental/stagag.nim b/src/experimental/stagag.nim index cbc85ef..f05b140 100644 --- a/src/experimental/stagag.nim +++ b/src/experimental/stagag.nim @@ -5,7 +5,11 @@ import observables/sources import quda/qudaWrapper import hmc/agradOps -qexinit() +qexInit() +echo "rank ", myRank, "/", nRanks +threads: + echo "thread ", threadNum, "/", numThreads + proc `:=`*(r: var seq, x: seq) = for i in 0.. Date: Sat, 6 Jul 2024 22:18:08 -0500 Subject: [PATCH 27/60] fixed force gradient for Hasenbusch masses --- src/experimental/stagag.nim | 49 ++++++++++++++++++++++--------------- 1 file changed, 29 insertions(+), 20 deletions(-) diff --git a/src/experimental/stagag.nim b/src/experimental/stagag.nim index f05b140..75ae886 100644 --- a/src/experimental/stagag.nim +++ b/src/experimental/stagag.nim @@ -354,6 +354,20 @@ proc addFf(g: GaugeV, i = 0) = projtah(momvs[^1], momvs[^2]) nff[i] += 1 +proc massFactor0(veps: FloatV, i: int): FloatV = + if i == 0: + result = (-0.5/mass) * veps + else: + result = -0.5 * (veps/vhmasses[i-1]) + +proc massFactor(veps: FloatV, i: int): FloatV = + result = massFactor0(veps, i) + if i < hmasses.len: # last term is just inverse (no ratio) + if i == 0: + result = result * (vhmasses[0]*vhmasses[0]-mass*mass) + else: + result = result * (vhmasses[i]*vhmasses[i]-vhmasses[i-1]*vhmasses[i-1]) + var ffList = newSeqWith(1+hmasses.len, newSeq[int](0)) var ffeList = newSeqWith(1+hmasses.len, newSeq[FloatV](0)) proc addFx(veps: FloatV; p0,g: GaugeV; i0,i1: int) = @@ -362,17 +376,7 @@ proc addFx(veps: FloatV; p0,g: GaugeV; i0,i1: int) = for i in i0..i1: addFf(g, i) pushMom() - var va: FloatV - if i == hmasses.len: # last term is just inverse (no ratio) - if i == 0: - va = (-0.5/mass) * veps - else: - va = -0.5 * (veps/vhmasses[i-1]) - else: - if i == 0: - va = (-0.5/mass) * veps * (vhmasses[0]*vhmasses[0]-mass*mass) - else: - va = -0.5*veps*(vhmasses[i]*vhmasses[i]/vhmasses[i-1]-vhmasses[i-1]) + let va = massFactor(veps, i) xpay(momvs[^1], p, va, momvs[^2]) p = momvs[^1] ffList[i].add momvs.len-2 @@ -390,16 +394,21 @@ proc addF(veps: FloatV; i0,i1: int) = if nf == 0: return addFx(veps, momvs[^1], gaugevs[^1], i0, i1) -proc addFF(va, vb: FloatV) = +proc addFF(va, vb: FloatV; i0,i1: int) = if nf == 0: return - let p = momvs[^1] - addFf(gaugevs[^1]) - pushMom() - let vbx = (-0.5/mass) * vb - exp(momvs[^1], vbx, momvs[^2]) - pushMom() - mul(momvs[^1], momvs[^2], gaugevs[^1]) - addFx(va, p, momvs[^1], 0, hmasses.len) + var p = momvs[^1] + let g = gaugevs[^1] + for i in i0..i1: + addFf(g, i) + pushMom() + let vbx = massFactor(vb, i) + exp(momvs[^1], vbx, momvs[^2]) + pushMom() + mul(momvs[^1], momvs[^2], gaugevs[^1]) + addFx(va, p, momvs[^1], i, i) + p = momvs[^1] + +template addFF(va, vb: FloatV) = addFF(va, vb, 0, hmasses.len) proc setupMDx = pushTemp() From e9fa6a342486fd0455d76e27758db3889fe692ed Mon Sep 17 00:00:00 2001 From: James Osborn Date: Tue, 9 Jul 2024 13:34:12 -0500 Subject: [PATCH 28/60] add force solver statistics --- src/experimental/stagag.nim | 33 +++++++++++++++++++++++++-------- src/hmc/agradOps.nim | 11 +++++++---- 2 files changed, 32 insertions(+), 12 deletions(-) diff --git a/src/experimental/stagag.nim b/src/experimental/stagag.nim index 75ae886..e4a18cf 100644 --- a/src/experimental/stagag.nim +++ b/src/experimental/stagag.nim @@ -168,12 +168,19 @@ var spa = initSolverParams() spa.r2req = arsq spa.maxits = 10000 #spa.backend = sbQex -var spf = initSolverParams() -#spf.subsetName = "even" -spf.r2req = frsq -spf.maxits = 10000 -spf.verbosity = 0 -#spf.backend = sbQex +var spf = newSeq[type spa](hmasses.len+1) # fermion force forward +var spfb = newSeq[type spa](hmasses.len+1) # fermion force backward +for i in 0.. 0: if n mod upit == 0: @@ -1992,6 +2004,9 @@ block: #echo &"gu: {gutime} gf: {gftime} ff: {fftime} ot: {et-at} tt: {et}" resetMeasure() +for i in 0.. 0: m.clearStats pacc.clear @@ -2006,6 +2021,8 @@ if trajs > 0: #echo "cost: ", nff/(vtau.obj*vtau.obj*m.avgPAccept) echo "cost: ", getCost0(m) let tup = getElapsedTime() + for i in 0.. Date: Tue, 9 Jul 2024 20:03:09 -0500 Subject: [PATCH 29/60] fix force-gradient integrator for multiple steps --- src/experimental/stagag.nim | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/src/experimental/stagag.nim b/src/experimental/stagag.nim index e4a18cf..6d31f9f 100644 --- a/src/experimental/stagag.nim +++ b/src/experimental/stagag.nim @@ -829,6 +829,7 @@ proc setupMDabacaba = if pgf1 == 0: pgf1 = 0.006938106540706989*(2.0/(1-2*pg0)) if pff1 == 0: pff1 = 0.006938106540706989*(2.0/(1-2*pf0)) let t0 = vtau * pushParam(pt0) + let t02 = 2 * t0 let g0 = vtau * pushParam(pg0) let f0 = if nf==0: g0 else: vtau * pushParam(pf0) let t1 = 0.5 * vtau - t0 @@ -837,14 +838,16 @@ proc setupMDabacaba = let gf1 = vtau*vtau*pushParam(pgf1) let ff1 = if nf==0: gf1 else: vtau*vtau*pushParam(pff1) addT(t0) - addG(g0) - addF(f0) - addT(t1) - addGF(g1, gf1) - addFF(f1, ff1) - addT(t1) - addF(f0) - addG(g0) + for i in 0.. Date: Wed, 10 Jul 2024 16:01:26 -0500 Subject: [PATCH 30/60] combine gauge and fermion force gradient --- src/experimental/stagag.nim | 43 +++++++++++++++++++++++++++++-------- src/hmc/agradOps.nim | 34 ++++++++++++++++++++++++++--- 2 files changed, 65 insertions(+), 12 deletions(-) diff --git a/src/experimental/stagag.nim b/src/experimental/stagag.nim index 6d31f9f..4b9305f 100644 --- a/src/experimental/stagag.nim +++ b/src/experimental/stagag.nim @@ -417,6 +417,30 @@ proc addFF(va, vb: FloatV; i0,i1: int) = template addFF(va, vb: FloatV) = addFF(va, vb, 0, hmasses.len) +proc addGFF(va, vb: FloatV; i0,i1: int) = + if nf == 0: return + var p = momvs[^1] + let g = gaugevs[^1] + addGf(g) + pushMom() + var s = momvs[^1] + mul(s, vb, momvs[^2]) + for i in i0..i1: + addFf(g, i) + let vbx = massFactor(vb, i) + pushMom() + xpay(momvs[^1], s, vbx, momvs[^2]) + s = momvs[^1] + pushMom() + exp(momvs[^1], s) + pushMom() + mul(momvs[^1], momvs[^2], g) + s = momvs[^1] + addGx(va, p, s) + addFx(va, momvs[^1], s, i0, i1) + +template addGFF(va, vb: FloatV) = addGFF(va, vb, 0, hmasses.len) + proc setupMDx = pushTemp() let vtau2 = ptemps[^1] @@ -825,28 +849,29 @@ proc setupMDbacab = proc setupMDabacaba = if pt0 == 0: pt0 = 0.08935804763220157 if pg0 == 0: pg0 = 0.2470939580390842 - if pf0 == 0: pf0 = 0.2470939580390842 + #if pf0 == 0: pf0 = 0.2470939580390842 if pgf1 == 0: pgf1 = 0.006938106540706989*(2.0/(1-2*pg0)) - if pff1 == 0: pff1 = 0.006938106540706989*(2.0/(1-2*pf0)) + #if pff1 == 0: pff1 = 0.006938106540706989*(2.0/(1-2*pf0)) let t0 = vtau * pushParam(pt0) let t02 = 2 * t0 let g0 = vtau * pushParam(pg0) - let f0 = if nf==0: g0 else: vtau * pushParam(pf0) + #let f0 = if nf==0: g0 else: vtau * pushParam(pf0) let t1 = 0.5 * vtau - t0 let g1 = vtau - 2 * g0 - let f1 = vtau - 2 * f0 + #let f1 = vtau - 2 * f0 let gf1 = vtau*vtau*pushParam(pgf1) - let ff1 = if nf==0: gf1 else: vtau*vtau*pushParam(pff1) + #let ff1 = if nf==0: gf1 else: vtau*vtau*pushParam(pff1) addT(t0) for i in 0.. Date: Wed, 10 Jul 2024 18:02:41 -0500 Subject: [PATCH 31/60] allow different coefficients for G and F forces in force-gradient --- src/experimental/stagag.nim | 30 +++++++++++++++--------------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/src/experimental/stagag.nim b/src/experimental/stagag.nim index 4b9305f..2cfd00e 100644 --- a/src/experimental/stagag.nim +++ b/src/experimental/stagag.nim @@ -417,17 +417,16 @@ proc addFF(va, vb: FloatV; i0,i1: int) = template addFF(va, vb: FloatV) = addFF(va, vb, 0, hmasses.len) -proc addGFF(va, vb: FloatV; i0,i1: int) = - if nf == 0: return +proc addGFF(vag, vbg, vaf, vbf: FloatV; i0,i1: int) = var p = momvs[^1] let g = gaugevs[^1] addGf(g) pushMom() var s = momvs[^1] - mul(s, vb, momvs[^2]) + mul(s, vbg, momvs[^2]) for i in i0..i1: addFf(g, i) - let vbx = massFactor(vb, i) + let vbx = massFactor(vbf, i) pushMom() xpay(momvs[^1], s, vbx, momvs[^2]) s = momvs[^1] @@ -436,10 +435,11 @@ proc addGFF(va, vb: FloatV; i0,i1: int) = pushMom() mul(momvs[^1], momvs[^2], g) s = momvs[^1] - addGx(va, p, s) - addFx(va, momvs[^1], s, i0, i1) + addGx(vag, p, s) + addFx(vaf, momvs[^1], s, i0, i1) -template addGFF(va, vb: FloatV) = addGFF(va, vb, 0, hmasses.len) +template addGFF(vag, vbg, vaf, vbf: FloatV) = addGFF(vag, vbg, vaf, vbf, 0, hmasses.len) +template addGFF(va, vb: FloatV) = addGFF(va, vb, va, vb, 0, hmasses.len) proc setupMDx = pushTemp() @@ -849,29 +849,29 @@ proc setupMDbacab = proc setupMDabacaba = if pt0 == 0: pt0 = 0.08935804763220157 if pg0 == 0: pg0 = 0.2470939580390842 - #if pf0 == 0: pf0 = 0.2470939580390842 + if pf0 == 0: pf0 = 0.2470939580390842 if pgf1 == 0: pgf1 = 0.006938106540706989*(2.0/(1-2*pg0)) - #if pff1 == 0: pff1 = 0.006938106540706989*(2.0/(1-2*pf0)) + if pff1 == 0: pff1 = 0.006938106540706989*(2.0/(1-2*pf0)) let t0 = vtau * pushParam(pt0) let t02 = 2 * t0 let g0 = vtau * pushParam(pg0) - #let f0 = if nf==0: g0 else: vtau * pushParam(pf0) + let f0 = if nf==0: g0 else: vtau * pushParam(pf0) let t1 = 0.5 * vtau - t0 let g1 = vtau - 2 * g0 - #let f1 = vtau - 2 * f0 + let f1 = vtau - 2 * f0 let gf1 = vtau*vtau*pushParam(pgf1) - #let ff1 = if nf==0: gf1 else: vtau*vtau*pushParam(pff1) + let ff1 = if nf==0: gf1 else: vtau*vtau*pushParam(pff1) addT(t0) for i in 0.. Date: Thu, 11 Jul 2024 11:24:15 -0500 Subject: [PATCH 32/60] add trivial multimass interface --- src/physics/stagSolve.nim | 34 ++++++++++++++++++++++++++++++++++ 1 file changed, 34 insertions(+) diff --git a/src/physics/stagSolve.nim b/src/physics/stagSolve.nim index 34f309d..5406fed 100644 --- a/src/physics/stagSolve.nim +++ b/src/physics/stagSolve.nim @@ -36,6 +36,23 @@ proc solveEO*(s: Staggered; r,x: Field; m: SomeNumber; sp0: var SolverParams) = echo "op time: ", top echo "solve time: ", secs, " Gflops: ", 1e-9*flops.float/secs +# multimass (trivial version with multiple single mass calls for now) +proc solveEO*(s: Staggered; r: seq[Field]; x: Field; m: seq[float]; + sp: seq[SolverParams]) = + let n = m.len + doAssert(r.len == n) + doAssert(sp.len == n) + for i in 0.. Date: Thu, 11 Jul 2024 17:45:54 -0500 Subject: [PATCH 33/60] experimental/symbolic: WIP committed for archive --- src/experimental/symbolic/gauge.nim | 545 ++++++++++++++++++++----- src/experimental/symbolic/graph.nim | 591 +++++++++++++++++++++++++--- 2 files changed, 967 insertions(+), 169 deletions(-) diff --git a/src/experimental/symbolic/gauge.nim b/src/experimental/symbolic/gauge.nim index a933a32..38ade88 100644 --- a/src/experimental/symbolic/gauge.nim +++ b/src/experimental/symbolic/gauge.nim @@ -1,5 +1,5 @@ import experimental/symbolic/graph -import ../../gauge, physics/qcdTypes +import layout, ../../gauge, physics/qcdTypes # # gauge support @@ -21,7 +21,7 @@ method `$`*(v: SymNodeValueGauge): string = "gaugeValue" proc assign*(z: SymNode, v: Gauge) = z.assign SymNodeValueGauge(gaugeValue: v) -method copySymNodeValue*(v: SymNodeValueGauge): SymNodeValueGauge = +method copySymNodeValue*(v: SymNodeValueGauge): SymNodeValue = # TODO: we don't need this, if we don't take gradient after assign let z = v.gaugeValue[0].l.newGauge threads: @@ -29,127 +29,119 @@ method copySymNodeValue*(v: SymNodeValueGauge): SymNodeValueGauge = z[mu] := v.gaugeValue[mu] SymNodeValueGauge(gaugeValue: z) +method identAllocateSymNodeValue*(x: SymNodeValueGauge): SymNodeValue = + # TODO: leave it uninitialized? + SymNodeValueGauge(gaugeValue: x.gaugeValue[0].l.newGauge) + method identSymNodeValue*(z: SymNodeValueGauge, x: SymNodeValueGauge) = threads: for mu in 0.. SymNode +type SymNodeValueGaugeActionCoeffs* = ref object of SymNodeValueConcrete + gaugeActionCoeffsValue*: GaugeActionCoeffs + +method getGaugeActionCoeffs*(v: SymNodeValue): GaugeActionCoeffs {.base.} = + raiseValueError("Custom method required for value: " & $v) + +method getGaugeActionCoeffs*(v: SymNodeValueGaugeActionCoeffs): GaugeActionCoeffs = + v.gaugeActionCoeffsValue + +method setGaugeActionCoeffs*(v: SymNodeValue, c: GaugeActionCoeffs) {.base.} = + raiseValueError("Custom method required for value: " & $v) + +method setGaugeActionCoeffs*(v: SymNodeValueGaugeActionCoeffs, c: GaugeActionCoeffs) = + v.gaugeActionCoeffsValue = c + +proc gaugeAction*(c: SymNodeValue, beta: SymNode, g: SymNode): SymNode +proc gaugeActionDeriv*(c: SymNodeValue, beta: SymNode, g: SymNode): SymNode +proc gaugeActionDeriv2*(c: SymNodeValue, beta: SymNode, g: SymNode, f: SymNode): SymNode +proc projectTAH*(x: SymNode): SymNode +proc contractProjectTAH*(x: SymNode, y: SymNode): SymNode + +proc gaugeForce*(c: SymNodeValue, beta: SymNode, g: SymNode): SymNode = + contractProjectTAH(g, gaugeActionDeriv(c, beta, g)) + +method gaugeActionAllocateSymNodeValue*(x: SymNodeValue, y: SymNodeValue): SymNodeValue {.base.} = + raiseErrorBaseMethod("args:\n " & x.repr & "\n " & y.repr) + +method gaugeActionSymNodeValue*(z: SymNodeValue, c: SymNodeValue, x: SymNodeValue, y: SymNodeValue) {.base.} = + raiseErrorBaseMethod("args:\n " & z.repr & "\n " & x.repr) + +method gaugeActionSymNodeValue*(z: SymNodeValueFloat, c: SymNodeValueGaugeActionCoeffs, beta: SymNodeValueFloat, g: SymNodeValueGauge) = + let gc = beta.floatValue * c.getGaugeActionCoeffs + if gc.adjplaq == 0: + z.floatValue = gc.gaugeAction1 g.gaugeValue + elif gc.rect == 0 and gc.pgm == 0: + z.floatValue = gc.actionA g.gaugeValue + else: + raiseValueError("Gauge coefficient unsupported: " & $c.getGaugeActionCoeffs) + +method gaugeActionAllocateSymNodeValue*(beta: SymNodeValueFloat, g: SymNodeValueGauge): SymNodeValue = + SymNodeValueFloat() -proc `*`(x: float, y: SymNode): SymNode = - return newSymNode(value = SymNodeValueFloat(floatValue: x)).mul y +proc gaugeActionForward(z: SymNode) = + gaugeActionSymNodeValue(z.value, z.getArg, z.inputs[0].value, z.inputs[1].value) -proc `*`(x: SymNode, y: float): SymNode = - return x.mul newSymNode(value = SymNodeValueFloat(floatValue: y)) +proc gaugeActionAllocate(z: SymNode) = + z.value = gaugeActionAllocateSymNodeValue(z.inputs[0].value, z.inputs[1].value) + +proc gaugeActionBackward(z: SymNode, i: int, dep: SymNode): SymNode = + ## Always treat beta as a multiplicative factor + let g = z.gradientDependentOrNil dep + case i + of 0: + if g == nil: + return z / z.inputs[0] + else: + return (g * z) / z.inputs[0] + of 1: + let f = gaugeForce(z.getArg, z.inputs[0], z.inputs[1]) + if g == nil: + return f + else: + return g.mul f + else: + raiseError("gaugeAction has 2 operand, got i = " & $i) + +proc gaugeAction*(c: SymNodeValue, beta: SymNode, g: SymNode): SymNode = + newSymNode( + inputs = @[beta, g], + arg = c, + forward = gaugeActionForward, + allocateValue = gaugeActionAllocate, + backward = gaugeActionBackward, + name = "gaugeAction") + +method gaugeActionDerivAllocateSymNodeValue*(x: SymNodeValue, y: SymNodeValue): SymNodeValue {.base.} = + raiseErrorBaseMethod("args:\n " & x.repr & "\n " & y.repr) + +method gaugeActionDerivSymNodeValue*(z: SymNodeValue, c: SymNodeValue, x: SymNodeValue, y: SymNodeValue) {.base.} = + raiseErrorBaseMethod("args:\n " & z.repr & "\n " & x.repr) + +method gaugeActionDerivAllocateSymNodeValue*(beta: SymNodeValueFloat, g: SymNodeValueGauge): SymNodeValue = + SymNodeValueGauge(gaugeValue: g.gaugeValue[0].l.newGauge) + +method gaugeActionDerivSymNodeValue*(z: SymNodeValueGauge, c: SymNodeValueGaugeActionCoeffs, beta: SymNodeValueFloat, g: SymNodeValueGauge) = + let gc = beta.floatValue * c.getGaugeActionCoeffs + if gc.adjplaq == 0: + gc.gaugeActionDeriv(g.gaugeValue, z.gaugeValue) + elif gc.rect == 0 and gc.pgm == 0: + gc.gaugeADeriv(g.gaugeValue, z.gaugeValue) + else: + raiseValueError("Gauge coefficient unsupported: " & $c.getGaugeActionCoeffs) + +proc gaugeActionDerivForward(z: SymNode) = + gaugeActionDerivSymNodeValue(z.value, z.getArg, z.inputs[0].value, z.inputs[1].value) + +proc gaugeActionDerivAllocate(z: SymNode) = + z.value = gaugeActionDerivAllocateSymNodeValue(z.inputs[0].value, z.inputs[1].value) + +proc gaugeActionDerivBackward(z: SymNode, i: int, dep: SymNode): SymNode = + ## Always treat beta as a multiplicative factor + let g = z.gradientDependentOrNil dep + if g == nil: + raiseValueError("gradient of " & $dep & " with respect to " & $z & " does not exists") + case i + of 0: + # beta derivative + #return g.dot(z) / z.inputs[0] + raiseValueError("unimplemented") + of 1: + return g.mul gaugeActionDeriv2(z.getArg, z.inputs[0], z.inputs[1], g) + else: + raiseError("gaugeForce has 2 operand, got i = " & $i) + +proc gaugeActionDeriv*(c: SymNodeValue, beta: SymNode, g: SymNode): SymNode = + newSymNode( + inputs = @[beta, g], + arg = c, + forward = gaugeActionDerivForward, + allocateValue = gaugeActionDerivAllocate, + backward = gaugeActionDerivBackward, + name = "gaugeActionDeriv") + +method gaugeActionDeriv2AllocateSymNodeValue*(x: SymNodeValue, y: SymNodeValue, z: SymNodeValue): SymNodeValue {.base.} = + raiseErrorBaseMethod("args:\n " & x.repr & "\n " & y.repr & "\n " & z.repr) + +method gaugeActionDeriv2SymNodeValue*(z: SymNodeValue, c:SymNodeValue, x: SymNodeValue, y: SymNodeValue, h: SymNodeValue) {.base.} = + raiseErrorBaseMethod("args:\n " & z.repr & "\n " & c.repr & "\n " & x.repr & "\n " & y.repr & "\n " & h.repr) + +method gaugeActionDeriv2AllocateSymNodeValue*(beta: SymNodeValueFloat, g: SymNodeValueGauge, c: SymNodeValueGauge): SymNodeValue = + SymNodeValueGauge(gaugeValue: g.gaugeValue[0].l.newGauge) + +method gaugeActionDeriv2SymNodeValue*(z: SymNodeValueGauge, c: SymNodeValueGaugeActionCoeffs, beta: SymNodeValueFloat, g: SymNodeValueGauge, h: SymNodeValueGauge) = + let gc = beta.floatValue * c.getGaugeActionCoeffs + if gc.adjplaq == 0 and gc.rect == 0 and gc.pgm == 0: + gc.gaugeDerivDeriv2(g.gaugeValue, h.gaugeValue, z.gaugeValue) + else: + raiseValueError("Gauge coefficient unsupported: " & $c.getGaugeActionCoeffs) + +proc gaugeActionDeriv2Forward(z: SymNode) = + gaugeActionDeriv2SymNodeValue(z.value, z.getArg, z.inputs[0].value, z.inputs[1].value, z.inputs[2].value) + +proc gaugeActionDeriv2Allocate(z: SymNode) = + z.value = gaugeActionDeriv2AllocateSymNodeValue(z.inputs[0].value, z.inputs[1].value, z.inputs[2].value) + +proc gaugeActionDeriv2Backward(z: SymNode, i: int, dep: SymNode): SymNode = + raiseValueError("unimplemented") + +proc gaugeActionDeriv2*(c: SymNodeValue, beta: SymNode, g: SymNode, f: SymNode): SymNode = + ## Remember to set arg for GaugeActionCoeffs with beta factored out + newSymNode( + inputs = @[beta, g, f], + arg = c, + forward = gaugeActionDeriv2Forward, + allocateValue = gaugeActionDeriv2Allocate, + backward = gaugeActionDeriv2Backward, + name = "gaugeActionDeriv2") + +method contractProjectTAHAllocateSymNodeValue*(x: SymNodeValue, y: SymNodeValue): SymNodeValue {.base.} = + raiseErrorBaseMethod("args:\n " & x.repr & "\n " & y.repr) + +method contractProjectTAHSymNodeValue*(z: SymNodeValue, x: SymNodeValue, y: SymNodeValue) {.base.} = + raiseErrorBaseMethod("args:\n " & z.repr & "\n " & x.repr) + +method contractProjectTAHAllocateSymNodeValue*(x: SymNodeValueGauge, y: SymNodeValueGauge): SymNodeValue = + SymNodeValueGauge(gaugeValue: x.gaugeValue[0].l.newGauge) + +method contractProjectTAHSymNodeValue*(z: SymNodeValueGauge, x: SymNodeValueGauge, y: SymNodeValueGauge) = + let nd = z.gaugeValue.len + type T = type(z.gaugeValue[0]) + let f = cast[ptr cArray[T]](unsafeAddr(z.gaugeValue[0])) + let u = cast[ptr cArray[T]](unsafeAddr(x.gaugeValue[0])) + let o = cast[ptr cArray[T]](unsafeAddr(y.gaugeValue[0])) + threads: + for mu in 0.. 0: raiseError("inputs.len: " & $z.inputs.len & ", but no forward function defined for:\n" & z.nodeRepr) - z.tag.excl sntNeedUpdate + z.epoch = highEpoch proc eval*(z: SymNode) = - z.tagUpdateRec - z.tagClearRec sntVisited z.evalRec z.tagClearRec sntVisited @@ -346,11 +787,12 @@ proc gradientRec(z: SymNode, dep: SymNode) = if grad != nil: if sntFixedGradient notin input.tag: let childGrad = z.backward(z, i, dep) - # We always request update for the newly created gradient node. - childGrad.tag.incl sntNeedUpdate # We need to combine the gradient without breaking the existing graph. # Because the previous built graph may have a reference of this node, `grad`, # our new node has to reuse `grad`. We use a copy of `grad` and assign back. + #echo "TODO: recombine grad: when do we need to reuse the node? Reference counting?" + # In this gradient call during the previous traversal, the grad node was used by the children of this node. + #echo " ",grad.nodeRepr grad.assignSymNode(grad.copySymNode.add childGrad) else: raiseError(z.nodeRepr & ":" & $i & ":" & input.nodeRepr & ": visited but no gradient") @@ -359,8 +801,6 @@ proc gradientRec(z: SymNode, dep: SymNode) = input.tag.incl sntVisited if grad == nil: let childGrad = z.backward(z, i, dep) - # We always request update for the newly created gradient node. - childGrad.tag.incl sntNeedUpdate gradientDependentAssign(input, dep, childGrad) else: # Existent gradient means it was setup previously outside of this gradientRec(z, dep), @@ -403,7 +843,7 @@ proc sharedNodesRec(shared: var seq[SymNode], z: SymNode) = # func nodeRepr*(z: SymNode): string = - result = $z & $z.tag & ": " & $z.value & ", run: " & $z.runCount + result = $z & $z.tag & ": " & $z.value & ", run: " & $z.runCount & ", epoch: " & $z.epoch & ", ref: " & $z.refCount if z.arg != nil: result &= ", arg: " & $z.arg if z.inputs.len > 0: @@ -448,6 +888,12 @@ func treeRepr*(z: SymNode): string = # when isMainModule: + var failed = 0 + proc assertRunCount(z: SymNode, c: int) = + if z.runCount != c: + failed.inc + echo "Failed: expect run count: ",c," but got node: ",z.nodeRepr + let x = newSym("x") echo x.nodeRepr let y = newSym("y") @@ -502,3 +948,24 @@ when isMainModule: w.allocate w.eval echo w.treeRepr + + w.assertRunCount 1 + z.assertRunCount 2 + dwdx.assertRunCount 1 + + x.assign 0.7 + y.assign 0.5 + echo "\n## x.assign 0.7 & y.assign 0.5\n" + dzdx.eval + echo dzdx.treeRepr + dwdx.eval + echo dwdx.treeRepr + w.eval + echo w.treeRepr + + w.assertRunCount 2 + z.assertRunCount 3 + dwdx.assertRunCount 1 + + if failed > 0: + quit 1 From 09da7b3d69fa7107e4aa967c508c4eb9b527777c Mon Sep 17 00:00:00 2001 From: Xiao-Yong Jin Date: Thu, 11 Jul 2024 17:47:32 -0500 Subject: [PATCH 34/60] experimental/symbolic: remove --- src/experimental/symbolic/gauge.nim | 577 ----------------- src/experimental/symbolic/graph.nim | 971 ---------------------------- 2 files changed, 1548 deletions(-) delete mode 100644 src/experimental/symbolic/gauge.nim delete mode 100644 src/experimental/symbolic/graph.nim diff --git a/src/experimental/symbolic/gauge.nim b/src/experimental/symbolic/gauge.nim deleted file mode 100644 index 38ade88..0000000 --- a/src/experimental/symbolic/gauge.nim +++ /dev/null @@ -1,577 +0,0 @@ -import experimental/symbolic/graph -import layout, ../../gauge, physics/qcdTypes - -# -# gauge support -# - -# TODO: needs more thought -type Gauge = seq[DLatticeColorMatrixV] - -type SymNodeValueGauge = ref object of SymNodeValueConcrete - gaugeValue: Gauge - -method getGauge*(v: SymNodeValue): Gauge {.base.} = - raiseValueError("Custom method required for value: " & $v) - -method getGauge*(v:SymNodeValueGauge): Gauge = v.gaugeValue - -method `$`*(v: SymNodeValueGauge): string = "gaugeValue" - -proc assign*(z: SymNode, v: Gauge) = - z.assign SymNodeValueGauge(gaugeValue: v) - -method copySymNodeValue*(v: SymNodeValueGauge): SymNodeValue = - # TODO: we don't need this, if we don't take gradient after assign - let z = v.gaugeValue[0].l.newGauge - threads: - for mu in 0.. 0: - qexWarn "Nonexistent gauge file: ", gaugefile - @[4,4,4,8] - gact:GaugeActType = "ActWilson" - beta = 6.0 - adjFac = -0.25 - rectFac = -1.4088 - seed:uint64 = 4321 - dt = 0.1 - echoParams() - echo "rank ", myRank, "/", nRanks - threads: echo "thread ", threadNum, "/", numThreads - - gc.setGaugeActionCoeffs(case gact - of ActWilson: GaugeActionCoeffs(plaq: 1.0) - of ActAdjoint: GaugeActionCoeffs(plaq: 1.0, adjplaq: adjFac) - of ActRect: gaugeActRect(1.0, rectFac) - of ActSymanzik: Symanzik(1.0) - of ActIwasaki: Iwasaki(1.0) - of ActDBW2: DBW2(1.0)) - let - lo = lat.newLayout - vol = lo.physVol - - echo gc - var r = lo.newRNGField(MRG32k3a, seed) - var R:MRG32k3a # global RNG - R.seed(seed, 987654321) - - var g = lo.newgauge - var mom = lo.newgauge - threads: - g.unit - mom.randomTAH r - - b.assign beta - u.assign g - p.assign mom - eps.assign dt - optimize(output = @[hh,h,dhdp,checkdhdp], variables = @[p,u,eps,b]) - - hh.allocate - h.allocate - k.allocate - dhdp.allocate - checkdhdp.allocate - echo "k:\n",k.treerepr - echo "dhdp:\n",dhdp.treerepr - echo "norm2(dhdp-p):\n",checkdhdp.treerepr - echo "hh:\n",hh.treerepr - - hh.eval - h.eval - dhdp.eval - checkdhdp.eval - - echo h.value.getFloat, " ", s.value.getFloat, " ", k.value.getFloat - echo hh.value.getFloat, " ", hh.inputs[0].value.getFloat, " ", hh.inputs[1].value.getFloat - if checkdhdp.value.getFloat < 1e-30 * k.value.getFloat: - echo "Pass: check dhdp" - else: - echo "Failed: check dhdp, norm2(dhdp-p) = ", checkdhdp.value.getFloat - - echo "k:\n",k.treerepr - echo "dhdp:\n",dhdp.treerepr - echo "norm2(dhdp-p):\n",checkdhdp.treerepr - - threads: - mom.randomTAH r - p.updated - - h.eval - dhdp.eval - checkdhdp.eval - - echo h.value.getFloat, " ", s.value.getFloat, " ", k.value.getFloat - if checkdhdp.value.getFloat < 1e-30 * k.value.getFloat: - echo "Pass: check dhdp" - else: - echo "Failed: check dhdp, norm2(dhdp-p) = ", checkdhdp.value.getFloat - - echo "k:\n",k.treerepr - echo "dhdp:\n",dhdp.treerepr - echo "norm2(dhdp-p):\n",checkdhdp.treerepr diff --git a/src/experimental/symbolic/graph.nim b/src/experimental/symbolic/graph.nim deleted file mode 100644 index 1e47a75..0000000 --- a/src/experimental/symbolic/graph.nim +++ /dev/null @@ -1,971 +0,0 @@ -## lazy generic computational graph - -#[ - -requires: `--multimethods:on` - -We want the symbolic graph nodes to be type generic. Therefore we -need dynamic dispatch based on object types at runtime. This -implementation uses Nim's builtin multimethods for this purpose. - -Nim's multimethods may slow down single method dispatch time, which -would affect the performance of the comms module. We need to measure -it to understand the impact. - -The functions, ident and add, are the same as a variable argument -function, in terms of symbolic formulae, but we treat functions -with different number of arguments differently, because the -implementations of the functions would be different. It also avoids -increasing dynamic dispatch overhead. - -Typed values are enclosed in derived types of `SymNodeValueConcrete` -and referenced from nodes, which have a single type. Since Nim -doesn't allow mixing generic functions with methods, we need to -define a method for each and every combination of concrete types -we use, and wrap our existing generic functions in each method. - -Both SymNodeValueConcrete and SymNode are references. In the graph -we build, a shared node means the same variable. We create new -nodes that equal to the existing nodes with the ident function to -create new referenced node objects, in order to avoid false sharing. TODO: when do we really need ident? -We use copySymNodeValue to create new referenced value objects, -such that different nodes refer to distinct value objects. - -We use the tag sntVisited to avoid repeatedly traverse shared nodes. -The recursive graph traversal function all ends with Rec, just to -remind us to call `tagClearRec(z, sntVisited)` in the top level. - -Further optimizations only possible after building all the graphs: -- Remove ident nodes -- Analyze and reuse allocations when possible - -TEMP NOTES - -Make backward dispatch based on types -- all nodes have to already have the value type -- only need to call the method using the values, which allow dispatch at runtime -- need to create typed values when creating the node -- so the node creating functions must dispatch based on the value types -- everything needs to be dispatched with methods -- should I just make graph nodes with type instead? - -]# - -from math import exp - -# -# basic type support -# - -type - SymNodeTag = enum - sntVisited, sntNeedGradient, sntFixedGradient - # sntReusable, ... - SymNodeTags = set[SymNodeTag] - SymNodeValue* = ref object of RootObj ## Represent unallocated symbolic value - SymNodeValueConcrete* = ref object of SymNodeValue ## For any concrete values - SymNodeGradient = object - ## for a particular variable v - dependent: SymNode ## a variable v that depends on this node, x - gradient: SymNode ## dv/dx - SymNode* = ref object - # This object can be cyclic, because gradients refer to ancestor nodes - value*: SymNodeValue - inputs*: seq[SymNode] - forward: proc(z: SymNode) ## runs the actual compute - arg: SymNodeValue ## extra argument forward/backward uses, must be immutable and can be shared, use getArg/setArg - runCount: int - epoch: int ## for resolving dependency, tracks update - allocateValue: proc(z: SymNode) - backward: proc(z: SymNode, i: int, dep: SymNode): SymNode ## create graphs - gradients: seq[SymNodeGradient] ## saved gradient graphs - name: string - tag: SymNodeTags - id: int - refCount: int - SymNodeError = object of Defect - SymNodeValueError = object of Defect - -template raiseError*(msg: string) = - raise newException(SymNodeError, msg) - -template raiseValueError*(msg: string) = - raise newException(SymNodeValueError, msg) - -template raiseErrorBaseMethod*(msg: string) = - raise newException( - SymNodeError, - "Base method invoked: " & msg & - "\nMake sure to pass `--multimethods:on` and check there is a custom method for each derived type.") - -method `$`*(v: SymNodeValue): string {.base.} = "SymNodeValue" - -func `$`*(z: SymNode): string = - z.name & "#" & $z.id - -method isSymNodeValueConcrete*(v: SymNodeValue): bool {.base.} = false -method isSymNodeValueConcrete*(v: SymNodeValueConcrete): bool = true - -func getArg*(z: SymNode): SymNodeValue = z.arg -proc setArg*(z: SymNode, v: SymNodeValue) = - if z.arg != nil and z.arg.isSymNodeValueConcrete: - raiseValueError("Cannot set z.arg, which is a concrete value: " & $z.arg) - else: - z.arg = v - -func nodeRepr*(z: SymNode): string -func treeRepr*(z: SymNode): string - -method copySymNodeValue*(v: SymNodeValue): SymNodeValue {.base.} = - ## nothing to copy - v - -method copySymNodeValue*(v: SymNodeValueConcrete): SymNodeValue = - raiseValueError("Custom method required for concrete value: " & $v) - -proc newSymNode*( - value = SymNodeValue(), - inputs: seq[SymNode] = @[], - forward: proc(z: SymNode) = nil, - arg: SymNodeValue = nil, - runCount: int = 0, - epoch: int = 0, - allocateValue: proc(z: SymNode) = nil, - backward: proc(z: SymNode, i: int, dep: SymNode): SymNode = nil, - gradients: seq[SymNodeGradient] = @[], - name: string = "", - refCount: int = 0, - tag: SymNodeTags = {}): SymNode = - ## Create new SymNode with a unique id. - var id {.global.} = 0 - result = SymNode(value: value, inputs: inputs, forward: forward, arg: arg, runCount: runCount, - epoch: epoch, allocateValue: allocateValue, backward: backward, gradients: gradients, - name: name, tag: tag, id: id, refCount: refCount) - id.inc - for i in result.inputs: - i.refCount.inc - -proc copySymNode*(z: SymNode): SymNode = - newSymNode(value = z.value.copySymNodeValue, inputs = z.inputs, forward = z.forward, - arg = z.arg, runCount = z.runCount, epoch = z.epoch, - allocateValue = z.allocateValue, backward = z.backward, gradients = z.gradients, - name = z.name, tag = z.tag, refCount = z.refCount) - -proc assignSymNode*(z: SymNode, x: SymNode) = - z.value = x.value.copySymNodeValue - z.inputs = x.inputs - z.forward = x.forward - z.arg = x.arg - z.runCount = x.runCount - z.epoch = x.epoch - z.allocateValue = x.allocateValue - z.backward = x.backward - z.gradients = x.gradients - z.name = x.name - z.tag = x.tag - z.refCount = x.refCount - -proc gradientDependentOrNil*(z: SymNode, dep: SymNode): SymNode = - ## May return nil if not exists. - for g in z.gradients: - if dep == g.dependent: - return g.gradient - # We don't have a matching dependent variable. - return nil - -proc gradientDependentAssign(z: SymNode, dep: SymNode, grad: SymNode) = - ## Replace if exists, otherwise add to the list. - for g in z.gradients.mitems: - if dep == g.dependent: - g.gradient = grad - return - z.gradients.add SymNodeGradient(dependent: dep, gradient: grad) - -proc updated*(z: SymNode) = - ## Tag this SymNode after updating its value. - ## Graph dependency resolution depends on this. - var epoch {.global.} = 0 - epoch.inc - z.epoch = epoch - -proc assign*(z: SymNode, v: SymNodeValueConcrete) = - z.value = v - z.updated - -# -# generic symbol support -# - -proc newSym*(s: string): SymNode = - newSymNode(name = s) - -method identAllocateSymNodeValue*(x: SymNodeValue): SymNodeValue {.base.} = - raiseErrorBaseMethod("args:\n " & x.repr) - -method identSymNodeValue*(z: SymNodeValue, x: SymNodeValue) {.base.} = - raiseErrorBaseMethod("args:\n " & z.repr & "\n " & x.repr) - -method zeroAllocateSymNodeValue*(x: SymNodeValue): SymNodeValue {.base.} = - raiseErrorBaseMethod("args:\n " & x.repr) - -method zeroSymNodeValue*(z: SymNodeValue, x: SymNodeValue) {.base.} = - raiseErrorBaseMethod("args:\n " & z.repr & "\n " & x.repr) - -method indexAllocateSymNodeValue*(x: SymNodeValue, i: SymNodeValue): SymNodeValue {.base.} = - raiseErrorBaseMethod("args:\n " & x.repr & "\n " & i.repr) - -method indexSymNodeValue*(z: SymNodeValue, x: SymNodeValue, i: SymNodeValue) {.base.} = - raiseErrorBaseMethod("args:\n " & z.repr & "\n " & x.repr & "\n " & i.repr) - -method indexUpdateAllocateSymNodeValue*(x: SymNodeValue, i: SymNodeValue, y: SymNodeValue): SymNodeValue {.base.} = - raiseErrorBaseMethod("args:\n " & x.repr & "\n " & i.repr & "\n " & y.repr) - -method indexUpdateSymNodeValue*(z: SymNodeValue, x: SymNodeValue, i: SymNodeValue, y: SymNodeValue) {.base.} = - raiseErrorBaseMethod("args:\n " & z.repr & "\n " & x.repr & "\n " & i.repr & "\n " & y.repr) - -method negAllocateSymNodeValue*(x: SymNodeValue): SymNodeValue {.base.} = - raiseErrorBaseMethod("args:\n " & x.repr) - -method negSymNodeValue*(z: SymNodeValue, x: SymNodeValue) {.base.} = - raiseErrorBaseMethod("args:\n " & z.repr & "\n " & x.repr) - -method invAllocateSymNodeValue*(x: SymNodeValue): SymNodeValue {.base.} = - raiseErrorBaseMethod("args:\n " & x.repr) - -method invSymNodeValue*(z: SymNodeValue, x: SymNodeValue) {.base.} = - raiseErrorBaseMethod("args:\n " & z.repr & "\n " & x.repr) - -method addAllocateSymNodeValue*(x: SymNodeValue, y: SymNodeValue): SymNodeValue {.base.} = - raiseErrorBaseMethod("args:\n " & x.repr & "\n " & y.repr) - -method addSymNodeValue*(z: SymNodeValue, x: SymNodeValue, y: SymNodeValue) {.base.} = - raiseErrorBaseMethod("args:\n " & z.repr & "\n " & x.repr & "\n " & y.repr) - -method mulAllocateSymNodeValue*(x: SymNodeValue, y: SymNodeValue): SymNodeValue {.base.} = - raiseErrorBaseMethod("args:\n " & x.repr & "\n " & y.repr) - -method mulSymNodeValue*(z: SymNodeValue, x: SymNodeValue, y: SymNodeValue) {.base.} = - raiseErrorBaseMethod("args:\n " & z.repr & "\n " & x.repr & "\n " & y.repr) - -method subAllocateSymNodeValue*(x: SymNodeValue, y: SymNodeValue): SymNodeValue {.base.} = - raiseErrorBaseMethod("args:\n " & x.repr & "\n " & y.repr) - -method subSymNodeValue*(z: SymNodeValue, x: SymNodeValue, y: SymNodeValue) {.base.} = - raiseErrorBaseMethod("args:\n " & z.repr & "\n " & x.repr & "\n " & y.repr) - -method divideAllocateSymNodeValue*(x: SymNodeValue, y: SymNodeValue): SymNodeValue {.base.} = - raiseErrorBaseMethod("args:\n " & x.repr & "\n " & y.repr) - -method divideSymNodeValue*(z: SymNodeValue, x: SymNodeValue, y: SymNodeValue) {.base.} = - raiseErrorBaseMethod("args:\n " & z.repr & "\n " & x.repr & "\n " & y.repr) - -method expAllocateSymNodeValue*(x: SymNodeValue): SymNodeValue {.base.} = - raiseErrorBaseMethod("args:\n " & x.repr) - -method expSymNodeValue*(z: SymNodeValue, x: SymNodeValue) {.base.} = - raiseErrorBaseMethod("args:\n " & z.repr & "\n " & x.repr) - -# -# support of generic multiple values per node -# - -type SymNodeValueTuple* = ref object of SymNodeValue - tupleValue*: seq[SymNodeValue] - -method getTuple*(v: SymNodeValue): seq[SymNodeValue] {.base.} = - raiseValueError("Custom method required for value: " & $v) - -method getTuple*(v:SymNodeValueTuple): seq[SymNodeValue] = v.tupleValue - -method `$`*(v: SymNodeValueTuple): string = $v.tupleValue - -type SymNodeValueInt* = ref object of SymNodeValue - intValue*: int - -method getInt*(v: SymNodeValue): int {.base.} = - raiseValueError("Custom method required for value: " & $v) - -method getInt*(v:SymNodeValueInt): int = v.intValue - -method `$`*(v: SymNodeValueInt): string = $v.intValue - -method identAllocateSymNodeValue*(x: SymNodeValueTuple): SymNodeValue = - var vs = newseq[SymNodeValue](x.tupleValue.len) - for i in 0.. 0: - raiseError("inputs.len: " & $z.inputs.len & ", but no forward function defined for:\n" & z.nodeRepr) - z.epoch = highEpoch - -proc eval*(z: SymNode) = - z.evalRec - z.tagClearRec sntVisited - -proc tagUpdateNeedGradientRec(z: SymNode) = - if sntVisited in z.tag: - return - z.tag.incl sntVisited - var needgradient = false - for i in z.inputs: - i.tagUpdateNeedGradientRec - needgradient = needgradient or sntNeedGradient in i.tag - if needgradient and sntNeedGradient notin z.tag: - z.tag.incl sntNeedGradient - -proc gradientRec(z: SymNode, dep: SymNode) = - ## gradient of dep with respect to z - # We tag newly created nodes from z.backward(z, i, dep), with needUpdate. - for i in 0.. 0: - result &= ", inputs: {" - for i in 0.. 0: - result &= ", " - result &= "[" & $i & "]: " & $z.inputs[i] - result &= "}" - if z.gradients.len > 0: - result &= ", gradients: {" - for i in 0.. 0: - result &= ", " - result &= "[" & $i & "]: " & $z.gradients[i].dependent & " -> " & $z.gradients[i].gradient - result &= "}" - if z.value != nil: - result &= ": " & $z.value - -func toStringRec(z: SymNode, pre: string, shared: seq[SymNode]): string = - result = pre & z.nodeRepr - if sntVisited in z.tag: - result &= " [shared]" - else: - z.tag.incl sntVisited - for zid in 0.. 0: - quit 1 From 0703a772f74be6955caf72e94a22745cecbee9bd Mon Sep 17 00:00:00 2001 From: Xiao-Yong Jin Date: Thu, 11 Jul 2024 17:50:22 -0500 Subject: [PATCH 35/60] experimental/graph: typed graph w/ dyn dispatch when build, scalar --- src/experimental/graph/core.nim | 277 ++++++++++++++++++++++++++++++ src/experimental/graph/scalar.nim | 184 ++++++++++++++++++++ src/experimental/graph/tgraph.nim | 206 ++++++++++++++++++++++ 3 files changed, 667 insertions(+) create mode 100644 src/experimental/graph/core.nim create mode 100644 src/experimental/graph/scalar.nim create mode 100644 src/experimental/graph/tgraph.nim diff --git a/src/experimental/graph/core.nim b/src/experimental/graph/core.nim new file mode 100644 index 0000000..6827185 --- /dev/null +++ b/src/experimental/graph/core.nim @@ -0,0 +1,277 @@ +#[ + +- Graph traversals are not thread safe +- backward functions for scalar output may receive nil gradient + +TODO + +- if +- function/lambda + +]# + +from strutils import join, toHex, strip + +type + Gfunc* {.acyclic.} = ref object + ## Represent an functional operation: [input] -> output, + forward: proc(z: Gvalue) + arg: Gvalue ## extra argument forward/backward uses, must be immutable and can be shared, use getArg/setArg + backward: proc(zb: Gvalue, z: Gvalue, i: int, dep: Gvalue): Gvalue ## create new graph for backprop + runCount: int + name: string + Gtag = enum + gtVisited, gtGrad, gtFixedGrad + Gtags = set[Gtag] + Gvalue* {.acyclic.} = ref object of RootObj + ## A Value knows its dependencies, which allows backpropagation. + tag: Gtags + inputs*: seq[Gvalue] + gfunc*: Gfunc + epoch: int + +type + GraphError* = object of Defect + GraphValueError* = object of GraphError + +template raiseError*(msg: string) = + raise newException(GraphError, msg) + +template raiseValueError*(msg: string) = + raise newException(GraphValueError, msg) + +template raiseErrorBaseMethod*(msg: string) = + raiseError( + "Base method invoked: " & msg & + "\nMake sure to pass `--multimethods:on` and check there is a custom method for each derived type.") + +var graphDebug* = false + +proc newGfunc*( + forward: proc(z: Gvalue) = nil, + arg: Gvalue = nil, + backward: proc(zb: Gvalue, z: Gvalue, i: int, dep: Gvalue): Gvalue = nil, + name: string): Gfunc = + Gfunc( + forward: forward, + arg: arg, + backward: backward, + name: name) + +proc runCount*(f: Gfunc): int = f.runCount + +proc `$`*(x: Gfunc): string + +method `$`*(x: Gvalue): string {.base.} = + let f = x.gfunc + result = "Gvalue(" & $x.epoch & " " & $x.tag & ")" + if f != nil: + result &= " " & $f + +proc `$`*(x: Gfunc): string = + if x.arg == nil: + x.name & "<" & $x.runCount & ">" + else: + x.name & "<" & $x.runCount & ", " & $x.arg & ">" + +proc nodeRepr*(x: Gvalue): string = + let f = x.gfunc + result = $x & " (" & $x.epoch & " " & $x.tag & ")" & "@0X" & strip(toHex(cast[int](x)), trailing = false, chars = {'0'}) + if f != nil: + result &= " " & $f & "@0X" & strip(toHex(cast[int](f)), trailing = false, chars = {'0'}) + +method copyGvalue*(x: Gvalue): Gvalue {.base.} = raiseErrorBaseMethod("copyGvalue(" & $x & ")") +method assignGvalue*(z: Gvalue, x: Gvalue) {.base.} = + z.tag = x.tag + z.inputs = x.inputs + z.gfunc = x.gfunc + z.epoch = x.epoch + +let identPlaceholderGFunc = newGfunc(name = "identPlaceholder") +proc identPlaceholder(x: Gvalue): Gvalue = + result = x.copyGvalue + result.tag = {} + result.inputs = @[x] + result.gfunc = identPlaceholderGFunc + result.epoch = 0 + +proc tagClearVisited(x: Gvalue) = + ## only works after recursive proc used gtVisited for the graph traversal. + if gtVisited in x.tag: + x.tag.excl gtVisited + for i in x.inputs: + i.tagClearVisited + +proc tagClear(x: Gvalue, t: Gtag) = + proc c(v: Gvalue) = + if gtVisited in v.tag: + return + v.tag.incl gtVisited + v.tag.excl t + for i in v.inputs: + i.c + x.c + x.tagClearVisited + +proc treeRepr*(v: Gvalue): string = + var shared = newseq[Gvalue]() + proc s(x: Gvalue) = + if gtVisited in x.tag: + if shared.find(x) < 0: + shared.add x + else: + x.tag.incl gtVisited + for i in x.inputs: + i.s + proc r(x: Gvalue): seq[string] = + let si = shared.find x + result = @[x.nodeRepr] + if gtVisited in x.tag: + result[0] &= " #" & $si + else: + if si >= 0: + result[0] &= " #" & $si & "#" + x.tag.incl gtVisited + for i in x.inputs: + for ir in i.r: + result.add(" " & ir) + v.s + v.tagClearVisited + result = v.r.join "\n" + v.tagClearVisited + +method `-`*(x: Gvalue): Gvalue {.base.} = raiseErrorBaseMethod("`-`(" & $x & ")") +method `+`*(x: Gvalue, y: Gvalue): Gvalue {.base.} = raiseErrorBaseMethod("`+`(" & $x & ", " & $y & ")") +method `*`*(x: Gvalue, y: Gvalue): Gvalue {.base.} = raiseErrorBaseMethod("`*`(" & $x & ", " & $y & ")") +method `-`*(x: Gvalue, y: Gvalue): Gvalue {.base.} = raiseErrorBaseMethod("`-`(" & $x & ", " & $y & ")") +method `/`*(x: Gvalue, y: Gvalue): Gvalue {.base.} = raiseErrorBaseMethod("`/`(" & $x & ", " & $y & ")") + +proc updated*(x: Gvalue) = + var epoch {.global.} = 0 + inc epoch + x.epoch = epoch + +proc eval*(v: Gvalue) = + proc r(x: Gvalue) = + if gtVisited in x.tag: + return + x.tag.incl gtVisited + var maxep = 0 + for i in x.inputs: + i.r + if maxep < i.epoch: + maxep = i.epoch + if x.epoch < maxep: + let f = x.gfunc + if graphDebug: + var s = "[graph/core] eval: " & x.nodeRepr + for c in x.inputs: + s &= "\n " & c.nodeRepr + echo s + if f.forward != nil: + x.epoch = maxep + f.runCount.inc + f.forward x + else: + raiseError("inputs.len: " & $x.inputs.len & ", but no forward function defined for:\n" & x.nodeRepr) + v.r + v.tagClearVisited + +type + Grad = object + input: Gvalue + grad: Gvalue + Grads = object + output: Gvalue + grads: seq[Grad] + +var gradientList = newseq[Grads]() + +proc dumpGradientList* = + echo "# Gradient List:" + for gs in gradientList: + echo "## output: ",gs.output.nodeRepr + for g in gs.grads: + echo "### w.r.t.: ",g.input.nodeRepr + echo g.grad.treeRepr + +proc recordGrad(input: Gvalue, output: Gvalue, gradient: Gvalue) = + for k in 0..= 0: + for k in 0.. Date: Wed, 17 Jul 2024 18:33:24 -0500 Subject: [PATCH 36/60] experimental/graph: add gauge & multi value support --- src/experimental/graph/core.nim | 23 +- src/experimental/graph/gauge.nim | 655 +++++++++++++++++++++++++++++ src/experimental/graph/multi.nim | 98 +++++ src/experimental/graph/scalar.nim | 34 +- src/experimental/graph/tggauge.nim | 287 +++++++++++++ src/experimental/graph/tgraph.nim | 123 +++--- 6 files changed, 1134 insertions(+), 86 deletions(-) create mode 100644 src/experimental/graph/gauge.nim create mode 100644 src/experimental/graph/multi.nim create mode 100644 src/experimental/graph/tggauge.nim diff --git a/src/experimental/graph/core.nim b/src/experimental/graph/core.nim index 6827185..fbad87d 100644 --- a/src/experimental/graph/core.nim +++ b/src/experimental/graph/core.nim @@ -16,7 +16,6 @@ type Gfunc* {.acyclic.} = ref object ## Represent an functional operation: [input] -> output, forward: proc(z: Gvalue) - arg: Gvalue ## extra argument forward/backward uses, must be immutable and can be shared, use getArg/setArg backward: proc(zb: Gvalue, z: Gvalue, i: int, dep: Gvalue): Gvalue ## create new graph for backprop runCount: int name: string @@ -49,12 +48,10 @@ var graphDebug* = false proc newGfunc*( forward: proc(z: Gvalue) = nil, - arg: Gvalue = nil, backward: proc(zb: Gvalue, z: Gvalue, i: int, dep: Gvalue): Gvalue = nil, name: string): Gfunc = Gfunc( forward: forward, - arg: arg, backward: backward, name: name) @@ -68,11 +65,7 @@ method `$`*(x: Gvalue): string {.base.} = if f != nil: result &= " " & $f -proc `$`*(x: Gfunc): string = - if x.arg == nil: - x.name & "<" & $x.runCount & ">" - else: - x.name & "<" & $x.runCount & ", " & $x.arg & ">" +proc `$`*(x: Gfunc): string = x.name & "<" & $x.runCount & ">" proc nodeRepr*(x: Gvalue): string = let f = x.gfunc @@ -80,12 +73,19 @@ proc nodeRepr*(x: Gvalue): string = if f != nil: result &= " " & $f & "@0X" & strip(toHex(cast[int](f)), trailing = false, chars = {'0'}) -method copyGvalue*(x: Gvalue): Gvalue {.base.} = raiseErrorBaseMethod("copyGvalue(" & $x & ")") -method assignGvalue*(z: Gvalue, x: Gvalue) {.base.} = +method newOneOf*(x: Gvalue): Gvalue {.base.} = raiseErrorBaseMethod("newOneOf(" & $x & ")") +method valCopy*(z: Gvalue, x: Gvalue) {.base.} = raiseErrorBaseMethod("valCopy(" & $z & "," & $x & ")") + +proc assignGvalue(z: Gvalue, x: Gvalue) = z.tag = x.tag z.inputs = x.inputs z.gfunc = x.gfunc z.epoch = x.epoch + z.valCopy x + +proc copyGvalue(x: Gvalue): Gvalue = + result = newOneOf x + result.assignGvalue x let identPlaceholderGFunc = newGfunc(name = "identPlaceholder") proc identPlaceholder(x: Gvalue): Gvalue = @@ -151,7 +151,7 @@ proc updated*(x: Gvalue) = inc epoch x.epoch = epoch -proc eval*(v: Gvalue) = +proc eval*(v: Gvalue): Gvalue {.discardable.} = proc r(x: Gvalue) = if gtVisited in x.tag: return @@ -176,6 +176,7 @@ proc eval*(v: Gvalue) = raiseError("inputs.len: " & $x.inputs.len & ", but no forward function defined for:\n" & x.nodeRepr) v.r v.tagClearVisited + v type Grad = object diff --git a/src/experimental/graph/gauge.nim b/src/experimental/graph/gauge.nim new file mode 100644 index 0000000..d6425cb --- /dev/null +++ b/src/experimental/graph/gauge.nim @@ -0,0 +1,655 @@ +#[ + +- Allocate the field when constructing the graph. +- Call updated after changing the field in the graph after construction. + +]# + +import core, scalar, multi +import layout, ../../gauge, physics/qcdTypes + +type Gauge = seq[DLatticeColorMatrixV] + +type Ggauge* {.final.} = ref object of Gvalue + gval: Gauge + +proc getgauge*(x: Gvalue): Gauge = Ggauge(x).gval + +proc toGvalue*(x: Gauge): Ggauge = + # proc instead of converter to avoid converting seq + result = Ggauge(gval: x) + result.updated + +method newOneOf*(x: Ggauge): Gvalue = Ggauge(gval: x.gval.newOneOf) +method valCopy*(z: Ggauge, x: Ggauge) = + let u = z.gval + let v = x.gval + threads: + for mu in 0..= b: + let ii = instantiationInfo() + let sa = astToStr a + let sb = astToStr b + checkpoint(ii.filename & ":" & $ii.line & ":" & $ii.column & ": Check failed: " & sa & " :< " & sb) + checkpoint(" " & sa & ": " & $av) + checkpoint(" " & sb & ": " & $b) + fail() + +# basic test: y <- f(x), or z = y B† = f(x) B†, with x = x + t A +# d/dt z = d/dt f(x+tA) B† +# d/dt z = (d/dt y) (d/dy z)† = (d/dt x) (d/dx z)† = (d/dx z) A† + +proc ndiff(zt: Gvalue, t: Gscalar): (float, float) = + proc z(v:float):float = + t.update v + zt.eval.getfloat + var dzdt,e: float + ndiff(dzdt, e, z, 0.0, 0.125, ordMax=3) + (dzdt, e) + +template check(ii: tuple[filename:string, line:int, column:int], ast: string, dzdt, e, gdota: float) = + if not almostEqual(gdota, dzdt, unitsInLastPlace = 64*1024): + checkpoint(ii.filename & ":" & $ii.line & ":" & $ii.column & ": Check failed: " & ast) + checkpoint(" ndiff: " & $dzdt & " +/- " & $e) + checkpoint(" grad: " & $gdota) + checkpoint(" reldelta: " & $(abs(dzdt-gdota)/abs(dzdt+gdota))) + fail() + +template ckforce(s: untyped, f: untyped, x: Gvalue, p: Gvalue) = + let t = Gscalar() + let (dsdt, e) = ndiff(s(exp(t*p)*x), t) + let pdotf = eval(redot(p, f(x))).getfloat + check(instantiationInfo(), astTostr(s(x) -> f(x)), dsdt, e, pdotf) + +template ckgrad(f: untyped, x: Gvalue, a: Gvalue) = + let t = Gscalar() + let (dzdt, e) = ndiff(f(x+t*a), t) + let ff = f(x) + let gdota = eval(redot(grad(ff, x), a)).getfloat + check(instantiationInfo(), astTostr(f(x)), dzdt, e, gdota) + +template ckgrad2(f: untyped, x: Gvalue, y: Gvalue, ax: Gvalue, ay: Gvalue) = + let t = Gscalar() + let (dzdt, e) = ndiff(f(x+t*ax, y+t*ay), t) + let ff = f(x, y) + let gdota = eval(redot(grad(ff, x), ax) + redot(grad(ff, y), ay)).getfloat + check(instantiationInfo(), astTostr(f(x,y)), dzdt, e, gdota) + +template ckgradm(f: untyped, x: Gvalue, a: Gvalue, b: Gvalue) = + let t = Gscalar() + let (dzdt, e) = ndiff(f(x+t*a).redot b, t) + let ff = f(x).redot b + let gdota = eval(redot(grad(ff, x), a)).getfloat + check(instantiationInfo(), astTostr(f(x)), dzdt, e, gdota) + +template ckgradm2(f: untyped, x: Gvalue, y: Gvalue, ax: Gvalue, ay: Gvalue, b: Gvalue) = + let t = Gscalar() + let (dzdt, e) = ndiff(f(x+t*ax, y+t*ay).redot b, t) + let ff = f(x, y).redot b + let gdota = eval(redot(grad(ff, x), ax) + redot(grad(ff, y), ay)).getfloat + check(instantiationInfo(), astTostr(f(x,y)), dzdt, e, gdota) + +template ckgradm3(f: untyped, x: Gvalue, y: Gvalue, u: Gvalue, ax: Gvalue, ay: Gvalue, au: Gvalue, b: Gvalue) = + let t = Gscalar() + let (dzdt, e) = ndiff(f(x+t*ax, y+t*ay, u+t*au).redot b, t) + let ff = f(x, y, u).redot b + let gdota = eval(redot(grad(ff, x), ax) + redot(grad(ff, y), ay) + redot(grad(ff, u), au)).getfloat + check(instantiationInfo(), astToStr(f(x,y,u)), dzdt, e, gdota) + +qexInit() + +let + lat = @[8,8,8,16] + lo = lat.newLayout + seed = 1234567891u64 + vol = lo.physVol +var + r = lo.newRNGField(MRG32k3a, seed) + g = lo.newgauge + u = lo.newgauge + p = lo.newgauge + q = lo.newgauge + m = lo.newgauge + ss = lo.newStoutSmear(0.1) +const nc = g[0][0].nrows +threads: + g.random r + u.random r + p.randomTAH r + q.randomTAH r + m.randomTAH r +for i in 0..4: + ss.smear(g, g) + ss.smear(u, u) +threads: + for t in m: + t *= 0.01 + +let a = 0.5 * (sqrt(5.0) - 1.0) +let b = sqrt(2.0) - 1.0 + +suite "gauge basic": + setup: + let gg {.used.} = toGvalue g + let gu {.used.} = toGvalue u + let gp {.used.} = toGvalue p + let gq {.used.} = toGvalue q + let gm {.used.} = toGvalue m + let x {.used.} = toGvalue a + let y {.used.} = toGvalue b + + test "norm2": + let n2 = gg.norm2 + let p2 = gp.norm2 + let dp = grad(0.5 * p2, gp) + n2 :~ 4.0*float(nc*vol) + dp.norm2 :~ p2 + norm2(dp-gp) :~ 0 + ckgrad(norm2, gm, gq) + + test "redot": + let n2 = gg.redot gg + let p2 = gp.redot gp + let dp = grad(0.5 * p2, gp) + n2.eval :~ 4.0*float(nc*vol) + dp.norm2 :~ p2 + norm2(dp-gp).eval :~ 0 + let pq = gp.redot gq + norm2(grad(pq, gp) - gq) :< 1e-26 + norm2(grad(pq, gq) - gp) :< 1e-26 + ckgrad2(redot, gp, gq, gg, gu) + + test "retr": + let rtp = gp.retr + let n2 = retr(gg * gg.adj) + rtp*rtp :< 1e-20 + n2.eval :~ 4.0*float(nc*vol) + let p2 = retr(gp * gq.adj) + p2 :~ redot(gp, gq) + norm2(grad(p2, gp) - gq) :< 1e-26 + norm2(grad(p2, gq) - gp) :< 1e-26 + ckgrad(retr, gp, gq) + + test "adj": + norm2(gg.adj*gg - 1.0)/float(4*nc*vol) :< 1e-22 + norm2(gg*gg.adj - 1.0)/float(4*nc*vol) :< 1e-22 + norm2(gp.adj + gp) :< 1e-26 + norm2(grad(gp.adj.norm2, gp) - 2.0*gp) :< 1e-26 + ckgradm(adj, gg, gp, gq) + + test "neg": + norm2(gp.adj - (-gp)) :< 1e-26 + norm2(-gp) :~ gp.norm2 + ckgradm(`-`, gg, gp, gq) + + test "addsg": + let p2 = norm2(x+gp) + grad(p2, x) :~ retr(2.0*(a+gp)) + norm2(grad(p2, gp) - 2.0*(a+gp)) :< 1e-26 + ckgradm2(`+`, x, gp, y, gq, gg) + + test "addgg": + let pq = norm2(gp+gq) + norm2(grad(pq, gp) - 2.0*(gp+gq)) :< 1e-26 + norm2(grad(pq, gq) - 2.0*(gp+gq)) :< 1e-26 + ckgradm2(`+`, gq, gp, gu, gg, gm) + + test "mulsg": + let p2 = norm2(x*gp) + grad(p2, x) :~ 2.0*a*gp.norm2 + norm2(grad(p2, gp) - 2.0*a*a*gp) :< 1e-26 + ckgradm2(`*`, x, gp, y, gq, gg) + + test "mulgg": + let pq = norm2(gp*gq) + norm2(grad(pq, gp) - 2.0*gp*gq*gq.adj) :< 1e-24 + norm2(grad(pq, gq) - 2.0*gp.adj*gp*gq) :< 1e-24 + ckgradm2(`*`, gq, gp, gu, gg, gm) + + test "subgs": + let p2 = norm2(gp-x) + grad(p2, x) :~ retr(-2.0*(gp-a)) + norm2(grad(p2, gp) - 2.0*(gp-x)) :< 1e-26 + ckgradm2(`-`, gp, x, gq, y, gg) + + test "subgg": + let pq = norm2(gp-gq) + norm2(grad(pq, gp) - 2.0*(gp-gq)) :< 1e-26 + norm2(grad(pq, gq) - 2.0*(gq-gp)) :< 1e-26 + ckgradm2(`-`, gq, gp, gu, gg, gm) + + test "exp": + let egp = exp(gp) + norm2(egp.adj*egp - 1.0) :< 1e-20 + norm2(egp*egp.adj - 1.0) :< 1e-20 + ckgradm(exp, gm, 0.1*gp, gg) + + test "projTAH": + let gt = gg.projTAH + let tgt = gt.retr + tgt*tgt :< 1e-26 + ckgradm(projTAH, gg, gp, gu) + +suite "gauge fused": + setup: + let gg {.used.} = toGvalue g + let gu {.used.} = toGvalue u + let gp {.used.} = toGvalue p + let gq {.used.} = toGvalue q + let gm {.used.} = toGvalue m + let x {.used.} = toGvalue a + let y {.used.} = toGvalue b + + test "contractProjTAH": + let rf = contractProjTAH(gg, gu) + let rg = projTAH(gg * gu.adj) + norm2(rf - rg) :< 1e-26 + ckgradm2(contractProjTAH, gg, gu, gp, gq, gm) + + test "axexp": + let rf = axexp(x, gm) + let rg = exp(x*gm) + norm2(rf - rg) :< 1e-26 + ckgradm2(axexp, x, gm, y, 0.05*gq, gp) + + test "axexpmuly": + let rf = axexpmuly(x, gm, gg) + let rg = exp(x*gm)*gg + norm2(rf - rg) :< 1e-26 + ckgradm3(axexpmuly, x, gm, gu, y, 0.05*gq, gg, gp) + +suite "gauge action": + let gplaq = block: + var pl = 0.0 + for t in g.plaq: + pl += t + pl + + setup: + let gg {.used.} = toGvalue g + let gu {.used.} = toGvalue u + let gm {.used.} = toGvalue m + + test "wilson action": + let beta = 5.4 + let c = actWilson(beta) + let s = gaugeAction(c, gg) + s :~ -gplaq*float(6*vol*beta) + proc act(x: Gvalue): Gvalue = gaugeAction(c, x) + ckgrad(act, gg, gu) + + test "wilson force": + let beta = 5.4 + let c = actWilson(beta) + proc act(x: Gvalue): Gvalue = gaugeAction(c, x) + proc force(x: Gvalue): Gvalue = gaugeForce(c, x) + ckforce(act, force, gg, 4.0*gm) + + test "wilson force gradient": + let beta = 5.4 + let c = actWilson(beta) + proc force(x: Gvalue): Gvalue = gaugeForce(c, x) + ckgradm(force, gg, gu, gm) + +qexFinalize() diff --git a/src/experimental/graph/tgraph.nim b/src/experimental/graph/tgraph.nim index a950df3..27ef194 100644 --- a/src/experimental/graph/tgraph.nim +++ b/src/experimental/graph/tgraph.nim @@ -3,7 +3,16 @@ import math, unittest addOutputFormatter(newConsoleOutputFormatter(colorOutput = false)) import core, scalar -proc `:~`(a:Gvalue, b:float):bool = a.getfloat.almostEqual b + +template checkeq(ii: tuple[filename:string, line:int, column:int], sa: string, a: float, sb: string, b: float) = + if not almostEqual(a, b, unitsInLastPlace = 64): + checkpoint(ii.filename & ":" & $ii.line & ":" & $ii.column & ": Check failed: " & sa & " :~ " & sb) + checkpoint(" " & sa & ": " & $a) + checkpoint(" " & sb & ": " & $b) + fail() + +template `:~`(a:Gvalue, b:float) = + checkeq(instantiationInfo(), astToStr a, a.eval.getfloat, astToStr b, b) suite "scalar basic": # run once before @@ -11,25 +20,23 @@ suite "scalar basic": # before each test let a = 0.5 * (sqrt(5.0) - 1.0) let b = sqrt(2.0) - 1.0 - let x = Gscalar() - let y = Gscalar() - x := a - y := b + let x = toGvalue(a) + let y = toGvalue(b) #teardown: # after each test # run once after test "assign": - require x :~ a - require y :~ b + x :~ a + y :~ b test "n": let z = -x let dx = z.grad x z.eval dx.eval - check z :~ -a - check dx :~ -1.0 + z :~ -a + dx :~ -1.0 test "a": let z = x+y @@ -38,9 +45,9 @@ suite "scalar basic": z.eval dx.eval dy.eval - check z :~ a+b - check dx :~ 1.0 - check dy :~ 1.0 + z :~ a+b + dx :~ 1.0 + dy :~ 1.0 test "m": let z = x*y @@ -49,9 +56,9 @@ suite "scalar basic": z.eval dx.eval dy.eval - check z :~ a*b - check dx :~ b - check dy :~ a + z :~ a*b + dx :~ b + dy :~ a test "s": let z = x-y @@ -60,9 +67,9 @@ suite "scalar basic": z.eval dx.eval dy.eval - check z :~ a-b - check dx :~ 1.0 - check dy :~ -1.0 + z :~ a-b + dx :~ 1.0 + dy :~ -1.0 test "d": let z = x/y @@ -71,17 +78,17 @@ suite "scalar basic": z.eval dx.eval dy.eval - check z :~ a/b - check dx :~ 1.0/b - check dy :~ -a/(b*b) + z :~ a/b + dx :~ 1.0/b + dy :~ -a/(b*b) test "nm": let z = (-x)*x let dx = z.grad x z.eval dx.eval - check z :~ -a*a - check dx :~ -2.0*a + z :~ -a*a + dx :~ -2.0*a test "am": let z = (x+y)*x @@ -90,9 +97,9 @@ suite "scalar basic": z.eval dx.eval dy.eval - check z :~ (a+b)*a - check dx :~ 2.0*a+b - check dy :~ a + z :~ (a+b)*a + dx :~ 2.0*a+b + dy :~ a test "ama": let w = x @@ -101,8 +108,8 @@ suite "scalar basic": let dy = z.grad y z.eval dy.eval - check z :~ (a+b)*(a+b) - check dy :~ 2.0*(a+b) + z :~ (a+b)*(a+b) + dy :~ 2.0*(a+b) test "amd": let w = x @@ -111,8 +118,8 @@ suite "scalar basic": let dy = z.grad y z.eval dy.eval - check z :~ (a+b)*(a+b)/a - check dy :~ 2.0*(a+b)/a + z :~ (a+b)*(a+b)/a + dy :~ 2.0*(a+b)/a test "amnd": let w = x @@ -121,8 +128,8 @@ suite "scalar basic": let dy = z.grad y z.eval dy.eval - check z :~ (a+b)*(-a-b)/a - check dy :~ -2.0*(a+b)/a + z :~ (a+b)*(-a-b)/a + dy :~ -2.0*(a+b)/a test "samnd": let w = x-2.0 @@ -131,8 +138,8 @@ suite "scalar basic": let dy = z.grad y z.eval dy.eval - check z :~ (a+b-2.0)*(2.0-a-b)/(a-2.0) - check dy :~ -2.0*(a+b-2.0)/(a-2.0) + z :~ (a+b-2.0)*(2.0-a-b)/(a-2.0) + dy :~ -2.0*(a+b-2.0)/(a-2.0) suite "scalar d2": setup: @@ -142,8 +149,8 @@ suite "scalar d2": let d = a + 3.0 * b - 1.0 let x = Gscalar() let y = Gscalar() - x := a - y := b + x.update a + y.update b test "samnd dx dy": let w = x-2.0 @@ -154,9 +161,9 @@ suite "scalar d2": z.eval dy.eval dxy.eval - check z :~ (a+b-2.0)*(2.0-a-b)/(a-2.0) - check dy :~ -2.0*(a+b-2.0)/(a-2.0) - check dxy :~ 2.0*b/((a-2.0)*(a-2.0)) + z :~ (a+b-2.0)*(2.0-a-b)/(a-2.0) + dy :~ -2.0*(a+b-2.0)/(a-2.0) + dxy :~ 2.0*b/((a-2.0)*(a-2.0)) test "samnd dx dy repeat": let w = x-2.0 @@ -167,22 +174,22 @@ suite "scalar d2": z.eval dy.eval dxy.eval - check z :~ (a+b-2.0)*(2.0-a-b)/(a-2.0) - check dy :~ -2.0*(a+b-2.0)/(a-2.0) - check dxy :~ 2.0*b/((a-2.0)*(a-2.0)) - y := c + z :~ (a+b-2.0)*(2.0-a-b)/(a-2.0) + dy :~ -2.0*(a+b-2.0)/(a-2.0) + dxy :~ 2.0*b/((a-2.0)*(a-2.0)) + y.update c dy.eval - check dy :~ -2.0*(a+c-2.0)/(a-2.0) - x := d + dy :~ -2.0*(a+c-2.0)/(a-2.0) + x.update d dxy.eval - check dxy :~ 2.0*c/((d-2.0)*(d-2.0)) - y := a + dxy :~ 2.0*c/((d-2.0)*(d-2.0)) + y.update a z.eval dy.eval dxy.eval - check z :~ (d+a-2.0)*(2.0-d-a)/(d-2.0) - check dy :~ -2.0*(d+a-2.0)/(d-2.0) - check dxy :~ 2.0*a/((d-2.0)*(d-2.0)) + z :~ (d+a-2.0)*(2.0-d-a)/(d-2.0) + dy :~ -2.0*(d+a-2.0)/(d-2.0) + dxy :~ 2.0*a/((d-2.0)*(d-2.0)) test "samndpdy dx": let w = x-2.0 @@ -193,14 +200,14 @@ suite "scalar d2": let dx = (u*u).grad x z.eval dx.eval - check z :~ (a+b-2.0)*(2.0-a-b)/(a-2.0) - check dx :~ -2.0*(b+a-2.0)*(5.0*b+5.0*a-9.0)*(5.0*b*b+b-5.0*a*a+20.0*a-20.0)/(25.0*(a-2.0)*(a-2.0)*(a-2.0)) - y := c + z :~ (a+b-2.0)*(2.0-a-b)/(a-2.0) + dx :~ -2.0*(b+a-2.0)*(5.0*b+5.0*a-9.0)*(5.0*b*b+b-5.0*a*a+20.0*a-20.0)/(25.0*(a-2.0)*(a-2.0)*(a-2.0)) + y.update c dx.eval - check dx :~ -2.0*(c+a-2.0)*(5.0*c+5.0*a-9.0)*(5.0*c*c+c-5.0*a*a+20.0*a-20.0)/(25.0*(a-2.0)*(a-2.0)*(a-2.0)) - x := d - y := a + dx :~ -2.0*(c+a-2.0)*(5.0*c+5.0*a-9.0)*(5.0*c*c+c-5.0*a*a+20.0*a-20.0)/(25.0*(a-2.0)*(a-2.0)*(a-2.0)) + x.update d + y.update a dx.eval u.eval - check u :~ (d+a-2.0)*(2.0-d-a)/(d-2.0) - 0.1*2.0*(d+a-2.0)/(d-2.0) - check dx :~ -2.0*(a+d-2.0)*(5.0*a+5.0*d-9.0)*(5.0*a*a+a-5.0*d*d+20.0*d-20.0)/(25.0*(d-2.0)*(d-2.0)*(d-2.0)) + u :~ (d+a-2.0)*(2.0-d-a)/(d-2.0) - 0.1*2.0*(d+a-2.0)/(d-2.0) + dx :~ -2.0*(a+d-2.0)*(5.0*a+5.0*d-9.0)*(5.0*a*a+a-5.0*d*d+20.0*d-20.0)/(25.0*(d-2.0)*(d-2.0)*(d-2.0)) From 737ffd0d22b52f6774d117f174dc68dd411521e7 Mon Sep 17 00:00:00 2001 From: Xiao-Yong Jin Date: Wed, 17 Jul 2024 18:44:03 -0500 Subject: [PATCH 37/60] graph/tggauge: loosen test tolerance --- src/experimental/graph/tggauge.nim | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/experimental/graph/tggauge.nim b/src/experimental/graph/tggauge.nim index cf6b2d9..74fef29 100644 --- a/src/experimental/graph/tggauge.nim +++ b/src/experimental/graph/tggauge.nim @@ -41,7 +41,7 @@ proc ndiff(zt: Gvalue, t: Gscalar): (float, float) = (dzdt, e) template check(ii: tuple[filename:string, line:int, column:int], ast: string, dzdt, e, gdota: float) = - if not almostEqual(gdota, dzdt, unitsInLastPlace = 64*1024): + if not almostEqual(gdota, dzdt, unitsInLastPlace = 128*1024): checkpoint(ii.filename & ":" & $ii.line & ":" & $ii.column & ": Check failed: " & ast) checkpoint(" ndiff: " & $dzdt & " +/- " & $e) checkpoint(" grad: " & $gdota) From e6cf14cb4f78824c9a4a6ce1c620a4b23dc6ac6a Mon Sep 17 00:00:00 2001 From: Xiao-Yong Jin Date: Thu, 18 Jul 2024 13:35:54 -0500 Subject: [PATCH 38/60] experimental/graph: add exp, cond, and more tests --- src/experimental/graph/core.nim | 4 +- src/experimental/graph/gauge.nim | 26 +-- src/experimental/graph/multi.nim | 27 +-- src/experimental/graph/scalar.nim | 299 ++++++++++++++++++++++++++++- src/experimental/graph/tggauge.nim | 13 ++ src/experimental/graph/tgraph.nim | 134 ++++++++----- 6 files changed, 418 insertions(+), 85 deletions(-) diff --git a/src/experimental/graph/core.nim b/src/experimental/graph/core.nim index fbad87d..1167caf 100644 --- a/src/experimental/graph/core.nim +++ b/src/experimental/graph/core.nim @@ -5,7 +5,6 @@ TODO -- if - function/lambda ]# @@ -73,7 +72,7 @@ proc nodeRepr*(x: Gvalue): string = if f != nil: result &= " " & $f & "@0X" & strip(toHex(cast[int](f)), trailing = false, chars = {'0'}) -method newOneOf*(x: Gvalue): Gvalue {.base.} = raiseErrorBaseMethod("newOneOf(" & $x & ")") +method newOneOf*(x: Gvalue): Gvalue {.base.} = raiseErrorBaseMethod("newOneOf(" & $x & ")") ## Be sure to zero init fields method valCopy*(z: Gvalue, x: Gvalue) {.base.} = raiseErrorBaseMethod("valCopy(" & $z & "," & $x & ")") proc assignGvalue(z: Gvalue, x: Gvalue) = @@ -145,6 +144,7 @@ method `+`*(x: Gvalue, y: Gvalue): Gvalue {.base.} = raiseErrorBaseMethod("`+`(" method `*`*(x: Gvalue, y: Gvalue): Gvalue {.base.} = raiseErrorBaseMethod("`*`(" & $x & ", " & $y & ")") method `-`*(x: Gvalue, y: Gvalue): Gvalue {.base.} = raiseErrorBaseMethod("`-`(" & $x & ", " & $y & ")") method `/`*(x: Gvalue, y: Gvalue): Gvalue {.base.} = raiseErrorBaseMethod("`/`(" & $x & ", " & $y & ")") +method exp*(x: Gvalue): Gvalue {.base.} = raiseErrorBaseMethod("exp(" & $x & ")") proc updated*(x: Gvalue) = var epoch {.global.} = 0 diff --git a/src/experimental/graph/gauge.nim b/src/experimental/graph/gauge.nim index d6425cb..f058cd6 100644 --- a/src/experimental/graph/gauge.nim +++ b/src/experimental/graph/gauge.nim @@ -20,7 +20,12 @@ proc toGvalue*(x: Gauge): Ggauge = result = Ggauge(gval: x) result.updated -method newOneOf*(x: Ggauge): Gvalue = Ggauge(gval: x.gval.newOneOf) +method newOneOf*(x: Ggauge): Gvalue = + let g = x.gval.newOneOf + threads: + for f in g: + f := 0.0 + Ggauge(gval: g) method valCopy*(z: Ggauge, x: Ggauge) = let u = z.gval let v = x.gval @@ -38,7 +43,6 @@ method retr*(x: Gvalue): Gvalue {.base.} = raiseErrorBaseMethod("retr(" & $x & " method adj*(x: Gvalue): Gvalue {.base.} = raiseErrorBaseMethod("adj(" & $x & ")") method norm2*(x: Gvalue): Gvalue {.base.} = raiseErrorBaseMethod("norm2(" & $x & ")") method redot*(x: Gvalue, y: Gvalue): Gvalue {.base.} = raiseErrorBaseMethod("redot(" & $x & "," & $y & ")") -method exp*(x: Gvalue): Gvalue {.base.} = raiseErrorBaseMethod("exp(" & $x & ")") method expDeriv*(b: Gvalue, x: Gvalue): Gvalue {.base.} = raiseErrorBaseMethod("expDeriv(" & $b & "," & $x & ")") method projTAH*(x: Gvalue): Gvalue {.base.} = raiseErrorBaseMethod("projTAH(" & $x & ")") @@ -133,7 +137,7 @@ proc neggb(zb: Gvalue, z: Gvalue, i: int, dep: Gvalue): Gvalue = let negg = newGfunc(forward = neggf, backward = neggb, name = "-g") -method `-`(x: Ggauge): Gvalue = Ggauge(gval: x.gval.newOneOf, inputs: @[Gvalue(x)], gfunc: negg) +method `-`*(x: Ggauge): Gvalue = Ggauge(gval: x.gval.newOneOf, inputs: @[Gvalue(x)], gfunc: negg) proc addsgb(zb: Gvalue, z: Gvalue, i: int, dep: Gvalue): Gvalue = if zb == nil: @@ -156,7 +160,7 @@ proc addsgf(v: Gvalue) = let addsg = newGfunc(forward = addsgf, backward = addsgb, name = "s+g") -method `+`(x: Gscalar, y: Ggauge): Gvalue = Ggauge(gval: y.gval.newOneOf, inputs: @[Gvalue(x), y], gfunc: addsg) +method `+`*(x: Gscalar, y: Ggauge): Gvalue = Ggauge(gval: y.gval.newOneOf, inputs: @[Gvalue(x), y], gfunc: addsg) proc addggb(zb: Gvalue, z: Gvalue, i: int, dep: Gvalue): Gvalue = if zb == nil: @@ -174,7 +178,7 @@ proc addggf(v: Gvalue) = let addgg = newGfunc(forward = addggf, backward = addggb, name = "g+g") -method `+`(x: Ggauge, y: Ggauge): Gvalue = Ggauge(gval: x.gval.newOneOf, inputs: @[Gvalue(x), y], gfunc: addgg) +method `+`*(x: Ggauge, y: Ggauge): Gvalue = Ggauge(gval: x.gval.newOneOf, inputs: @[Gvalue(x), y], gfunc: addgg) proc mulsgb(zb: Gvalue, z: Gvalue, i: int, dep: Gvalue): Gvalue = if zb == nil: @@ -197,7 +201,7 @@ proc mulsgf(v: Gvalue) = let mulsg = newGfunc(forward = mulsgf, backward = mulsgb, name = "s*g") -method `*`(x: Gscalar, y: Ggauge): Gvalue = Ggauge(gval: y.gval.newOneOf, inputs: @[Gvalue(x), y], gfunc: mulsg) +method `*`*(x: Gscalar, y: Ggauge): Gvalue = Ggauge(gval: y.gval.newOneOf, inputs: @[Gvalue(x), y], gfunc: mulsg) proc mulggb(zb: Gvalue, z: Gvalue, i: int, dep: Gvalue): Gvalue = if zb == nil: @@ -220,7 +224,7 @@ proc mulggf(v: Gvalue) = let mulgg = newGfunc(forward = mulggf, backward = mulggb, name = "g*g") -method `*`(x: Ggauge, y: Ggauge): Gvalue = Ggauge(gval: x.gval.newOneOf, inputs: @[Gvalue(x), y], gfunc: mulgg) +method `*`*(x: Ggauge, y: Ggauge): Gvalue = Ggauge(gval: x.gval.newOneOf, inputs: @[Gvalue(x), y], gfunc: mulgg) proc redotggb(zb: Gvalue, z: Gvalue, i: int, dep: Gvalue): Gvalue = case i @@ -249,7 +253,7 @@ proc redotggf(v: Gvalue) = let redotgg = newGfunc(forward = redotggf, backward = redotggb, name = "redotgg") -method redot(x: Ggauge, y: Ggauge): Gvalue = Gscalar(inputs: @[Gvalue(x), y], gfunc: redotgg) +method redot*(x: Ggauge, y: Ggauge): Gvalue = Gscalar(inputs: @[Gvalue(x), y], gfunc: redotgg) proc subgsb(zb: Gvalue, z: Gvalue, i: int, dep: Gvalue): Gvalue = if zb == nil: @@ -272,7 +276,7 @@ proc subgsf(v: Gvalue) = let subgs = newGfunc(forward = subgsf, backward = subgsb, name = "g-s") -method `-`(x: Ggauge, y: Gscalar): Gvalue = Ggauge(gval: x.gval.newOneOf, inputs: @[Gvalue(x), y], gfunc: subgs) +method `-`*(x: Ggauge, y: Gscalar): Gvalue = Ggauge(gval: x.gval.newOneOf, inputs: @[Gvalue(x), y], gfunc: subgs) proc subggb(zb: Gvalue, z: Gvalue, i: int, dep: Gvalue): Gvalue = if zb == nil: @@ -295,7 +299,7 @@ proc subggf(v: Gvalue) = let subgg = newGfunc(forward = subggf, backward = subggb, name = "g-g") -method `-`(x: Ggauge, y: Ggauge): Gvalue = Ggauge(gval: x.gval.newOneOf, inputs: @[Gvalue(x), y], gfunc: subgg) +method `-`*(x: Ggauge, y: Ggauge): Gvalue = Ggauge(gval: x.gval.newOneOf, inputs: @[Gvalue(x), y], gfunc: subgg) proc expgb(zb: Gvalue, z: Gvalue, i: int, dep: Gvalue): Gvalue = if zb == nil: @@ -531,7 +535,7 @@ proc redotccf(v: Gvalue) = let redotcc = newGfunc(forward = redotccf, backward = redotccb, name = "redotcc") -method redot(x: Gactcoeff, y: Gactcoeff): Gvalue = Gscalar(inputs: @[Gvalue(x), y], gfunc: redotcc) +method redot*(x: Gactcoeff, y: Gactcoeff): Gvalue = Gscalar(inputs: @[Gvalue(x), y], gfunc: redotcc) const C1Symanzik = -1.0/12.0 # tree-level diff --git a/src/experimental/graph/multi.nim b/src/experimental/graph/multi.nim index a1edb3b..4fcd37a 100644 --- a/src/experimental/graph/multi.nim +++ b/src/experimental/graph/multi.nim @@ -1,30 +1,9 @@ import core, scalar type - Gint* {.final.} = ref object of Gvalue - ival: int Gmulti* {.final.} = ref object of Gvalue mval: seq[Gvalue] -proc getint*(x: Gvalue): int = Gint(x).ival - -proc `getint=`*(x: Gvalue, y: int) = - let xs = Gint(x) - xs.ival = y - -proc update*(x: Gvalue, y: int) = - x.getint = y - x.updated - -converter toGvalue*(x: int): Gvalue = - result = Gint(ival: x) - result.updated - -method newOneOf*(x: Gint): Gvalue = Gint() -method valCopy*(z: Gint, x: Gint) = z.ival = x.ival - -method `$`*(x: Gint): string = $x.ival - proc getmulti*(x: Gvalue): seq[Gvalue] = Gmulti(x).mval proc `getmulti=`*(x: Gvalue, y: seq[Gvalue]) = @@ -58,7 +37,7 @@ method updateAt*(x: Gvalue, i: Gvalue, y: Gvalue): Gvalue {.base.} = raiseErrorB proc getAtmb(zb: Gvalue, z: Gvalue, i: int, dep: Gvalue): Gvalue = case i of 0: - return z.inputs[0].updateAt(i, zb) + return z.inputs[0].newOneOf.updateAt(i, zb) else: raiseValueError("i must be 0, got: " & $i) @@ -70,14 +49,14 @@ proc getAtmf(v: Gvalue) = let getAtm = newGfunc(forward = getAtmf, backward = getAtmb, name = "getAtm") method `[]`*(x: Gmulti, i: Gint): Gvalue = - result = newOneOf x.mval[i.ival] + result = newOneOf x.mval[i.getint] result.inputs = @[Gvalue(x), i] result.gfunc = getAtm proc updateAtmb(zb: Gvalue, z: Gvalue, i: int, dep: Gvalue): Gvalue = case i of 0: - return zb.updateAt(i, 0.0) # TODO: need a zero method + return zb.updateAt(i, zb.inputs[0].newOneOf) of 2: return zb[i] else: diff --git a/src/experimental/graph/scalar.nim b/src/experimental/graph/scalar.nim index f490766..a152e35 100644 --- a/src/experimental/graph/scalar.nim +++ b/src/experimental/graph/scalar.nim @@ -1,4 +1,5 @@ import core +from math import exp type Gscalar* {.final.} = ref object of Gvalue @@ -6,6 +7,8 @@ type ## `getfloat=` changes the float value, useful during the graph construction ## `update` calls `getfloat=` and use `updated` to signal re-evaluation of the graph after graph construction sval: float + Gint* {.final.} = ref object of Gvalue + ival: int proc getfloat*(x: Gvalue): float = Gscalar(x).sval @@ -26,6 +29,25 @@ method valCopy*(z: Gscalar, x: Gscalar) = z.sval = x.sval method `$`*(x: Gscalar): string = $x.sval +proc getint*(x: Gvalue): int = Gint(x).ival + +proc `getint=`*(x: Gvalue, y: int) = + let xs = Gint(x) + xs.ival = y + +proc update*(x: Gvalue, y: int) = + x.getint = y + x.updated + +converter toGvalue*(x: int): Gvalue = + result = Gint(ival: x) + result.updated + +method newOneOf*(x: Gint): Gvalue = Gint() +method valCopy*(z: Gint, x: Gint) = z.ival = x.ival + +method `$`*(x: Gint): string = $x.ival + proc negsf(v: Gvalue) = let x = Gscalar(v.inputs[0]) let z = Gscalar(v) @@ -43,7 +65,7 @@ proc negsb(zb: Gvalue, z: Gvalue, i: int, dep: Gvalue): Gvalue = let gsneg = newGfunc(forward = negsf, backward = negsb, name = "-") -method `-`(x: Gscalar): Gvalue = Gscalar(inputs: @[Gvalue(x)], gfunc: gsneg) +method `-`*(x: Gscalar): Gvalue = Gscalar(inputs: @[Gvalue(x)], gfunc: gsneg) proc addsf(v: Gvalue) = let x = Gscalar(v.inputs[0]) @@ -59,7 +81,7 @@ proc addsb(zb: Gvalue, z: Gvalue, i: int, dep: Gvalue): Gvalue = let gsadd = newGfunc(forward = addsf, backward = addsb, name = "+") -method `+`(x: Gscalar, y: Gscalar): Gvalue = Gscalar(inputs: @[Gvalue(x), y], gfunc: gsadd) +method `+`*(x: Gscalar, y: Gscalar): Gvalue = Gscalar(inputs: @[Gvalue(x), y], gfunc: gsadd) proc mulsf(v: Gvalue) = let x = Gscalar(v.inputs[0]) @@ -75,7 +97,7 @@ proc mulsb(zb: Gvalue, z: Gvalue, i: int, dep: Gvalue): Gvalue = let gsmul = newGfunc(forward = mulsf, backward = mulsb, name = "*") -method `*`(x: Gscalar, y: Gscalar): Gvalue = Gscalar(inputs: @[Gvalue(x), y], gfunc: gsmul) +method `*`*(x: Gscalar, y: Gscalar): Gvalue = Gscalar(inputs: @[Gvalue(x), y], gfunc: gsmul) proc subsf(v: Gvalue) = let x = Gscalar(v.inputs[0]) @@ -100,7 +122,7 @@ proc subsb(zb: Gvalue, z: Gvalue, i: int, dep: Gvalue): Gvalue = let gssub = newGfunc(forward = subsf, backward = subsb, name = "-") -method `-`(x: Gscalar, y: Gscalar): Gvalue = Gscalar(inputs: @[Gvalue(x), y], gfunc: gssub) +method `-`*(x: Gscalar, y: Gscalar): Gvalue = Gscalar(inputs: @[Gvalue(x), y], gfunc: gssub) proc divsf(v: Gvalue) = let x = Gscalar(v.inputs[0]) @@ -128,7 +150,274 @@ proc divsb(zb: Gvalue, z: Gvalue, i: int, dep: Gvalue): Gvalue = let gsdiv = newGfunc(forward = divsf, backward = divsb, name = "/") -method `/`(x: Gscalar, y: Gscalar): Gvalue = Gscalar(inputs: @[Gvalue(x), y], gfunc: gsdiv) +method `/`*(x: Gscalar, y: Gscalar): Gvalue = Gscalar(inputs: @[Gvalue(x), y], gfunc: gsdiv) + +proc expsf(v: Gvalue) = + let x = Gscalar(v.inputs[0]) + let z = Gscalar(v) + z.sval = exp(x.sval) + +proc expsb(zb: Gvalue, z: Gvalue, i: int, dep: Gvalue): Gvalue = + case i + of 0: + if zb == nil: + return z + else: + return zb*z + else: + raiseValueError("i must be 0, got: " & $i) + +let exps = newGfunc(forward = expsf, backward = expsb, name = "exps") + +method exp*(x: Gscalar): Gvalue = Gscalar(inputs: @[Gvalue(x)], gfunc: exps) + +method `not`*(x: Gvalue): Gvalue {.base.} = raiseErrorBaseMethod("not(" & $x & ")") +method `and`*(x: Gvalue, y: Gvalue): Gvalue {.base.} = raiseErrorBaseMethod("and(" & $x & ", " & $y & ")") +method `or`*(x: Gvalue, y: Gvalue): Gvalue {.base.} = raiseErrorBaseMethod("or(" & $x & ", " & $y & ")") +method `<`*(x: Gvalue, y: Gvalue): Gvalue {.base.} = raiseErrorBaseMethod("`<`(" & $x & ", " & $y & ")") +method equal*(x: Gvalue, y: Gvalue): Gvalue {.base.} = raiseErrorBaseMethod("equal(" & $x & ", " & $y & ")") + +method cond*(c: Gvalue, x: Gvalue, y: Gvalue): Gvalue {.base.} = raiseErrorBaseMethod("cond(" & $c & "," & $x & ", " & $y & ")") + +proc `>`*(x, y: Gvalue): Gvalue = not(x < y) +proc `>=`*(x, y: Gvalue): Gvalue = x > y or equal(x,y) +proc `<=`*(x, y: Gvalue): Gvalue = x < y or equal(x,y) +proc `!=`*(x, y: Gvalue): Gvalue = not equal(x,y) +proc `xor`*(x, y: Gvalue): Gvalue = not equal(not(x), not(y)) # uses `not` to convert to 0/1 + +proc notsb(zb: Gvalue, z: Gvalue, i: int, dep: Gvalue): Gvalue = + case i + of 0: + return toGvalue(0.0) + else: + raiseValueError("i must be 0, got: " & $i) + +proc notsf(v: Gvalue) = + let x = Gscalar(v.inputs[0]) + let z = Gscalar(v) + z.sval = if x.sval == 0.0: 1.0 else: 0.0 + +let nots = newGfunc(forward = notsf, backward = notsb, name = "nots") + +method `not`*(x: Gscalar): Gvalue = Gscalar(inputs: @[Gvalue(x)], gfunc: nots) + +proc andsb(zb: Gvalue, z: Gvalue, i: int, dep: Gvalue): Gvalue = + case i + of 0, 1: + return toGvalue(0.0) + else: + raiseValueError("i must be 0 or 1, got: " & $i) + +proc andsf(v: Gvalue) = + let x = Gscalar(v.inputs[0]) + let y = Gscalar(v.inputs[1]) + let z = Gscalar(v) + z.sval = if x.sval == 0.0 or y.sval == 0.0: 0.0 else: 1.0 + +let ands = newGfunc(forward = andsf, backward = andsb, name = "ands") + +method `and`*(x: Gscalar, y: Gscalar): Gvalue = Gscalar(inputs: @[Gvalue(x), y], gfunc: ands) + +proc orsb(zb: Gvalue, z: Gvalue, i: int, dep: Gvalue): Gvalue = + case i + of 0, 1: + return toGvalue(0.0) + else: + raiseValueError("i must be 0 or 1, got: " & $i) + +proc orsf(v: Gvalue) = + let x = Gscalar(v.inputs[0]) + let y = Gscalar(v.inputs[1]) + let z = Gscalar(v) + z.sval = if x.sval == 0.0 and y.sval == 0.0: 0.0 else: 1.0 + +let ors = newGfunc(forward = orsf, backward = orsb, name = "ors") + +method `or`*(x: Gscalar, y: Gscalar): Gvalue = Gscalar(inputs: @[Gvalue(x), y], gfunc: ors) + +proc ltsb(zb: Gvalue, z: Gvalue, i: int, dep: Gvalue): Gvalue = + case i + of 0, 1: + return toGvalue(0.0) + else: + raiseValueError("i must be 0 or 1, got: " & $i) + +proc ltsf(v: Gvalue) = + let x = Gscalar(v.inputs[0]) + let y = Gscalar(v.inputs[1]) + let z = Gscalar(v) + z.sval = if x.sval < y.sval: 1.0 else: 0.0 + +let lts = newGfunc(forward = ltsf, backward = ltsb, name = "lts") + +method `<`*(x: Gscalar, y: Gscalar): Gvalue = Gscalar(inputs: @[Gvalue(x), y], gfunc: lts) + +proc equalsb(zb: Gvalue, z: Gvalue, i: int, dep: Gvalue): Gvalue = + case i + of 0, 1: + return toGvalue(0.0) + else: + raiseValueError("i must be 0 or 1, got: " & $i) + +proc equalsf(v: Gvalue) = + let x = Gscalar(v.inputs[0]) + let y = Gscalar(v.inputs[1]) + let z = Gscalar(v) + z.sval = if x.sval == y.sval: 1.0 else: 0.0 + +let equals = newGfunc(forward = equalsf, backward = equalsb, name = "equals") + +method equal*(x: Gscalar, y: Gscalar): Gvalue = Gscalar(inputs: @[Gvalue(x), y], gfunc: equals) + +proc condsb(zb: Gvalue, z: Gvalue, i: int, dep: Gvalue): Gvalue = + case i + of 0: + return toGvalue(0.0) + of 1: + if zb == nil: + # the output must be a scalar, otherwise crash later + return cond(z.inputs[0], toGvalue(1.0), toGvalue(0.0)) + else: + return cond(z.inputs[0], zb, zb.newOneOf) + of 2: + if zb == nil: + # the output must be a scalar, otherwise crash later + return cond(z.inputs[0], toGvalue(0.0), toGvalue(1.0)) + else: + return cond(z.inputs[0], zb.newOneOf, zb) + else: + raiseValueError("i must be 0 or 1, got: " & $i) + +proc condsf(v: Gvalue) = + let c = Gscalar(v.inputs[0]) + if c.sval == 0.0: + v.valCopy v.inputs[2] + else: + v.valCopy v.inputs[1] + +let conds = newGfunc(forward = condsf, backward = condsb, name = "conds") + +method cond*(c: Gscalar, x: Gvalue, y: Gvalue): Gvalue = + result = x.newOneOf + result.inputs = @[Gvalue(c), x, y] + result.gfunc = conds + +proc notib(zb: Gvalue, z: Gvalue, i: int, dep: Gvalue): Gvalue = + case i + of 0: + return toGvalue(0) + else: + raiseValueError("i must be 0, got: " & $i) + +proc notif(v: Gvalue) = + let x = Gint(v.inputs[0]) + let z = Gint(v) + z.ival = if x.ival == 0: 1 else: 0 + +let noti = newGfunc(forward = notif, backward = notib, name = "noti") + +method `not`*(x: Gint): Gvalue = Gint(inputs: @[Gvalue(x)], gfunc: noti) + +proc andib(zb: Gvalue, z: Gvalue, i: int, dep: Gvalue): Gvalue = + case i + of 0, 1: + return toGvalue(0) + else: + raiseValueError("i must be 0 or 1, got: " & $i) + +proc andif(v: Gvalue) = + let x = Gint(v.inputs[0]) + let y = Gint(v.inputs[1]) + let z = Gint(v) + z.ival = if x.ival == 0 or y.ival == 0: 0 else: 1 + +let andi = newGfunc(forward = andif, backward = andib, name = "andi") + +method `and`*(x: Gint, y: Gint): Gvalue = Gint(inputs: @[Gvalue(x), y], gfunc: andi) + +proc orib(zb: Gvalue, z: Gvalue, i: int, dep: Gvalue): Gvalue = + case i + of 0, 1: + return toGvalue(0) + else: + raiseValueError("i must be 0 or 1, got: " & $i) + +proc orif(v: Gvalue) = + let x = Gint(v.inputs[0]) + let y = Gint(v.inputs[1]) + let z = Gint(v) + z.ival = if x.ival == 0 and y.ival == 0: 0 else: 1 + +let ori = newGfunc(forward = orif, backward = orib, name = "ori") + +method `or`*(x: Gint, y: Gint): Gvalue = Gint(inputs: @[Gvalue(x), y], gfunc: ori) + +proc ltib(zb: Gvalue, z: Gvalue, i: int, dep: Gvalue): Gvalue = + case i + of 0, 1: + return toGvalue(0) + else: + raiseValueError("i must be 0 or 1, got: " & $i) + +proc ltif(v: Gvalue) = + let x = Gint(v.inputs[0]) + let y = Gint(v.inputs[1]) + let z = Gint(v) + z.ival = if x.ival < y.ival: 1 else: 0 + +let lti = newGfunc(forward = ltif, backward = ltib, name = "lti") + +method `<`*(x: Gint, y: Gint): Gvalue = Gint(inputs: @[Gvalue(x), y], gfunc: lti) + +proc equalib(zb: Gvalue, z: Gvalue, i: int, dep: Gvalue): Gvalue = + case i + of 0, 1: + return toGvalue(0) + else: + raiseValueError("i must be 0 or 1, got: " & $i) + +proc equalif(v: Gvalue) = + let x = Gint(v.inputs[0]) + let y = Gint(v.inputs[1]) + let z = Gint(v) + z.ival = if x.ival == y.ival: 1 else: 0 + +let equali = newGfunc(forward = equalif, backward = equalib, name = "equali") + +method equal*(x: Gint, y: Gint): Gvalue = Gint(inputs: @[Gvalue(x), y], gfunc: equali) + +proc condib(zb: Gvalue, z: Gvalue, i: int, dep: Gvalue): Gvalue = + case i + of 0: + return toGvalue(0) + of 1: + if zb == nil: + # the output must be a scalar, otherwise crash later + return cond(z.inputs[0], toGvalue(1.0), toGvalue(0.0)) + else: + return cond(z.inputs[0], zb, zb.newOneOf) + of 2: + if zb == nil: + # the output must be a scalar, otherwise crash later + return cond(z.inputs[0], toGvalue(0.0), toGvalue(1.0)) + else: + return cond(z.inputs[0], zb.newOneOf, zb) + else: + raiseValueError("i must be 0 or 1, got: " & $i) + +proc condif(v: Gvalue) = + let c = Gint(v.inputs[0]) + if c.ival == 0: + v.valCopy v.inputs[2] + else: + v.valCopy v.inputs[1] + +let condi = newGfunc(forward = condif, backward = condib, name = "condi") + +method cond*(c: Gint, x: Gvalue, y: Gvalue): Gvalue = + result = x.newOneOf + result.inputs = @[Gvalue(c), x, y] + result.gfunc = condi when isMainModule: import math diff --git a/src/experimental/graph/tggauge.nim b/src/experimental/graph/tggauge.nim index 74fef29..a51ce76 100644 --- a/src/experimental/graph/tggauge.nim +++ b/src/experimental/graph/tggauge.nim @@ -237,18 +237,31 @@ suite "gauge fused": let rf = contractProjTAH(gg, gu) let rg = projTAH(gg * gu.adj) norm2(rf - rg) :< 1e-26 + let srf = rf.norm2 + let srg = rg.norm2 + norm2(grad(srf, gg) - grad(srg, gg)) :< 1e-26 + norm2(grad(srf, gu) - grad(srg, gu)) :< 1e-26 ckgradm2(contractProjTAH, gg, gu, gp, gq, gm) test "axexp": let rf = axexp(x, gm) let rg = exp(x*gm) norm2(rf - rg) :< 1e-26 + let srf = retr(rf * gu) + let srg = retr(rg * gu) + grad(srf, x) :~ grad(srg, x) + norm2(grad(srf, gm) - grad(srg, gm)) :< 1e-26 ckgradm2(axexp, x, gm, y, 0.05*gq, gp) test "axexpmuly": let rf = axexpmuly(x, gm, gg) let rg = exp(x*gm)*gg norm2(rf - rg) :< 1e-26 + let srf = retr(rf * gu) + let srg = retr(rg * gu) + grad(srf, x) :~ grad(srg, x) + norm2(grad(srf, gm) - grad(srg, gm)) :< 1e-26 + norm2(grad(srf, gg) - grad(srg, gg)) :< 1e-26 ckgradm3(axexpmuly, x, gm, gu, y, 0.05*gq, gg, gp) suite "gauge action": diff --git a/src/experimental/graph/tgraph.nim b/src/experimental/graph/tgraph.nim index 27ef194..a9d3bd2 100644 --- a/src/experimental/graph/tgraph.nim +++ b/src/experimental/graph/tgraph.nim @@ -33,8 +33,6 @@ suite "scalar basic": test "n": let z = -x let dx = z.grad x - z.eval - dx.eval z :~ -a dx :~ -1.0 @@ -42,9 +40,6 @@ suite "scalar basic": let z = x+y let dx = z.grad x let dy = z.grad y - z.eval - dx.eval - dy.eval z :~ a+b dx :~ 1.0 dy :~ 1.0 @@ -53,9 +48,6 @@ suite "scalar basic": let z = x*y let dx = z.grad x let dy = z.grad y - z.eval - dx.eval - dy.eval z :~ a*b dx :~ b dy :~ a @@ -64,9 +56,6 @@ suite "scalar basic": let z = x-y let dx = z.grad x let dy = z.grad y - z.eval - dx.eval - dy.eval z :~ a-b dx :~ 1.0 dy :~ -1.0 @@ -75,28 +64,37 @@ suite "scalar basic": let z = x/y let dx = z.grad x let dy = z.grad y - z.eval - dx.eval - dy.eval z :~ a/b dx :~ 1.0/b dy :~ -a/(b*b) + test "exp": + let z = exp(x) + let dx = z.grad x + let ddx = dx.grad x + let dddx = ddx.grad x + let e = exp(a) + z :~ e + dx :~ e + ddx :~ e + dddx :~ e + test "nm": let z = (-x)*x let dx = z.grad x - z.eval - dx.eval z :~ -a*a dx :~ -2.0*a + test "nm exp": + let z = (-exp(x))*exp(x) + let dx = z.grad x + z :~ -exp(2.0*a) + dx :~ -2.0*exp(2.0*a) + test "am": let z = (x+y)*x let dx = z.grad x let dy = z.grad y - z.eval - dx.eval - dy.eval z :~ (a+b)*a dx :~ 2.0*a+b dy :~ a @@ -106,8 +104,6 @@ suite "scalar basic": let v = w+y let z = v*v let dy = z.grad y - z.eval - dy.eval z :~ (a+b)*(a+b) dy :~ 2.0*(a+b) @@ -116,8 +112,6 @@ suite "scalar basic": let v = w+y let z = v*v/w let dy = z.grad y - z.eval - dy.eval z :~ (a+b)*(a+b)/a dy :~ 2.0*(a+b)/a @@ -126,8 +120,6 @@ suite "scalar basic": let v = w+y let z = v*(-v)/w let dy = z.grad y - z.eval - dy.eval z :~ (a+b)*(-a-b)/a dy :~ -2.0*(a+b)/a @@ -136,8 +128,6 @@ suite "scalar basic": let v = w+y let z = v*(-v)/w let dy = z.grad y - z.eval - dy.eval z :~ (a+b-2.0)*(2.0-a-b)/(a-2.0) dy :~ -2.0*(a+b-2.0)/(a-2.0) @@ -158,9 +148,6 @@ suite "scalar d2": let z = v*(-v)/w let dy = z.grad y let dxy = dy.grad x - z.eval - dy.eval - dxy.eval z :~ (a+b-2.0)*(2.0-a-b)/(a-2.0) dy :~ -2.0*(a+b-2.0)/(a-2.0) dxy :~ 2.0*b/((a-2.0)*(a-2.0)) @@ -171,22 +158,14 @@ suite "scalar d2": let z = v*(-v)/w let dy = z.grad y let dxy = dy.grad x - z.eval - dy.eval - dxy.eval z :~ (a+b-2.0)*(2.0-a-b)/(a-2.0) dy :~ -2.0*(a+b-2.0)/(a-2.0) dxy :~ 2.0*b/((a-2.0)*(a-2.0)) y.update c - dy.eval dy :~ -2.0*(a+c-2.0)/(a-2.0) x.update d - dxy.eval dxy :~ 2.0*c/((d-2.0)*(d-2.0)) y.update a - z.eval - dy.eval - dxy.eval z :~ (d+a-2.0)*(2.0-d-a)/(d-2.0) dy :~ -2.0*(d+a-2.0)/(d-2.0) dxy :~ 2.0*a/((d-2.0)*(d-2.0)) @@ -198,16 +177,85 @@ suite "scalar d2": let dy = z.grad y let u = z+0.1*dy let dx = (u*u).grad x - z.eval - dx.eval z :~ (a+b-2.0)*(2.0-a-b)/(a-2.0) dx :~ -2.0*(b+a-2.0)*(5.0*b+5.0*a-9.0)*(5.0*b*b+b-5.0*a*a+20.0*a-20.0)/(25.0*(a-2.0)*(a-2.0)*(a-2.0)) y.update c - dx.eval dx :~ -2.0*(c+a-2.0)*(5.0*c+5.0*a-9.0)*(5.0*c*c+c-5.0*a*a+20.0*a-20.0)/(25.0*(a-2.0)*(a-2.0)*(a-2.0)) x.update d y.update a - dx.eval - u.eval u :~ (d+a-2.0)*(2.0-d-a)/(d-2.0) - 0.1*2.0*(d+a-2.0)/(d-2.0) dx :~ -2.0*(a+d-2.0)*(5.0*a+5.0*d-9.0)*(5.0*a*a+a-5.0*d*d+20.0*d-20.0)/(25.0*(d-2.0)*(d-2.0)*(d-2.0)) + +suite "bool and cond": + setup: + let a = 0.5 * (sqrt(5.0) - 1.0) + let b = sqrt(2.0) - 1.0 + let c = 2.0 * a - 1.0 + let d = a + 3.0 * b - 1.0 + let x = toGvalue a + let y = toGvalue b + + test "condi": + let k = toGvalue 0 + let z = cond(k, x, y) + let dx = z.grad x + let dy = z.grad y + z :~ b + dx :~ 0.0 + dy :~ 1.0 + k.update 1 + z :~ a + dx :~ 1.0 + dy :~ 0.0 + + test "conds": + let k = toGvalue 1.0 + let z = cond(k, x, y) + let dx = z.grad x + let dy = z.grad y + z :~ a + dx :~ 1.0 + dy :~ 0.0 + k.update 0.0 + z :~ b + dx :~ 0.0 + dy :~ 1.0 + + test "condi 2": + let k = toGvalue 0 + let z = cond(k, x, y) + let z2 = z*z + let dx = z2.grad x + let dy = z2.grad y + z2 :~ b*b + dx :~ 0.0 + dy :~ 2.0*b + k.update 1 + y.update c + z2 :~ a*a + dx :~ 2.0*a + dy :~ 0.0 + k.update 0 + z2 :~ c*c + dx :~ 0.0 + dy :~ 2.0*c + + test "conds 2": + let k = toGvalue 1.0 + let z = cond(k, x, y) + let z2 = z*z + let dx = z2.grad x + let dy = z2.grad y + z2 :~ a*a + dx :~ 2.0*a + dy :~ 0.0 + k.update 0.0 + x.update d + z2 :~ b*b + dx :~ 0.0 + dy :~ 2.0*b + k.update 1.0 + x.update c + z2 :~ c*c + dx :~ 2.0*c + dy :~ 0.0 From 266ec12b98bb100622ff93ead17c34d2a23f5d0e Mon Sep 17 00:00:00 2001 From: Xiao-Yong Jin Date: Thu, 18 Jul 2024 16:49:07 -0500 Subject: [PATCH 39/60] experimental/graph: add stout smear deriv test --- src/experimental/graph/gauge.nim | 7 +++ src/experimental/graph/tggauge.nim | 2 +- src/experimental/graph/tgstout.nim | 98 ++++++++++++++++++++++++++++++ 3 files changed, 106 insertions(+), 1 deletion(-) create mode 100644 src/experimental/graph/tgstout.nim diff --git a/src/experimental/graph/gauge.nim b/src/experimental/graph/gauge.nim index f058cd6..c1d04ff 100644 --- a/src/experimental/graph/gauge.nim +++ b/src/experimental/graph/gauge.nim @@ -15,6 +15,13 @@ type Ggauge* {.final.} = ref object of Gvalue proc getgauge*(x: Gvalue): Gauge = Ggauge(x).gval +proc update*(x: Gvalue, g: Gauge) = + let u = x.getgauge + threads: + for mu in 0.. Date: Thu, 18 Jul 2024 17:13:49 -0500 Subject: [PATCH 40/60] graph/scalar: avoid overriding `!=` --- src/experimental/graph/scalar.nim | 1 - 1 file changed, 1 deletion(-) diff --git a/src/experimental/graph/scalar.nim b/src/experimental/graph/scalar.nim index a152e35..6f019c4 100644 --- a/src/experimental/graph/scalar.nim +++ b/src/experimental/graph/scalar.nim @@ -182,7 +182,6 @@ method cond*(c: Gvalue, x: Gvalue, y: Gvalue): Gvalue {.base.} = raiseErrorBaseM proc `>`*(x, y: Gvalue): Gvalue = not(x < y) proc `>=`*(x, y: Gvalue): Gvalue = x > y or equal(x,y) proc `<=`*(x, y: Gvalue): Gvalue = x < y or equal(x,y) -proc `!=`*(x, y: Gvalue): Gvalue = not equal(x,y) proc `xor`*(x, y: Gvalue): Gvalue = not equal(not(x), not(y)) # uses `not` to convert to 0/1 proc notsb(zb: Gvalue, z: Gvalue, i: int, dep: Gvalue): Gvalue = From 4c080ca012c41848df3358912818a3181d54a5bb Mon Sep 17 00:00:00 2001 From: Xiao-Yong Jin Date: Thu, 18 Jul 2024 18:18:31 -0500 Subject: [PATCH 41/60] graph/multi: fix issues --- src/experimental/graph/multi.nim | 25 ++++++++++++++++++------- 1 file changed, 18 insertions(+), 7 deletions(-) diff --git a/src/experimental/graph/multi.nim b/src/experimental/graph/multi.nim index 4fcd37a..c1f3576 100644 --- a/src/experimental/graph/multi.nim +++ b/src/experimental/graph/multi.nim @@ -37,9 +37,15 @@ method updateAt*(x: Gvalue, i: Gvalue, y: Gvalue): Gvalue {.base.} = raiseErrorB proc getAtmb(zb: Gvalue, z: Gvalue, i: int, dep: Gvalue): Gvalue = case i of 0: - return z.inputs[0].newOneOf.updateAt(i, zb) + if zb == nil: + # the output must be a scalar, otherwise crash later + return z.inputs[0].newOneOf.updateAt(z.inputs[1], toGvalue(1.0)) + else: + return z.inputs[0].newOneOf.updateAt(z.inputs[1], zb) + of 1: + return toGvalue(0) else: - raiseValueError("i must be 0, got: " & $i) + raiseValueError("i must be 0 or 1, got: " & $i) proc getAtmf(v: Gvalue) = let x = Gmulti(v.inputs[0]) @@ -56,11 +62,13 @@ method `[]`*(x: Gmulti, i: Gint): Gvalue = proc updateAtmb(zb: Gvalue, z: Gvalue, i: int, dep: Gvalue): Gvalue = case i of 0: - return zb.updateAt(i, zb.inputs[0].newOneOf) + return zb.updateAt(z.inputs[1], z.inputs[2].newOneOf) + of 1: + return toGvalue(0) of 2: return zb[i] else: - raiseValueError("i must be 0 or 2, got: " & $i) + raiseValueError("i must be 0, 1, or 2, got: " & $i) proc updateAtmf(v: Gvalue) = let x = Gmulti(v.inputs[0]) @@ -68,10 +76,13 @@ proc updateAtmf(v: Gvalue) = let z = Gmulti(v) for k in 0.. Date: Thu, 18 Jul 2024 22:47:32 -0500 Subject: [PATCH 42/60] graph/gauge: fix gaugeActionDeriv2 with proper zero init --- src/experimental/graph/gauge.nim | 15 ++++++++++++++- src/experimental/graph/tggauge.nim | 20 ++++++++++++++++++-- src/experimental/graph/tgstout.nim | 10 +++++----- 3 files changed, 37 insertions(+), 8 deletions(-) diff --git a/src/experimental/graph/gauge.nim b/src/experimental/graph/gauge.nim index c1d04ff..62ac32b 100644 --- a/src/experimental/graph/gauge.nim +++ b/src/experimental/graph/gauge.nim @@ -40,7 +40,9 @@ method valCopy*(z: Ggauge, x: Ggauge) = for mu in 0.. Date: Mon, 22 Jul 2024 13:24:12 -0500 Subject: [PATCH 43/60] add acacaca --- src/experimental/stagag.nim | 36 ++++++++++++++++++++++++++++++++++++ 1 file changed, 36 insertions(+) diff --git a/src/experimental/stagag.nim b/src/experimental/stagag.nim index 2cfd00e..8c69038 100644 --- a/src/experimental/stagag.nim +++ b/src/experimental/stagag.nim @@ -875,6 +875,41 @@ proc setupMDabacaba = addG(g0) addT(t0) +proc setupMDacacaca = + if pt0 == 0: pt0 = 0.116438749543126 + if pg0 == 0: pg0 = 0.283216992495952 + if pf0 == 0: pf0 = 0.283216992495952 + if pgf0 == 0: pgf0 = 0.001247201195115*(2.0/pg0) + if pff0 == 0: pff0 = 0.001247201195115*(2.0/pf0) + if pgf1 == 0: pgf1 = 0.002974030329635*(2.0/(1-2*pg0)) + if pff1 == 0: pff1 = 0.002974030329635*(2.0/(1-2*pf0)) + let t0 = vtau * pushParam(pt0) + let t02 = 2 * t0 + let g0 = vtau * pushParam(pg0) + let f0 = if nf==0: g0 else: vtau * pushParam(pf0) + let t1 = 0.5 * vtau - t0 + let g1 = vtau - 2 * g0 + let f1 = vtau - 2 * f0 + let gf0 = vtau*vtau*pushParam(pgf0) + let ff0 = if nf==0: gf0 else: vtau*vtau*pushParam(pff0) + let gf1 = vtau*vtau*pushParam(pgf1) + let ff1 = if nf==0: gf1 else: vtau*vtau*pushParam(pff1) + addT(t0) + for i in 0.. Date: Mon, 22 Jul 2024 18:40:20 -0500 Subject: [PATCH 44/60] graph: shortcut eval for cond --- src/experimental/graph/core.nim | 70 ++++++++++- src/experimental/graph/scalar.nim | 198 ++++-------------------------- src/experimental/graph/tgraph.nim | 70 +++++++++++ 3 files changed, 161 insertions(+), 177 deletions(-) diff --git a/src/experimental/graph/core.nim b/src/experimental/graph/core.nim index 1167caf..31e57ea 100644 --- a/src/experimental/graph/core.nim +++ b/src/experimental/graph/core.nim @@ -75,6 +75,10 @@ proc nodeRepr*(x: Gvalue): string = method newOneOf*(x: Gvalue): Gvalue {.base.} = raiseErrorBaseMethod("newOneOf(" & $x & ")") ## Be sure to zero init fields method valCopy*(z: Gvalue, x: Gvalue) {.base.} = raiseErrorBaseMethod("valCopy(" & $z & "," & $x & ")") +method isTrue*(x: Gvalue): bool {.base.} = raiseErrorBaseMethod("isTrue(" & $x & ")") +method update*(x: Gvalue, y: int) {.base.} = raiseErrorBaseMethod("update(" & $x & "," & $y & ")") +method update*(x: Gvalue, y: float) {.base.} = raiseErrorBaseMethod("update(" & $x & "," & $y & ")") + proc assignGvalue(z: Gvalue, x: Gvalue) = z.tag = x.tag z.inputs = x.inputs @@ -146,6 +150,51 @@ method `-`*(x: Gvalue, y: Gvalue): Gvalue {.base.} = raiseErrorBaseMethod("`-`(" method `/`*(x: Gvalue, y: Gvalue): Gvalue {.base.} = raiseErrorBaseMethod("`/`(" & $x & ", " & $y & ")") method exp*(x: Gvalue): Gvalue {.base.} = raiseErrorBaseMethod("exp(" & $x & ")") +proc cond*(c: Gvalue, x: Gvalue, y: Gvalue): Gvalue + +proc condb(zb: Gvalue, z: Gvalue, i: int, dep: Gvalue): Gvalue = + case i + of 0: + let r = z.inputs[0].newOneOf + r.update 0 + return r + of 1: + if zb == nil: + # the output must be a scalar, otherwise crash later + let r1 = z.inputs[1].newOneOf + let r0 = z.inputs[1].newOneOf + r1.update 1 + r0.update 0 + return cond(z.inputs[0], r1, r0) + else: + return cond(z.inputs[0], zb, zb.newOneOf) + of 2: + if zb == nil: + # the output must be a scalar, otherwise crash later + let r0 = z.inputs[2].newOneOf + let r1 = z.inputs[2].newOneOf + r0.update 0 + r1.update 1 + return cond(z.inputs[0], r0, r1) + else: + return cond(z.inputs[0], zb.newOneOf, zb) + else: + raiseValueError("i must be 0 or 1, got: " & $i) + +proc condf(v: Gvalue) = + if v.inputs[0].isTrue: + v.valCopy v.inputs[1] + else: + v.valCopy v.inputs[2] + +let gcond = newGfunc(forward = condf, backward = condb, name = "cond") + +proc cond*(c: Gvalue, x: Gvalue, y: Gvalue): Gvalue = + ## Assume the result is the same type as y, otherwise it'll throw exception later in forward valCopy. + result = y.newOneOf + result.inputs = @[c, x, y] + result.gfunc = gcond + proc updated*(x: Gvalue) = var epoch {.global.} = 0 inc epoch @@ -157,10 +206,23 @@ proc eval*(v: Gvalue): Gvalue {.discardable.} = return x.tag.incl gtVisited var maxep = 0 - for i in x.inputs: - i.r - if maxep < i.epoch: - maxep = i.epoch + if x.gfunc == gcond: + x.inputs[0].r + if maxep < x.inputs[0].epoch: + maxep = x.inputs[0].epoch + if x.inputs[0].isTrue: + x.inputs[1].r + if maxep < x.inputs[1].epoch: + maxep = x.inputs[1].epoch + else: + x.inputs[2].r + if maxep < x.inputs[2].epoch: + maxep = x.inputs[2].epoch + else: + for i in x.inputs: + i.r + if maxep < i.epoch: + maxep = i.epoch if x.epoch < maxep: let f = x.gfunc if graphDebug: diff --git a/src/experimental/graph/scalar.nim b/src/experimental/graph/scalar.nim index 6f019c4..fb126ec 100644 --- a/src/experimental/graph/scalar.nim +++ b/src/experimental/graph/scalar.nim @@ -16,7 +16,7 @@ proc `getfloat=`*(x: Gvalue, y: float) = let xs = Gscalar(x) xs.sval = y -proc update*(x: Gvalue, y: float) = +method update*(x: Gscalar, y: float) = x.getfloat = y x.updated @@ -29,13 +29,23 @@ method valCopy*(z: Gscalar, x: Gscalar) = z.sval = x.sval method `$`*(x: Gscalar): string = $x.sval +method isTrue*(x: Gscalar): bool = x.sval != 0.0 + +proc `getfloat=`*(x: Gvalue, y: int) = + let xs = Gscalar(x) + xs.sval = float(y) + +method update*(x: Gscalar, y: int) = + x.getfloat = y + x.updated + proc getint*(x: Gvalue): int = Gint(x).ival proc `getint=`*(x: Gvalue, y: int) = let xs = Gint(x) xs.ival = y -proc update*(x: Gvalue, y: int) = +method update*(x: Gint, y: int) = x.getint = y x.updated @@ -48,6 +58,8 @@ method valCopy*(z: Gint, x: Gint) = z.ival = x.ival method `$`*(x: Gint): string = $x.ival +method isTrue*(x: Gint): bool = x.ival != 0 + proc negsf(v: Gvalue) = let x = Gscalar(v.inputs[0]) let z = Gscalar(v) @@ -171,68 +183,24 @@ let exps = newGfunc(forward = expsf, backward = expsb, name = "exps") method exp*(x: Gscalar): Gvalue = Gscalar(inputs: @[Gvalue(x)], gfunc: exps) -method `not`*(x: Gvalue): Gvalue {.base.} = raiseErrorBaseMethod("not(" & $x & ")") -method `and`*(x: Gvalue, y: Gvalue): Gvalue {.base.} = raiseErrorBaseMethod("and(" & $x & ", " & $y & ")") -method `or`*(x: Gvalue, y: Gvalue): Gvalue {.base.} = raiseErrorBaseMethod("or(" & $x & ", " & $y & ")") method `<`*(x: Gvalue, y: Gvalue): Gvalue {.base.} = raiseErrorBaseMethod("`<`(" & $x & ", " & $y & ")") method equal*(x: Gvalue, y: Gvalue): Gvalue {.base.} = raiseErrorBaseMethod("equal(" & $x & ", " & $y & ")") -method cond*(c: Gvalue, x: Gvalue, y: Gvalue): Gvalue {.base.} = raiseErrorBaseMethod("cond(" & $c & "," & $x & ", " & $y & ")") +proc newFalse(x: Gvalue): Gvalue = + result = x.newOneOf + result.update 0 +proc newTrue(x: Gvalue): Gvalue = + result = x.newOneOf + result.update 1 + +proc `not`*(x: Gvalue): Gvalue = cond(x, x.newFalse, x.newTrue) +proc `and`*(x: Gvalue, y: Gvalue): Gvalue = cond(x, y, y.newFalse) ## return type follows the second operand, as does cond +proc `or`*(x: Gvalue, y: Gvalue): Gvalue = cond(x, y.newTrue, y) ## return type follows the second operand, as does cond +proc `xor`*(x: Gvalue, y: Gvalue): Gvalue = cond(x, not(y), y) ## return type follows the second operand, as does cond proc `>`*(x, y: Gvalue): Gvalue = not(x < y) proc `>=`*(x, y: Gvalue): Gvalue = x > y or equal(x,y) proc `<=`*(x, y: Gvalue): Gvalue = x < y or equal(x,y) -proc `xor`*(x, y: Gvalue): Gvalue = not equal(not(x), not(y)) # uses `not` to convert to 0/1 - -proc notsb(zb: Gvalue, z: Gvalue, i: int, dep: Gvalue): Gvalue = - case i - of 0: - return toGvalue(0.0) - else: - raiseValueError("i must be 0, got: " & $i) - -proc notsf(v: Gvalue) = - let x = Gscalar(v.inputs[0]) - let z = Gscalar(v) - z.sval = if x.sval == 0.0: 1.0 else: 0.0 - -let nots = newGfunc(forward = notsf, backward = notsb, name = "nots") - -method `not`*(x: Gscalar): Gvalue = Gscalar(inputs: @[Gvalue(x)], gfunc: nots) - -proc andsb(zb: Gvalue, z: Gvalue, i: int, dep: Gvalue): Gvalue = - case i - of 0, 1: - return toGvalue(0.0) - else: - raiseValueError("i must be 0 or 1, got: " & $i) - -proc andsf(v: Gvalue) = - let x = Gscalar(v.inputs[0]) - let y = Gscalar(v.inputs[1]) - let z = Gscalar(v) - z.sval = if x.sval == 0.0 or y.sval == 0.0: 0.0 else: 1.0 - -let ands = newGfunc(forward = andsf, backward = andsb, name = "ands") - -method `and`*(x: Gscalar, y: Gscalar): Gvalue = Gscalar(inputs: @[Gvalue(x), y], gfunc: ands) - -proc orsb(zb: Gvalue, z: Gvalue, i: int, dep: Gvalue): Gvalue = - case i - of 0, 1: - return toGvalue(0.0) - else: - raiseValueError("i must be 0 or 1, got: " & $i) - -proc orsf(v: Gvalue) = - let x = Gscalar(v.inputs[0]) - let y = Gscalar(v.inputs[1]) - let z = Gscalar(v) - z.sval = if x.sval == 0.0 and y.sval == 0.0: 0.0 else: 1.0 - -let ors = newGfunc(forward = orsf, backward = orsb, name = "ors") - -method `or`*(x: Gscalar, y: Gscalar): Gvalue = Gscalar(inputs: @[Gvalue(x), y], gfunc: ors) proc ltsb(zb: Gvalue, z: Gvalue, i: int, dep: Gvalue): Gvalue = case i @@ -268,89 +236,6 @@ let equals = newGfunc(forward = equalsf, backward = equalsb, name = "equals") method equal*(x: Gscalar, y: Gscalar): Gvalue = Gscalar(inputs: @[Gvalue(x), y], gfunc: equals) -proc condsb(zb: Gvalue, z: Gvalue, i: int, dep: Gvalue): Gvalue = - case i - of 0: - return toGvalue(0.0) - of 1: - if zb == nil: - # the output must be a scalar, otherwise crash later - return cond(z.inputs[0], toGvalue(1.0), toGvalue(0.0)) - else: - return cond(z.inputs[0], zb, zb.newOneOf) - of 2: - if zb == nil: - # the output must be a scalar, otherwise crash later - return cond(z.inputs[0], toGvalue(0.0), toGvalue(1.0)) - else: - return cond(z.inputs[0], zb.newOneOf, zb) - else: - raiseValueError("i must be 0 or 1, got: " & $i) - -proc condsf(v: Gvalue) = - let c = Gscalar(v.inputs[0]) - if c.sval == 0.0: - v.valCopy v.inputs[2] - else: - v.valCopy v.inputs[1] - -let conds = newGfunc(forward = condsf, backward = condsb, name = "conds") - -method cond*(c: Gscalar, x: Gvalue, y: Gvalue): Gvalue = - result = x.newOneOf - result.inputs = @[Gvalue(c), x, y] - result.gfunc = conds - -proc notib(zb: Gvalue, z: Gvalue, i: int, dep: Gvalue): Gvalue = - case i - of 0: - return toGvalue(0) - else: - raiseValueError("i must be 0, got: " & $i) - -proc notif(v: Gvalue) = - let x = Gint(v.inputs[0]) - let z = Gint(v) - z.ival = if x.ival == 0: 1 else: 0 - -let noti = newGfunc(forward = notif, backward = notib, name = "noti") - -method `not`*(x: Gint): Gvalue = Gint(inputs: @[Gvalue(x)], gfunc: noti) - -proc andib(zb: Gvalue, z: Gvalue, i: int, dep: Gvalue): Gvalue = - case i - of 0, 1: - return toGvalue(0) - else: - raiseValueError("i must be 0 or 1, got: " & $i) - -proc andif(v: Gvalue) = - let x = Gint(v.inputs[0]) - let y = Gint(v.inputs[1]) - let z = Gint(v) - z.ival = if x.ival == 0 or y.ival == 0: 0 else: 1 - -let andi = newGfunc(forward = andif, backward = andib, name = "andi") - -method `and`*(x: Gint, y: Gint): Gvalue = Gint(inputs: @[Gvalue(x), y], gfunc: andi) - -proc orib(zb: Gvalue, z: Gvalue, i: int, dep: Gvalue): Gvalue = - case i - of 0, 1: - return toGvalue(0) - else: - raiseValueError("i must be 0 or 1, got: " & $i) - -proc orif(v: Gvalue) = - let x = Gint(v.inputs[0]) - let y = Gint(v.inputs[1]) - let z = Gint(v) - z.ival = if x.ival == 0 and y.ival == 0: 0 else: 1 - -let ori = newGfunc(forward = orif, backward = orib, name = "ori") - -method `or`*(x: Gint, y: Gint): Gvalue = Gint(inputs: @[Gvalue(x), y], gfunc: ori) - proc ltib(zb: Gvalue, z: Gvalue, i: int, dep: Gvalue): Gvalue = case i of 0, 1: @@ -385,39 +270,6 @@ let equali = newGfunc(forward = equalif, backward = equalib, name = "equali") method equal*(x: Gint, y: Gint): Gvalue = Gint(inputs: @[Gvalue(x), y], gfunc: equali) -proc condib(zb: Gvalue, z: Gvalue, i: int, dep: Gvalue): Gvalue = - case i - of 0: - return toGvalue(0) - of 1: - if zb == nil: - # the output must be a scalar, otherwise crash later - return cond(z.inputs[0], toGvalue(1.0), toGvalue(0.0)) - else: - return cond(z.inputs[0], zb, zb.newOneOf) - of 2: - if zb == nil: - # the output must be a scalar, otherwise crash later - return cond(z.inputs[0], toGvalue(0.0), toGvalue(1.0)) - else: - return cond(z.inputs[0], zb.newOneOf, zb) - else: - raiseValueError("i must be 0 or 1, got: " & $i) - -proc condif(v: Gvalue) = - let c = Gint(v.inputs[0]) - if c.ival == 0: - v.valCopy v.inputs[2] - else: - v.valCopy v.inputs[1] - -let condi = newGfunc(forward = condif, backward = condib, name = "condi") - -method cond*(c: Gint, x: Gvalue, y: Gvalue): Gvalue = - result = x.newOneOf - result.inputs = @[Gvalue(c), x, y] - result.gfunc = condi - when isMainModule: import math import std/assertions diff --git a/src/experimental/graph/tgraph.nim b/src/experimental/graph/tgraph.nim index a9d3bd2..e83ce25 100644 --- a/src/experimental/graph/tgraph.nim +++ b/src/experimental/graph/tgraph.nim @@ -11,9 +11,19 @@ template checkeq(ii: tuple[filename:string, line:int, column:int], sa: string, a checkpoint(" " & sb & ": " & $b) fail() +template checkeq(ii: tuple[filename:string, line:int, column:int], sa: string, a: int, sb: string, b: int) = + if a != b: + checkpoint(ii.filename & ":" & $ii.line & ":" & $ii.column & ": Check failed: " & sa & " :~ " & sb) + checkpoint(" " & sa & ": " & $a) + checkpoint(" " & sb & ": " & $b) + fail() + template `:~`(a:Gvalue, b:float) = checkeq(instantiationInfo(), astToStr a, a.eval.getfloat, astToStr b, b) +template `:~`(a:Gvalue, b:int) = + checkeq(instantiationInfo(), astToStr a, a.eval.getint, astToStr b, b) + suite "scalar basic": # run once before setup: @@ -195,6 +205,50 @@ suite "bool and cond": let x = toGvalue a let y = toGvalue b + test "not": + let f = toGvalue 0 + not(f) :~ 1 + not(not f) :~ 0 + let t = toGvalue 1.0 + not(t) :~ 0.0 + not(not t) :~ 1.0 + + test "and": + let fi = toGvalue 0 + let ti = toGvalue 1 + let t = toGvalue 1.0 + let f = toGvalue 0.0 + fi and t :~ 0.0 + t and fi :~ 0 + ti and t :~ 1.0 + t and ti :~ 1 + f and fi :~ 0 + fi and f :~ 0.0 + + test "or": + let fi = toGvalue 0 + let ti = toGvalue 1 + let t = toGvalue 1.0 + let f = toGvalue 0.0 + fi or t :~ 1.0 + t or fi :~ 1 + ti or t :~ 1.0 + t or ti :~ 1 + f or fi :~ 0 + fi or f :~ 0.0 + + test "xor": + let fi = toGvalue 0 + let ti = toGvalue 1 + let t = toGvalue 1.0 + let f = toGvalue 0.0 + fi xor t :~ 1.0 + t xor fi :~ 1 + ti xor t :~ 0.0 + t xor ti :~ 0 + f xor fi :~ 0 + fi xor f :~ 0.0 + test "condi": let k = toGvalue 0 let z = cond(k, x, y) @@ -259,3 +313,19 @@ suite "bool and cond": z2 :~ c*c dx :~ 2.0*c dy :~ 0.0 + + test "cond eval shortcut": + let t = toGvalue 2.0 + let f = toGvalue 0.0 + let t2 = t*t + let t3 = t*t*t + check t2.getfloat == 0.0 # should be zero before eval + check t3.getfloat == 0.0 # ditto + var tt = cond(t, t2, t3) + tt :~ 4.0 + check t2.getfloat == 4.0 + check t3.getfloat == 0.0 # should remain zero after eval + tt = cond(f, t3, t2) + tt :~ 4.0 + check t2.getfloat == 4.0 + check t3.getfloat == 0.0 # should remain zero after eval From f31c1b2838f372ecb247c95989d27a85c99eb90f Mon Sep 17 00:00:00 2001 From: Xiao-Yong Jin Date: Tue, 23 Jul 2024 09:50:44 -0500 Subject: [PATCH 45/60] graph: specialize zero fields, use locals to share compute/memory --- src/experimental/graph/core.nim | 27 +- src/experimental/graph/gauge.nim | 513 ++++++++++++++++++++++------- src/experimental/graph/scalar.nim | 4 +- src/experimental/graph/tggauge.nim | 20 ++ 4 files changed, 425 insertions(+), 139 deletions(-) diff --git a/src/experimental/graph/core.nim b/src/experimental/graph/core.nim index 31e57ea..e322488 100644 --- a/src/experimental/graph/core.nim +++ b/src/experimental/graph/core.nim @@ -26,6 +26,7 @@ type tag: Gtags inputs*: seq[Gvalue] gfunc*: Gfunc + locals*: seq[Gvalue] ## for sharing values between forward and among backward functions epoch: int type @@ -75,7 +76,7 @@ proc nodeRepr*(x: Gvalue): string = method newOneOf*(x: Gvalue): Gvalue {.base.} = raiseErrorBaseMethod("newOneOf(" & $x & ")") ## Be sure to zero init fields method valCopy*(z: Gvalue, x: Gvalue) {.base.} = raiseErrorBaseMethod("valCopy(" & $z & "," & $x & ")") -method isTrue*(x: Gvalue): bool {.base.} = raiseErrorBaseMethod("isTrue(" & $x & ")") +method isZero*(x: Gvalue): bool {.base.} = raiseErrorBaseMethod("isZero(" & $x & ")") method update*(x: Gvalue, y: int) {.base.} = raiseErrorBaseMethod("update(" & $x & "," & $y & ")") method update*(x: Gvalue, y: float) {.base.} = raiseErrorBaseMethod("update(" & $x & "," & $y & ")") @@ -182,10 +183,10 @@ proc condb(zb: Gvalue, z: Gvalue, i: int, dep: Gvalue): Gvalue = raiseValueError("i must be 0 or 1, got: " & $i) proc condf(v: Gvalue) = - if v.inputs[0].isTrue: - v.valCopy v.inputs[1] - else: + if v.inputs[0].isZero: v.valCopy v.inputs[2] + else: + v.valCopy v.inputs[1] let gcond = newGfunc(forward = condf, backward = condb, name = "cond") @@ -200,6 +201,14 @@ proc updated*(x: Gvalue) = inc epoch x.epoch = epoch +proc evaluated*(x: Gvalue) = + ## signal up-to-date value, given inputs, useful for update value outside of eval, as in update to locals in forward + var maxep = 0 + for i in x.inputs: + if maxep < i.epoch: + maxep = i.epoch + x.epoch = maxep + proc eval*(v: Gvalue): Gvalue {.discardable.} = proc r(x: Gvalue) = if gtVisited in x.tag: @@ -210,14 +219,14 @@ proc eval*(v: Gvalue): Gvalue {.discardable.} = x.inputs[0].r if maxep < x.inputs[0].epoch: maxep = x.inputs[0].epoch - if x.inputs[0].isTrue: - x.inputs[1].r - if maxep < x.inputs[1].epoch: - maxep = x.inputs[1].epoch - else: + if x.inputs[0].isZero: x.inputs[2].r if maxep < x.inputs[2].epoch: maxep = x.inputs[2].epoch + else: + x.inputs[1].r + if maxep < x.inputs[1].epoch: + maxep = x.inputs[1].epoch else: for i in x.inputs: i.r diff --git a/src/experimental/graph/gauge.nim b/src/experimental/graph/gauge.nim index 62ac32b..84f216a 100644 --- a/src/experimental/graph/gauge.nim +++ b/src/experimental/graph/gauge.nim @@ -11,42 +11,45 @@ import layout, ../../gauge, physics/qcdTypes type Gauge = seq[DLatticeColorMatrixV] type Ggauge* {.final.} = ref object of Gvalue + isZero: bool = false ## specialized for zero fields, unrelated to actual gval gval: Gauge proc getgauge*(x: Gvalue): Gauge = Ggauge(x).gval -proc update*(x: Gvalue, g: Gauge) = - let u = x.getgauge - threads: - for mu in 0.. Date: Thu, 25 Jul 2024 11:16:54 -0500 Subject: [PATCH 46/60] make profiling use allocShared --- src/base/profile.nim | 10 +++++----- src/experimental/stagag.nim | 3 ++- 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/src/base/profile.nim b/src/base/profile.nim index eef708d..570e190 100644 --- a/src/base/profile.nim +++ b/src/base/profile.nim @@ -72,7 +72,7 @@ proc newList[T](len:int32 = 0):List[T] {.noinit.} = result.len = len result.cap = cap if cap > 0: - result.data = cast[ptr UncheckedArray[T]](alloc(sizeof(T)*cap)) + result.data = cast[ptr UncheckedArray[T]](allocShared(sizeof(T)*cap)) listChangeLen[T](cap) else: result.data = nil @@ -80,7 +80,7 @@ proc newListOfCap[T](cap:int32):List[T] {.noinit.} = result.len = 0 result.cap = cap if cap > 0: - result.data = cast[ptr UncheckedArray[T]](alloc(sizeof(T)*cap)) + result.data = cast[ptr UncheckedArray[T]](allocShared(sizeof(T)*cap)) listChangeLen[T](cap) else: result.data = nil @@ -92,16 +92,16 @@ proc setLen[T](ls:var List[T], len:int32) = if cap0 == 0: cap = 1 while cap < len: cap *= 2 - ls.data = cast[ptr UncheckedArray[T]](alloc(sizeof(T)*cap.int)) + ls.data = cast[ptr UncheckedArray[T]](allocShared(sizeof(T)*cap.int)) else: while cap < len: cap *= 2 - ls.data = cast[ptr UncheckedArray[T]](realloc(ls.data, sizeof(T)*cap.int)) + ls.data = cast[ptr UncheckedArray[T]](reallocShared(ls.data, sizeof(T)*cap.int)) ls.cap = cap listChangeLen[T](int32(cap-cap0)) ls.len = len proc free[T](ls:var List[T]) = if ls.cap > 0: - dealloc(ls.data) + deallocShared(ls.data) listChangeLen[T](-ls.cap) ls.len = 0 ls.cap = 0 diff --git a/src/experimental/stagag.nim b/src/experimental/stagag.nim index 8c69038..6ab53b3 100644 --- a/src/experimental/stagag.nim +++ b/src/experimental/stagag.nim @@ -2098,5 +2098,6 @@ if outfn != "": echo "Saving gauge field to file: ", outfn let err = g.saveGauge outfn -#echoTimers() +if intParam("prof",0) != 0: + echoTimers() qexFinalize() From de23e2723cf09cbd964736fded18129ab0fda484 Mon Sep 17 00:00:00 2001 From: James Osborn Date: Fri, 9 Aug 2024 11:49:04 -0500 Subject: [PATCH 47/60] add hotspot profile --- src/base/profile.nim | 163 +++++++++++++++++++++++++++++-- src/base/threading.nim | 12 +-- src/comms/commsUtils.nim | 5 + src/examples/puregaugehb2du1.nim | 10 +- src/experimental/stagag.nim | 43 ++++---- src/field/fieldET.nim | 16 +-- src/io/crc32.nim | 6 +- src/io/parallelIo.nim | 10 +- src/physics/stagD.nim | 14 +-- src/physics/stagSolve.nim | 20 ++-- src/solvers/cg.nim | 5 +- 11 files changed, 234 insertions(+), 70 deletions(-) diff --git a/src/base/profile.nim b/src/base/profile.nim index 570e190..8153a16 100644 --- a/src/base/profile.nim +++ b/src/base/profile.nim @@ -1,7 +1,7 @@ import threading export threading import comms/comms, stdUtils, base/basicOps -import os, strutils, sequtils, std/monotimes +import os, strutils, sequtils, std/monotimes, std/tables, std/algorithm, strformat export monotimes getOptimPragmas() @@ -49,6 +49,10 @@ type CodePoint = distinct int32 CodePointObj = object toDropTimer: bool + #nsec: int64 + #overhead: int64 + count: uint32 + dropcount: uint32 name: string loc: II SString = static[string] | string @@ -117,6 +121,7 @@ iterator items[T](ls:List[T]):T = proc setLen(ls:var RTInfoObjList, len:int32) {.borrow.} proc free(ls:var RTInfoObjList) {.borrow.} +proc add(ls:var RTInfoObjList, x:RTInfoObj) {.borrow.} template len(ls:RTInfoObjList):int32 = List[RTInfoObj](ls).len template `[]`(ls:RTInfoObjList, n:int32):untyped = List[RTInfoObj](ls)[n] iterator mitems(ls:RTInfoObjList):var RTInfoObj = @@ -183,20 +188,49 @@ proc newCodePoint(ii:II, s:SString = ""):CodePoint = let n = cpHeap.len cpHeap.setlen(n+1) cpHeap[n].toDropTimer = false + cpHeap[n].count = 0 + cpHeap[n].dropcount = 0 cpHeap[n].name = s cpHeap[n].loc = ii CodePoint(n) template name(x:CodePoint):string = cpHeap[x.int].name template loc(x:CodePoint):II = cpHeap[x.int].loc +template count(x:CodePoint):uint32 = cpHeap[x.int].count +template dropcount(x:CodePoint):uint32 = cpHeap[x.int].dropcount template toDropTimer(x:CodePoint):bool = cpHeap[x.int].toDropTimer +template dropTimer(x:CodePoint) = + toDropTimer(x) = true + #echo "dropTimer: ", cpHeap[x.int].loc, " ", cpHeap[x.int].name template overhead(x:RTInfoObj):untyped = x.overhead template childrenOverhead(x:RTInfoObj):untyped = x.childrenOverhead template istic(x:RTInfoObj):bool = x.prev.isNil template isnottic(x:RTInfoObj):bool = not x.prev.isNil - -template dropTimer(x:RTInfo):untyped = toDropTimer(rtiStack[x.int].curr) = true +template toDropTimer(x:RTInfoObj):bool = toDropTimer(x.curr) +template dropTimer(x:RTInfoObj) = dropTimer(x.curr) +proc dropTimerRecursive(x:RTInfoObj) = + #echo "dropTimerRecursive: ", x.prev.loc, " ", x.curr.loc, " ", x.curr.name + dropTimer(x) + for i in 0.. DropWasteTimerRatio: - #if ii.filename != "cg.nim": + inc thisCode.count + if oh.float > ns.float*DropWasteTimerRatio: + #if not toDropTimer(prevRTI) and oh.float > ns.float*DropWasteTimerRatio: + inc thisCode.dropcount + #if ii.filename != "scg.nim": # echo "drop timer: ", oh.float, "/", ns.float, "=", oh.float / ns.float # echo " ", prevRTI.int, " ", thisRTI.int, " ", ii, " ", s # Signal stop if the overhead is too large. - dropTimer(prevRTI) + if thisCode.dropcount > 10 and thisCode.dropcount*10 > thisCode.count: + #echo "dropTimer: ", rtiStack[thisRTI.int].tic.name, " ", thisCode.name, " ", thisCode.loc + dropTimer(prevRTI) + dropTimerChildren(thisRTI) + #dropTimer(thisCode) if toDropTimer(thisCode): freezeTimers() restartTimer = true @@ -574,14 +615,14 @@ type Tstr = tuple label: string stats: string +func markMissing(p:bool,str:string):string = + if p: "[" & str & "]" + else: str template ppT(ts: RTInfoObjList, prefix = "-", total = 0'i64, overhead = 0'i64, count = 0'u32, initIx = 0, showAbove = 0.0, showDropped = true): seq[Tstr] = ppT(List[RTInfoObj](ts), prefix, total, overhead, count, initIx, showAbove, showDropped) proc ppT(ts: List[RTInfoObj], prefix = "-", total = 0'i64, overhead = 0'i64, count = 0'u32, initIx = 0, showAbove = 0.0, showDropped = true): seq[Tstr] = - proc markMissing(p:bool,str:string):string = - if p: "[" & str & "]" - else: str var sub:int64 = 0 subo:int64 = 0 @@ -686,6 +727,110 @@ proc echoTimersRaw* = echo cpHeap echo rtiStack +proc getName(t: ptr RTInfoObj): string = + let tn = t.tic.name + let pn = t.prev.name + let name = (if tn=="":"" else:tn&":") & (if pn=="":"" else:pn&"-") & t.curr.name + if t.prev.toDropTimer: + result = "{" & name & "}" + else: result = name + +var hs = initTable[string,RTInfoObj]() +proc makeHotspotTable(lrti: List[RTInfoObj]): tuple[ns:int64,oh:int64] = + var nstot = int64 0 + var ohtot = int64 0 + for ri in lrti: + let nc = ri.count + if ri.istic or nc==0: continue + let + f0 = splitFile(ri.prev.loc.filename)[1] + l0 = ri.prev.loc.line + f = splitFile(ri.curr.loc.filename)[1] + l = ri.curr.loc.line + loc = f0 & "(" & $l0 & "-" & (if f==f0:"" else:f) & $l & ") " & getName(addr ri) + coh = ri.childrenOverhead + soh = ri.overhead + nsec = ri.nsec + ns = nsec - coh + oh = soh + coh + nstot += ns + ohtot += oh + hs.withValue(loc, t): # t is found value for loc + t.nsec += ri.nsec + t.flops += ri.flops + t.overhead += ri.overhead + t.childrenOverhead += ri.childrenOverhead + t.count += ri.count + for i in 0..0: + let ins = v.nsec - v.childrenOverhead + skeys.add (ns: ins, loc: "incl" & k) + #if v.children.len>0: + var sns = v.nsec + if v.children.len==0: sns -= v.childrenOverhead + for i in 0..0.0: mfs.int|7 else: " ") + nc = (if t.children.len>0: t.children.len|4 else: " ") + f0 = splitFile(t.prev.loc.filename)[1] + l0 = t.prev.loc.line + f = splitFile(t.curr.loc.filename)[1] + l = t.curr.loc.line + loc = f0 & "(" & $l0 & "-" & (if f==f0:"" else:f) & $l & ")" + lc = loc|(-nloc,'.') + nm = getName(t) + #echo &"{pct:6.3f} {ohpct:6.3f} {count} {mf} {nc} {lc} {nm}" + if incl: + echo &"{pct:6.3f} {count} {mf} {nc} I {lc} {nm}" + else: + tsns += nk.ns + let tsnspct = 100.0 * tsns / nstot + echo &"{pct:6.3f} {tsnspct:7.3f} {count} {mf} {nc} S {lc} {nm}" + when isMainModule: import os proc test = diff --git a/src/base/threading.nim b/src/base/threading.nim index 631ef61..73db56c 100644 --- a/src/base/threading.nim +++ b/src/base/threading.nim @@ -162,26 +162,26 @@ template t0waitX* = for b in 1..= tbar0: break else: inc threadLocals.share[threadNum].counter - #fence() - fence() -template t0wait* = threadBarrier() + fence() +template t0wait* = t0waitO() template twait0O* = threadBarrier() template twait0X* = if threadNum==0: inc threadLocals.share[0].counter - #fence() + fence() else: inc threadLocals.share[threadNum].counter let tbar0 = threadLocals.share[threadNum].counter let p{.volatile.} = threadLocals.share[0].counter.addr while true: + fence() if p[] >= tbar0: break - fence() -template twait0* = threadBarrier() +template twait0* = twait0O() template threadBarrier* = #t0waitX diff --git a/src/comms/commsUtils.nim b/src/comms/commsUtils.nim index 1d02719..9fb89f8 100644 --- a/src/comms/commsUtils.nim +++ b/src/comms/commsUtils.nim @@ -249,13 +249,18 @@ template threadRankSum1*[T](comm: Comm, a: T) = var ta{.global.}: type(a) #var ta2{.global.}:array[512,type(a)] if threadNum==0: + tic("threadRankSum1") t0wait() + toc("t0wait") for i in 1.. 0: - echo "Starting warmups" - #setupMDx() - alwaysAccept = warmmd - for n in 1..nwarm: - m.update - m.clearStats - pacc.clear +block: + tic("warmup") + if nwarm > 0: + echo "Starting warmups" + #setupMDx() + alwaysAccept = warmmd + for n in 1..nwarm: + m.update + m.clearStats + pacc.clear + toc("end warmup") echo "Starting HMC" #setupMD5() @@ -2043,11 +2048,11 @@ for i in 0.. 0: m.clearStats pacc.clear - tic() + tic("inference") for n in 1..trajs: echo "Starting inference: ", n echoParams() @@ -2091,13 +2096,15 @@ if trajs > 0: let ttot = getElapsedTime() echo "End inference update: ", tup, " measure: ", ttot-tup, " total: ", ttot let et = getElapsedTime() - toc() + toc("end inference") echo "Inference time: ", et if outfn != "": echo "Saving gauge field to file: ", outfn let err = g.saveGauge outfn -if intParam("prof",0) != 0: - echoTimers() +case intParam("prof",0) +of 1: echoTimers() +of 2: echoHotspots() +else: discard qexFinalize() diff --git a/src/field/fieldET.nim b/src/field/fieldET.nim index 591990e..4367909 100644 --- a/src/field/fieldET.nim +++ b/src/field/fieldET.nim @@ -603,7 +603,7 @@ proc mul*(r:Field; x:Field2; y:Field3) = mul(r[e], x[e], y[e]) proc norm2P*(f:SomeField):auto = - tic() + tic("norm2P[" & $type(f) & "]") mixin norm2, inorm2, simdSum, items, toDouble #var n2:type(norm2(f[0])) var n2: evalType(norm2(toDouble(f[0]))) @@ -611,10 +611,10 @@ proc norm2P*(f:SomeField):auto = #let t = f for x in items(f): inorm2(n2, toDouble(f[x])) - toc("norm2 local") + toc("local") #echoAll n2 result = simdSum(n2) - toc("norm2 simd sum") + toc("simdSum") #echoAll myRank, ",", threadNum, ": ", result #threadSum(result) #toc("norm2 thread sum") @@ -622,7 +622,7 @@ proc norm2P*(f:SomeField):auto = #toc("norm2 rank sum") f.l.threadRankSum(result) #echo result - toc("norm2 thread rank sum") + toc("threadRankSum") template norm2*(f:SomeAllField):auto = when declared(subsetObject): #echo "subsetObj" & s @@ -702,7 +702,7 @@ template dot*(f1:SomeAllField; f2:SomeAllField2):untyped = template dot*(f1:Subsetted; f2:SomeAllField2):untyped = dotP(f1, f2) proc redotP*(f1:SomeField; f2:SomeField2):auto = - tic() + tic("redotP[" & $f1.type & "," & $f2.type & "]") mixin redot, iredot, simdSum, items, toDouble, eval #var d:type(redot(f1[0],f2[0])) var d: evalType(toDouble(redot(f1[0],f2[0]))) @@ -711,9 +711,9 @@ proc redotP*(f1:SomeField; f2:SomeField2):auto = for x in items(t1): #iredot(d, t1[x], t2[x]) d += redot(t1[x], t2[x]) - toc("redot local") + toc("local") result = simdSum(d) - toc("redot simd sum") + toc("simdSum") #threadBarrier() #toc("thread barrier") #threadSum(result) @@ -721,7 +721,7 @@ proc redotP*(f1:SomeField; f2:SomeField2):auto = #rankSum(result) #toc("rank sum") f1.l.threadRankSum(result) - toc("redot thread rank sum") + toc("threadRankSum") template redot*(f1:SomeAllField; f2:SomeAllField2):untyped = when declared(subsetObject): #echo "subsetObj redot" diff --git a/src/io/crc32.nim b/src/io/crc32.nim index 608f218..e06c6a4 100644 --- a/src/io/crc32.nim +++ b/src/io/crc32.nim @@ -3,7 +3,7 @@ import strutils type Crc32* = uint32 const InitCrc32* = Crc32(not 0'u32) const crc32PolyLow = Crc32(0xedb88320) -const crc32Poly = uint64(0x100000000) + uint64(crc32PolyLow) +#const crc32Poly = uint64(0x100000000) + uint64(crc32PolyLow) proc createCrcTable(): array[0..255, Crc32] = for i in 0..255: @@ -48,6 +48,7 @@ proc polyMul(x0: uint32, y0: uint64): uint64 = y = y shl 1 x = x shr 1 +#[ proc polyRem(x0: uint64, y0: uint64): uint32 = var x = x0 var y = y0 @@ -67,6 +68,7 @@ proc polyRem(x0: uint64, y0: uint64): uint32 = echo (x0 xor polyMul(q, y0)).toHex echo x.toHex result = x.uint32 +]# proc mulRem(r1,r2: Crc32): Crc32 = var t = polyMul(r1, r2) shl 1 @@ -75,12 +77,14 @@ proc mulRem(r1,r2: Crc32): Crc32 = t = t shr 8 result = result xor Crc32(t) +#[ proc zeroPadCrc32X(crc: Crc32, n: int): Crc32 = var fac = Crc32(0x80000000) for i in 0..sbQex>op") threadBarrier() if parEven: stagD2ee(s.se, s.so, a, s.g, b, m*m) @@ -95,8 +95,9 @@ proc solveXX*(s: Staggered; r,x: Field; m: SomeNumber; sp0: var SolverParams; echo "solveEE(QEX): ", sp.getStats else: echo "solveOO(QEX): ", sp.getStats + toc("end sbQex") of sbQuda: - tic() + tic("sbQuda") if parEven: #echo x.even.norm2, " ", sp.r2req s.qudaSolveEE(r,x,m,sp) @@ -111,7 +112,7 @@ proc solveXX*(s: Staggered; r,x: Field; m: SomeNumber; sp0: var SolverParams; if sp0.verbosity>0: echo "solveXX(QUDA): ", sp.getStats of sbGrid: - tic() + tic("sbQuda") if parEven: s.gridSolveEE(r,x,m,sp) toc("gridSolveEE") @@ -127,6 +128,7 @@ proc solveXX*(s: Staggered; r,x: Field; m: SomeNumber; sp0: var SolverParams; sp.iterationsMax = sp.iterations sp.r2.push 0.0 sp0.addStats(sp) + toc("end solveXX") proc solveEE*(s: Staggered; r,x: Field; m: SomeNumber; sp0: var SolverParams) = solveXX(s, r, x, m, sp0, parEven=true) @@ -137,7 +139,7 @@ proc solveOO*(s: Staggered; r,x: Field; m: SomeNumber; sp0: var SolverParams) = # right-preconditioned proc solveReconR(s:Staggered; x,b:Field; m:SomeNumber; sp: var SolverParams; b2e,b2o: float) = - tic() + tic("solveReconR") let b2 = b2e + b2o let r2stop = sp.r2req * b2 let r2stop2 = 0.5 * r2stop @@ -175,7 +177,7 @@ proc solveReconR(s:Staggered; x,b:Field; m:SomeNumber; sp: var SolverParams; # left-preconditioned with odd reconstruction proc solveReconL(s:Staggered; x,b:Field; m:SomeNumber; sp: var SolverParams; b2e,b2o: float) = - tic() + tic("solveReconL") #if b2e == 0.0 or b2o == 0.0: #solveR(s, y, r, m, sp, r2e, r2o) var d = newOneOf(b) @@ -415,7 +417,7 @@ when isMainModule: threads: v1 := 0 resetTimers() - precon = true + #precon = true s.solve(v1, v3, m, sp) threads: r := v1 - v2 diff --git a/src/solvers/cg.nim b/src/solvers/cg.nim index f3237f3..8f9234d 100644 --- a/src/solvers/cg.nim +++ b/src/solvers/cg.nim @@ -54,7 +54,7 @@ proc newCgState*[T](x,b: T): CgState[T] = # solves: A x = b proc solve*(state: var CgState; op: auto; sp: var SolverParams) = mixin apply, applyPrecon - tic() + tic("solve") let vrb = sp.verbosity template verb(n:int; body:untyped) = if vrb>=n: body @@ -172,7 +172,7 @@ proc solve*(state: var CgState; op: auto; sp: var SolverParams) = echo("CG iteration: ", itn, " r2/b2: ", r2/b2) while itnr2stop: - tic() + tic("cg loop") if itn == 0 or precon != cpLeftRight: preconL(z, r) # z = L r or z = R r for RightNonHerm else: @@ -195,6 +195,7 @@ proc solve*(state: var CgState; op: auto; sp: var SolverParams) = verb(3): echo "beta: ", beta preconR(p, q) # p = R q + toc("preconR") inc itn op.apply(Ap, p) toc("Ap") From 380d724efef7ca1dc1bed758098f8adba62654d9 Mon Sep 17 00:00:00 2001 From: James Osborn Date: Fri, 9 Aug 2024 12:34:10 -0500 Subject: [PATCH 48/60] fix CI for Nim 1.6 --- src/base/profile.nim | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/base/profile.nim b/src/base/profile.nim index 8153a16..e6ee8fd 100644 --- a/src/base/profile.nim +++ b/src/base/profile.nim @@ -121,7 +121,8 @@ iterator items[T](ls:List[T]):T = proc setLen(ls:var RTInfoObjList, len:int32) {.borrow.} proc free(ls:var RTInfoObjList) {.borrow.} -proc add(ls:var RTInfoObjList, x:RTInfoObj) {.borrow.} +#proc add(ls:var RTInfoObjList, x:RTInfoObj) {.borrow.} +proc add(ls:var RTInfoObjList, x:RTInfoObj) = add(List[RTInfoObj](ls), x) template len(ls:RTInfoObjList):int32 = List[RTInfoObj](ls).len template `[]`(ls:RTInfoObjList, n:int32):untyped = List[RTInfoObj](ls)[n] iterator mitems(ls:RTInfoObjList):var RTInfoObj = @@ -747,7 +748,7 @@ proc makeHotspotTable(lrti: List[RTInfoObj]): tuple[ns:int64,oh:int64] = l0 = ri.prev.loc.line f = splitFile(ri.curr.loc.filename)[1] l = ri.curr.loc.line - loc = f0 & "(" & $l0 & "-" & (if f==f0:"" else:f) & $l & ") " & getName(addr ri) + loc = f0 & "(" & $l0 & "-" & (if f==f0:"" else:f) & $l & ") " & getName(unsafeAddr ri) coh = ri.childrenOverhead soh = ri.overhead nsec = ri.nsec From d3d4bfb38370838fdd52fe905da499cbd875b35b Mon Sep 17 00:00:00 2001 From: Xiao-Yong Jin Date: Mon, 12 Aug 2024 16:17:45 -0500 Subject: [PATCH 49/60] examples: add simple multi_nc example --- src/examples/multi_nc.nim | 72 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 72 insertions(+) create mode 100644 src/examples/multi_nc.nim diff --git a/src/examples/multi_nc.nim b/src/examples/multi_nc.nim new file mode 100644 index 0000000..ad7e33e --- /dev/null +++ b/src/examples/multi_nc.nim @@ -0,0 +1,72 @@ +import qex, gauge/stoutsmear + +qexInit() + +let + seed = 4321u + lat = @[8,8,8,16] + nd = lat.len + lo = lat.newLayout + vol = lo.physVol + gc = GaugeActionCoeffs(plaq: 6.0) + +var r = lo.newRNGField(RngMilc6, seed) + +var gaugeu1 = newseq[Field[VLEN,Color[MatrixArray[1,1,DComplexV]]]](nd) +var gaugesu2 = newseq[Field[VLEN,Color[MatrixArray[2,2,DComplexV]]]](nd) +var gaugesu3= newseq[Field[VLEN,Color[MatrixArray[3,3,DComplexV]]]](nd) +for i in 0.. Date: Fri, 16 Aug 2024 13:11:03 -0500 Subject: [PATCH 50/60] add Gauge type --- src/gauge/gaugeUtils.nim | 57 +++++++++++++++++++++++++++++++++------- src/physics/qcdTypes.nim | 16 +++++++++++ 2 files changed, 63 insertions(+), 10 deletions(-) diff --git a/src/gauge/gaugeUtils.nim b/src/gauge/gaugeUtils.nim index 8afb886..a61c3dc 100644 --- a/src/gauge/gaugeUtils.nim +++ b/src/gauge/gaugeUtils.nim @@ -12,23 +12,39 @@ import maths, rng, physics/qcdTypes import std/[hashes, tables] -#[ type GroupKind* = enum - gkU, gkSU, gkHerm, gkAntiHerm # traceless, real/complex - Gauge*[T] = object - u*: seq[T] - n*: int + gkU, gkAntiHerm, + gkSU, gkTracelessAntiHerm, + gkO, gkRealAntiSym, + gkSO, gkTracelessRealAntiSym, + gkSp, + gkHerm, gkTracelessHerm, + gkRealSym, gkTracelessRealSym + GaugeBase* {.inheritable.} = object group*: GroupKind -]# + n*: int + GaugeObj*[T] = object of GaugeBase + u*: seq[T] + Gauge*[T] = ref GaugeObj[T] +proc default*(g: Gauge) = + let nd = g.u.len + threads: + if g.group in {gkU, gkSU, gkO, gkSO}: + for i in 0.. Date: Fri, 16 Aug 2024 23:06:11 +0000 Subject: [PATCH 51/60] passL: dynamic link to libquda and rpath for quda and qmp --- src/comms/qmp.nim | 2 +- src/quda/qudaWrapperImpl.nim | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/src/comms/qmp.nim b/src/comms/qmp.nim index 5b9f5c7..6304c4b 100644 --- a/src/comms/qmp.nim +++ b/src/comms/qmp.nim @@ -5,7 +5,7 @@ when existsEnv("QMPDIR"): else: const qmpDir {.strDefine.} = getHomeDir() & "lqcd/install/qmp" const qmpPassC = "-I" & qmpDir & "/include" -const qmpPassL* = "-L" & qmpDir & "/lib -lqmp" +const qmpPassL* = "-L" & qmpDir & "/lib -lqmp -Wl,-rpath=" & qmpDir & "/lib" static: echo "Using QMP: ", qmpDir echo "QMP compile flags: ", qmpPassC diff --git a/src/quda/qudaWrapperImpl.nim b/src/quda/qudaWrapperImpl.nim index 919d175..976eb8a 100644 --- a/src/quda/qudaWrapperImpl.nim +++ b/src/quda/qudaWrapperImpl.nim @@ -34,7 +34,8 @@ when nvhpcDir != "": {.passL: "-Wl,-rpath," & qudaDir & "/lib".} when cudaLibDir=="" and nvhpcDir=="": - {.passL: qudaDir & "/lib/libquda.a -lstdc++ ".} + {.passL: "-L" & qudaDir & "/lib -lquda -lstdc++ ".} + {.passL: "-Wl,-rpath," & qudaDir & "/lib".} const qmpDir {.strdefine.} = getEnv("QMPDIR") const qioDir {.strdefine.} = getEnv("QIODIR") From c0434ff8ee323516fbb9536cecd95ff8ceaee239 Mon Sep 17 00:00:00 2001 From: Xiao-Yong Jin Date: Tue, 27 Aug 2024 15:45:47 -0500 Subject: [PATCH 52/60] base/profile: fix string handling - use a global char array pool for saving timer names - use global vars for instantiation info --- src/base/profile.nim | 91 +++++++++++++++++++++++++++++++++----------- 1 file changed, 69 insertions(+), 22 deletions(-) diff --git a/src/base/profile.nim b/src/base/profile.nim index e6ee8fd..b47bc39 100644 --- a/src/base/profile.nim +++ b/src/base/profile.nim @@ -36,7 +36,7 @@ tic() injects symbols: ]## type - II = typeof(instantiationInfo()) + II = ptr typeof(instantiationInfo()) RTInfo = distinct int RTInfoObj = object nsec: int64 @@ -53,13 +53,15 @@ type #overhead: int64 count: uint32 dropcount: uint32 - name: string + name: CStr loc: II SString = static[string] | string List[T] = object # No GCed type allowed len,cap:int32 data:ptr UncheckedArray[T] RTInfoObjList = distinct List[RTInfoObj] + CStr = object + p, s: int32 var rtiListLength:int32 = 0 @@ -132,6 +134,51 @@ iterator mitems(ls:RTInfoObjList):var RTInfoObj = func isNil(x:RTInfo):bool = x.int<0 func isNil(x:CodePoint):bool = x.int<0 +const defaultCStrPoolCap {.intDefine.} = 512 +type CStrAtom = array[16,char] +var cstrpool = newListOfCap[CStrAtom](defaultCStrPoolCap) + +proc len(s:CStr):int = int(s.s) +proc newCStr(t:string):CStr = + const a = int32(sizeof(CStrAtom)) + let p = cstrpool.len + let s = int32(t.len) + let n = (s+a-1) div a + cstrpool.setlen(p+n) + var k = 0 + var j = 0 + for i in 0..0) loc = pre & markMissing(noexpand, f0 & "(" & $l0 & "-" & (if f==f0:"" else:f) & $l & ")") - nm = pre & markMissing(noexpand, (if tn=="":"" else:tn&":") & (if pn=="":"" else:pn&"-") & ts[j].curr.name) + nm = pre & markMissing(noexpand, (if tn.len==0:"" else: $tn & ":") & (if pn.len==0:"" else: $pn & "-") & $ts[j].curr.name) if total!=0: let cent = 1e2 * ns.float / total.float @@ -715,7 +762,7 @@ template echoTimers*(expandAbove = 0.0, expandDropped = true, aggregate = true) if n0.0: ", not expanding contributions less than " & $(1e2*expandAbove) & " %" else:"" - echo "Timer total ",(tt.float*1e-6)|(0,-3)," ms, overhead ",(oh.float*1e-6)|(0,-3)," ms ~ ",(1e2*oh.float/tt.float)|(0,-1)," %, memory ",rtiListLength*sizeof(RTInfoObj)," B, max ",rtiListLengthMax*sizeof(RTInfoObj)," B",notshowing + echo "Timer total ",(tt.float*1e-6)|(0,-3)," ms, overhead ",(oh.float*1e-6)|(0,-3)," ms ~ ",(1e2*oh.float/tt.float)|(0,-1)," %, runtime info ",rtiListLength*sizeof(RTInfoObj)," B, max ",rtiListLengthMax*sizeof(RTInfoObj)," B, string ",cstrpool.len*sizeof(cstrpool[0])," B",notshowing echo '='.repeat(width) echo "file(lines)"|(-n), "%"|6, "OH%"|6, "microsecs"|12, "OH"|8, "count"|9, "ns/count"|14, "OH/c"|8, "mf"|8, " label" echo '='.repeat(width) @@ -731,7 +778,7 @@ proc echoTimersRaw* = proc getName(t: ptr RTInfoObj): string = let tn = t.tic.name let pn = t.prev.name - let name = (if tn=="":"" else:tn&":") & (if pn=="":"" else:pn&"-") & t.curr.name + let name = (if tn.len==0:"" else: $tn & ":") & (if pn.len==0:"" else: $pn & "-") & $t.curr.name if t.prev.toDropTimer: result = "{" & name & "}" else: result = name From 34728774bf7bf838a1078d399c1069f412246147 Mon Sep 17 00:00:00 2001 From: James Osborn Date: Thu, 29 Aug 2024 22:38:41 -0500 Subject: [PATCH 53/60] restore profiling children in hotspot list --- src/base/profile.nim | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/base/profile.nim b/src/base/profile.nim index b47bc39..309fb77 100644 --- a/src/base/profile.nim +++ b/src/base/profile.nim @@ -822,7 +822,7 @@ proc makeHotspotTable(lrti: List[RTInfoObj]): tuple[ns:int64,oh:int64] = t.children.add ri.children[i] do: # loc not found hs[loc] = ri - #let tot = makeHotSpotTable(List[RTInfoObj](ri.children)) + let tot = makeHotSpotTable(List[RTInfoObj](ri.children)) return (nstot, ohtot) proc echoHotspots* = From ba7f3c5a58418f6a4bdcfacc36ddb6c435ba41ca Mon Sep 17 00:00:00 2001 From: Xiao-Yong Jin Date: Wed, 25 Sep 2024 13:05:54 -0500 Subject: [PATCH 54/60] experimental/graph/hmcgauge: tuning for pure gauge HMC --- src/experimental/graph/hmcgauge.nim | 323 ++++++++++++++++++++++++++++ 1 file changed, 323 insertions(+) create mode 100644 src/experimental/graph/hmcgauge.nim diff --git a/src/experimental/graph/hmcgauge.nim b/src/experimental/graph/hmcgauge.nim new file mode 100644 index 0000000..1e20775 --- /dev/null +++ b/src/experimental/graph/hmcgauge.nim @@ -0,0 +1,323 @@ +import qex +import core, scalar, gauge +from os import fileExists +from strformat import `&` +from math import cos, PI + +proc newOneOf(x: float): float = 0.0 + +type AdamW[Param] = object + alpha, beta1, beta2, eps, lambda: float + m: Param + v: Param + +func newAdamW[Param](param: Param, alpha = 0.001, beta1 = 0.9, beta2 = 0.999, eps = 1e-8, lambda = 0.01): AdamW[Param] = + result = AdamW[Param](alpha: alpha, beta1: beta1, beta2: beta2, eps: eps, lambda: lambda) + result.m = newOneOf param + result.v = newOneOf param + +func newAdam[Param](param: Param, alpha = 0.001, beta1 = 0.9, beta2 = 0.999, eps = 1e-8): AdamW[Param] = + newAdamW[Param](param, alpha, beta1, beta2, eps, lambda = 0.0) + +proc optimize[Param](opt: var AdamW[Param], param: var Param, grad: Param, t: int, lr: float) = + ## arXiv:1711.05101, standard Adam if lambda == 0, effectively scale grad by alpha/stdev(grad) for descent. + ## Decay term is equivalent to an additional term of lambda/(2 scale) param^2 ~ lambda/(2 alpha) stdev(grad) param^2, added to the objective. + ## Normalized weight decay suggests lambda = lambda_norm sqrt(b/BT), for batch size b, training number B, total epoch T. + let + a = opt.alpha + b1 = opt.beta1 + b2 = opt.beta2 + sb1 = 1.0 - b1 + sb2 = 1.0 - b2 + sb1t = 1.0 - b1^t + sb2t = 1.0 - b2^t + dr = opt.lambda + eps = opt.eps + for i in 0..0: + g = axexpmuly(t0, p, g) + p = p - h * gaugeForce(gc, g) + g = axexpmuly(t1, p, g) + p = p - h * gaugeForce(gc, g) + g = axexpmuly(t05, p, g) + (g, p, @[lambda]) + +proc int4MN3F1GP(gc, g0, p0, dt: Gvalue, n: int, coeffs: openarray[float]): (Gvalue, Gvalue, seq[Gvalue]) = + let lambda = coeffs.get(0, 0.2470939580390842) + let theta = coeffs.get(1, 0.5 - 1.0 / sqrt(24.0 * lambda.getfloat)) + # scale the force gradient coeff to about the same order as the other + let chi = coeffs.get(2, (1.0 - sqrt(6.0 * lambda.getfloat) * (1.0 - lambda.getfloat)) / 12.0 * (2.0 / (1.0 - 2.0*lambda.getfloat) * 10.0)) + var g = g0 + var p = p0 + let a0 = theta*dt + let a02 = 2.0*a0 + let a1 = 0.5*dt - a0 + let b0 = lambda*dt + let b1 = dt - 2.0*b0 + let c1 = 0.1*chi*(dt*dt) + g = axexpmuly(a0, p, g) + for i in 0..0: + g = axexpmuly(a02, p, g) + p = p - b0 * gaugeForce(gc, g) + g = axexpmuly(a1, p, g) + p = p - b1 * gaugeForce(gc, axexpmuly(-c1, gaugeForce(gc, g), g)) + g = axexpmuly(a1, p, g) + p = p - b0 * gaugeForce(gc, g) + g = axexpmuly(a0, p, g) + (g, p, @[lambda, theta, chi]) + +proc int4MN5F2GP(gc, g0, p0, dt: Gvalue, n: int, coeffs: openarray[float]): (Gvalue, Gvalue, seq[Gvalue]) = + let rho = coeffs.get(0, 0.06419108866816235) + let theta = coeffs.get(1, 0.1919807940455741) + let vtheta = coeffs.get(2, 0.1518179640276466) + let lambda = coeffs.get(3, 0.2158369476787619) + # scale the force gradient coeff to about the same order as the other + let xi = coeffs.get(4, 0.0009628905212024874 * (2.0 / lambda.getfloat * 20.0)) + var g = g0 + var p = p0 + let a0 = rho*dt + let a02 = 2.0*a0 + let a1 = theta*dt + let a2 = (0.5-(theta+rho))*dt + let b1 = lambda*dt + let b0 = vtheta*dt + let b2 = (1.0-2.0*(lambda+vtheta))*dt + let c1 = 0.05*xi*(dt*dt) + g = axexpmuly(a0, p, g) + for i in 0..0: + g = axexpmuly(a02, p, g) + p = p - b0 * gaugeForce(gc, g) + g = axexpmuly(a1, p, g) + p = p - b1 * gaugeForce(gc, axexpmuly(-c1, gaugeForce(gc, g), g)) + g = axexpmuly(a2, p, g) + p = p - b2 * gaugeForce(gc, g) + g = axexpmuly(a2, p, g) + p = p - b1 * gaugeForce(gc, axexpmuly(-c1, gaugeForce(gc, g), g)) + g = axexpmuly(a1, p, g) + p = p - b0 * gaugeForce(gc, g) + g = axexpmuly(a0, p, g) + (g, p, @[rho, theta, vtheta, lambda, xi]) + +qexInit() + +tic() + +letParam: + gaugefile = "" + savefile = "config" + savefreq = 0 + lat = + if fileExists(gaugefile): + getFileLattice gaugefile + else: + if gaugefile.len > 0: + qexWarn "Nonexistent gauge file: ", gaugefile + @[8,8,8,16] + beta = 5.4 + dt = 0.025 + trajsThermo = 0 + trajsTrain = 50 + trajsTrainlrWarm = 10 + trajsInfer = 0 + lrmax = 1.0 + lrmin = 0.0001 + weightDecay = 0.0 + seed:uint = 1234567891 + gintalg = "2MN" + lambda = @[0.0] + gsteps = 4 + alwaysAccept:bool = 0 + +echo "rank ", myRank, "/", nRanks +threads: echo "thread ", threadNum, "/", numThreads + +installStandardParams() +echoParams() +processHelpParam() + +let + lo = lat.newLayout + vol = lo.physVol + gc = actWilson(beta) + +var r = lo.newRNGField(RngMilc6, seed) +var R:RngMilc6 # global RNG +R.seed(seed, 987654321) + +var + g = lo.newgauge + p = lo.newgauge + +if fileExists(gaugefile): + tic("load") + if 0 != g.loadGauge gaugefile: + qexError "failed to load gauge file: ", gaugefile + qexLog "loaded gauge from file: ", gaugefile," secs: ",getElapsedTime() + toc("read") + g.reunit + toc("reunit") +else: + #g.random r + g.unit + +g.echoPlaq + +let gdt = toGvalue dt +var params = @[gdt] +let + gg = toGvalue g + gp = toGvalue p + ga0 = gc.gaugeAction gg + t0 = 0.5 * gp.norm2 + h0 = ga0 + t0 + tau = float(gsteps) * gdt + (g1, p1, coeffs) = case gintalg + of "2MN": + int2MN(gc, gg, gp, gdt, gsteps, lambda) + of "4MN3F1GP": + int4MN3F1GP(gc, gg, gp, gdt, gsteps, lambda) + of "4MN5F2GP": + int4MN5F2GP(gc, gg, gp, gdt, gsteps, lambda) + else: + raise newException(ValueError, "unknown intalg: " & gintalg) + ga1 = gc.gaugeAction g1 + t1 = 0.5 * p1.norm2 + h1 = ga1 + t1 + dH = h1 - h0 + acc = cond(dH<0.0, 1.0, exp(-dH)) + loss = -acc * (tau * tau) + +params.add coeffs +var grads = newseq[Gvalue]() +for x in params: + grads.add loss.grad x + +var param = newseq[float]() +for x in params: + param.add x.getfloat +var grad = param +var opt = newAdamW(param, lambda = weightDecay) + +block: + var ps = "param:" + for i in 0.. 0 and traj mod savefreq == 0: + tic("save") + let fn = savefile & &".{traj:05}.lime" + if 0 != g.saveGauge(fn): + qexError "Failed to save gauge to file: ",fn + qexLog "saved gauge to file: ",fn," secs: ",getElapsedTime() + toc("done") + + qexLog "traj ",traj," secs: ",getElapsedTime() + toc("traj end") + +toc() + +processSaveParams() +writeParamFile() +qexFinalize() From e2914fcc18a39d9c5cb747b0d732d270869401cf Mon Sep 17 00:00:00 2001 From: Xiao-Yong Jin Date: Wed, 25 Sep 2024 14:00:39 -0500 Subject: [PATCH 55/60] bench/benchQio: fix newSeqWith --- src/bench/benchQio.nim | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/bench/benchQio.nim b/src/bench/benchQio.nim index 790c837..feabf4a 100644 --- a/src/bench/benchQio.nim +++ b/src/bench/benchQio.nim @@ -27,8 +27,8 @@ proc test(lat: seq[int]) = var rs = newRNGField(RngMilc6, lo, intParam("seed", 987654321).uint64) #var r0 = lo.RealS() #var r1 = lo.RealS() - var r0 = newSeqWith[type lo.ColorMatrixS](4, lo.ColorMatrixS) - var r1 = newSeqWith[type lo.ColorMatrixS](4, lo.ColorMatrixS) + var r0 = newSeqWith(4, lo.ColorMatrixS) + var r1 = newSeqWith(4, lo.ColorMatrixS) var fn = stringParam("fn", "testqio.lat") let bytes = r0.len * r0[0].numNumbers * sizeof(r0[0][0].numberType) byts.add bytes From 8c90ef26f70d85fd1a9fa475610d8a7c522b0cca Mon Sep 17 00:00:00 2001 From: James Osborn Date: Tue, 1 Oct 2024 13:55:44 -0500 Subject: [PATCH 56/60] improve too large r2 message small updates --- src/base/profile.nim | 1 + src/examples/staghmc_sh.nim | 2 +- src/maths/matexp.nim | 43 +++++++++++++++++++++++++++++++++++++ src/physics/stagD.nim | 4 ++-- 4 files changed, 47 insertions(+), 3 deletions(-) diff --git a/src/base/profile.nim b/src/base/profile.nim index 309fb77..7197cbf 100644 --- a/src/base/profile.nim +++ b/src/base/profile.nim @@ -989,3 +989,4 @@ when isMainModule: toc("end") echoTimers() echoTimersRaw() + echoHotspots() diff --git a/src/examples/staghmc_sh.nim b/src/examples/staghmc_sh.nim index fb6d9f9..ac5c5d1 100644 --- a/src/examples/staghmc_sh.nim +++ b/src/examples/staghmc_sh.nim @@ -241,7 +241,7 @@ for k in 0.. sp.r2req: - qexError "Max r2 larger than requested." + qexError &"Max r2 ({sp.r2.max}) larger than requested ({sp.r2req})" sp.resetStats proc reunit(g:auto) = diff --git a/src/maths/matexp.nim b/src/maths/matexp.nim index 3e17421..423f642 100644 --- a/src/maths/matexp.nim +++ b/src/maths/matexp.nim @@ -564,6 +564,10 @@ type order*: int scale*: int valid*: bool +proc newExpParam*: ExpParam = + result.kind = ekPoly + result.order = 4 + result.scale = 20 proc expm1NoScale*[R,M:Mat1](p: var ExpParam, r: var R, m: M) = p.valid = true @@ -942,3 +946,42 @@ when isMainModule: testDeriv(CM[1,float]) testDeriv(CM[2,float]) testDeriv(CM[3,float]) + + # example from https://blogs.mathworks.com/cleve/2012/07/23/a-balancing-act-for-the-matrix-exponential + proc getm3: MatrixArray[3,3,float] = + let + a = 2e10 + b = 4e8/6 + c = 200/3 + d = 3.0 + e = 1e-8 + result[0,1] = e + result[1,0] = -(a+b) + result[1,1] = -d + result[1,2] = a + result[2,0] = c + result[2,2] = -c + proc getm3exp: MatrixArray[3,3,float] = + result[0,0] = 0.446849468283175 + result[0,1] = 1.54044157383952e-09 + result[0,2] = 0.462811453558774 + result[1,0] = -5743067.77947947 + result[1,1] = -0.0152830038686819 + result[1,2] = -4526542.71278401 + result[2,0] = 0.447722977849494 + result[2,1] = 1.54270484519591e-09 + result[2,2] = 0.463480648837651 + + proc testgetm3 = + let a = getm3() + let ae = getm3exp() + var p = newExpParam() + p.scale = 60 + p.order = 12 + let b = p.exp(a) + let d = b-ae + echo a + echo b + echo sqrt(d.norm2/ae.norm2) + testgetm3() + diff --git a/src/physics/stagD.nim b/src/physics/stagD.nim index 2b9c974..84a93d9 100644 --- a/src/physics/stagD.nim +++ b/src/physics/stagD.nim @@ -446,14 +446,14 @@ proc stagD2xx*(sdx,sdy:StaggeredD; r:Field; g:openArray[Field2]; block: stagDP(sdy, t, g, x, 0): rir := 0 - toc("stagDP") + toc("stagDP", flops=(g.len*(72+66+6))*sdy.subset.len) threadBarrier() #toc("barrier") #stagD(sde, r, g, t, 0.0) block: stagDM(sdx, r, g, t, 6): rir := (4.0*m2)*x[ir] - toc("stagDM") + toc("stagDM", flops=(6+g.len*(72+66+6))*sdx.subset.len) #threadBarrier() #r[sde.sub] := m2*x - r #for ir in r[sde.subset]: From 2c6b34f7de90c5019a14de233b4be2a04ba40137 Mon Sep 17 00:00:00 2001 From: James Osborn Date: Thu, 3 Oct 2024 15:36:25 -0500 Subject: [PATCH 57/60] some updates for Nim 2.2.0 --- src/comms/commsUtils.nim | 1 + src/simd.nim | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/src/comms/commsUtils.nim b/src/comms/commsUtils.nim index 9fb89f8..7e05271 100644 --- a/src/comms/commsUtils.nim +++ b/src/comms/commsUtils.nim @@ -47,6 +47,7 @@ macro echo0*(args: varargs[untyped]): untyped = var call = newCall(bindSym"echoRaw") result = evalArgs(call, args) result.add(quote do: + bind myRank if myRank==0 and threadNum==0: `call` ) diff --git a/src/simd.nim b/src/simd.nim index 7c36241..bde7919 100644 --- a/src/simd.nim +++ b/src/simd.nim @@ -36,7 +36,7 @@ when true: msa(SimdS16, 16, float32) when not declared(SimdD16): when declared(SimdD8): - msa(SimdD16, 2, SimdD8[]) + msa(SimdD16, 2, `[]`SimdD8) elif declared(SimdD4): msa(SimdD16, 4, SimdD4[]) elif declared(SimdD2): From 77a5fbd20f9385f4892239cca7f8c00934e93086 Mon Sep 17 00:00:00 2001 From: James Osborn Date: Thu, 3 Oct 2024 17:06:43 -0500 Subject: [PATCH 58/60] fix use of array in macro for Nim devel --- src/comms/commsUtils.nim | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/comms/commsUtils.nim b/src/comms/commsUtils.nim index 7e05271..bb9a423 100644 --- a/src/comms/commsUtils.nim +++ b/src/comms/commsUtils.nim @@ -180,16 +180,18 @@ macro rankSumN*(comm:Comm, a:varargs[typed]):auto = i0 = i break if i0<0: + let b = newNimNode(nnkBracket) var s = newNimNode(nnkStmtList) let t = ident("t") for i in 0.. Date: Fri, 8 Nov 2024 16:30:57 -0600 Subject: [PATCH 59/60] simd: fix for nim 2.2.0 --- src/simd.nim | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/src/simd.nim b/src/simd.nim index bde7919..0842aea 100644 --- a/src/simd.nim +++ b/src/simd.nim @@ -27,20 +27,20 @@ when defined(SSE) or defined(AVX) or defined(AVX512): when true: when not declared(SimdS16): when declared(SimdS8): - msa(SimdS16, 2, SimdS8[]) + msa(SimdS16, 2, `[]`(SimdS8)) elif declared(SimdS4): - msa(SimdS16, 4, SimdS4[]) + msa(SimdS16, 4, `[]`(SimdS4)) elif declared(SimdS2): - msa(SimdS16, 8, SimdS2[]) + msa(SimdS16, 8, `[]`(SimdS2)) else: msa(SimdS16, 16, float32) when not declared(SimdD16): when declared(SimdD8): - msa(SimdD16, 2, `[]`SimdD8) + msa(SimdD16, 2, `[]`(SimdD8)) elif declared(SimdD4): - msa(SimdD16, 4, SimdD4[]) + msa(SimdD16, 4, `[]`(SimdD4)) elif declared(SimdD2): - msa(SimdD16, 8, SimdD2[]) + msa(SimdD16, 8, `[]`(SimdD2)) else: msa(SimdD16, 16, float64) when not declared(SimdS16Obj): @@ -52,16 +52,16 @@ when true: when true: when not declared(SimdS8): when declared(SimdS4): - msa(SimdS8, 2, SimdS4[]) + msa(SimdS8, 2, `[]`(SimdS4)) elif declared(SimdS2): - msa(SimdS8, 4, SimdS2[]) + msa(SimdS8, 4, `[]`(SimdS2)) else: msa(SimdS8, 8, float32) when not declared(SimdD8): when declared(SimdD4): - msa(SimdD8, 2, SimdD4[]) + msa(SimdD8, 2, `[]`(SimdD4)) elif declared(SimdD2): - msa(SimdD8, 4, SimdD2[]) + msa(SimdD8, 4, `[]`(SimdD2)) else: msa(SimdD8, 8, float64) when not declared(SimdS8Obj): @@ -73,12 +73,12 @@ when true: when true: when not declared(SimdS4): when declared(SimdS2): - msa(SimdS4, 2, SimdS2[]) + msa(SimdS4, 2, `[]`(SimdS2)) else: msa(SimdS4, 4, float32) when not declared(SimdD4): when declared(SimdD2): - msa(SimdD4, 2, SimdD2[]) + msa(SimdD4, 2, `[]`(SimdD2)) else: msa(SimdD4, 4, float64) when not declared(SimdS4Obj): From 0508c8b12facfde71e59d86a2649c96b00da1ebd Mon Sep 17 00:00:00 2001 From: James Osborn Date: Sun, 10 Nov 2024 22:16:40 -0600 Subject: [PATCH 60/60] fix rpath for mac --- src/comms/qmp.nim | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/comms/qmp.nim b/src/comms/qmp.nim index 6304c4b..dc8ac6f 100644 --- a/src/comms/qmp.nim +++ b/src/comms/qmp.nim @@ -5,7 +5,7 @@ when existsEnv("QMPDIR"): else: const qmpDir {.strDefine.} = getHomeDir() & "lqcd/install/qmp" const qmpPassC = "-I" & qmpDir & "/include" -const qmpPassL* = "-L" & qmpDir & "/lib -lqmp -Wl,-rpath=" & qmpDir & "/lib" +const qmpPassL* = "-L" & qmpDir & "/lib -lqmp -Wl,-rpath," & qmpDir & "/lib" static: echo "Using QMP: ", qmpDir echo "QMP compile flags: ", qmpPassC