Skip to content

Commit

Permalink
added fat7 derivative
Browse files Browse the repository at this point in the history
  • Loading branch information
jcosborn committed Dec 6, 2024
1 parent 01128e2 commit 235ed88
Show file tree
Hide file tree
Showing 4 changed files with 401 additions and 36 deletions.
5 changes: 5 additions & 0 deletions src/gauge/fat7l.nim
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,11 @@ type Fat7lCoefs* = object
sevenStaple*: float
lepage*: float

#type Fat7lState*[I,O] = object
# coefs*: Fat7lCoefs
# in*: I
# out*: O

proc `$`*(c: Fat7lCoefs): string =
result = "oneLink: " & $c.oneLink & "\n"
result &= "threeStaple: " & $c.threeStaple & "\n"
Expand Down
345 changes: 345 additions & 0 deletions src/gauge/fat7lderiv.nim
Original file line number Diff line number Diff line change
@@ -0,0 +1,345 @@
import fat7l, smearutil
import base/profile, layout

let STAPLE_FLOPS = 3*198+216+18
let SIDE_FORCE_FLOPS = 7*198+3*216+3*18

#[
proc staple(auto out, auto in0, auto link0, int mu, int nu) =
#define link ftmp0[0]
#define linkmu ftmp[0][mu]
#define in ftmp0[1]
#define innu ftmp[1][nu]
#define linkin mtmp[0]
#define back btmp0[0]
#define backnu btmp[0][nu]
#define linkinnu mtmp[1]
QDP_M_eq_M(link, link0, QDP_all);
QDP_M_eq_sM(linkmu, link, QDP_neighbor[mu], QDP_forward, QDP_all);
QDP_M_eq_M(in, in0, QDP_all);
QDP_M_eq_sM(innu, in, QDP_neighbor[nu], QDP_forward, QDP_all);
QDP_M_eq_Ma_times_M(linkin, link, in, QDP_all);
QDP_M_eq_M_times_M(back, linkin, linkmu, QDP_all);
QDP_M_eq_sM(backnu, back, QDP_neighbor[nu], QDP_backward, QDP_all);
QDP_M_eq_M_times_M(linkinnu, link, innu, QDP_all);
QDP_discard_M(innu);
QDP_M_peq_M_times_Ma(out, linkinnu, linkmu, QDP_all);
QDP_discard_M(linkmu);
QDP_M_peq_M(out, backnu, QDP_all);
QDP_discard_M(backnu);
#define STAPLE_FLOPS (3*198+216+18)
#undef link
#undef linkmu
#undef in
#undef innu
#undef linkin
#undef back
#undef backnu
#undef linkinnu
}
]#

#[
static void
side_force(QDP_ColorMatrix *force, QDP_ColorMatrix *bot0, QDP_ColorMatrix *side0,
QDP_ColorMatrix *top0, int mu, int nu, QDP_ColorMatrix *stpl)
{
#define side ftmp0[0]
#define sidemu ftmp[0][mu]
#define top ftmp0[1]
#define topnu ftmp[1][nu]
#define bot ftmp0[2]
#define botnu ftmp[2][nu]
#define sidebot mtmp[0]
#define sidetop mtmp[1]
#define topnusidebot btmp0[0]
#define fbmu btmp[0][mu]
#define botnusidetop btmp0[1]
#define fmbmu btmp[1][mu]
#define sidebotsidemu btmp0[2]
#define stm btmp[2][nu]
#define botnusidemu mtmp[2]
#define botsidemu mtmp[3]
// force += bot * sidemu * topnu+
// force -= bot-mu+ * side-mu * topnu-mu
// -= top <-> bot
// stpl += side * botnu * sidemu+
// stpl += side-nu+ * bot-nu * sidemu-nu
QDP_M_eq_M(side, side0, QDP_all);
QDP_M_eq_sM(sidemu, side, QDP_neighbor[mu], QDP_forward, QDP_all);
QDP_M_eq_M(top, top0, QDP_all);
QDP_M_eq_sM(topnu, top, QDP_neighbor[nu], QDP_forward, QDP_all);
QDP_M_eq_M(bot, bot0, QDP_all);
QDP_M_eq_sM(botnu, bot, QDP_neighbor[nu], QDP_forward, QDP_all);
QDP_M_eq_Ma_times_M(sidebot, side, bot, QDP_all);
QDP_M_eq_Ma_times_M(sidetop, side, top, QDP_all);
QDP_M_eq_Ma_times_M(topnusidebot, topnu, sidebot, QDP_all);
QDP_M_eq_sM(fbmu, topnusidebot, QDP_neighbor[mu], QDP_backward, QDP_all);
QDP_M_eq_Ma_times_M(botnusidetop, botnu, sidetop, QDP_all);
QDP_M_eq_sM(fmbmu, botnusidetop, QDP_neighbor[mu], QDP_backward, QDP_all);
QDP_M_eq_M_times_M(sidebotsidemu, sidebot, sidemu, QDP_all);
QDP_M_eq_sM(stm, sidebotsidemu, QDP_neighbor[nu], QDP_backward, QDP_all);
QDP_M_eq_M_times_Ma(botnusidemu, botnu, sidemu, QDP_all);
QDP_discard_M(botnu);
QDP_M_peq_M_times_M(stpl, side, botnusidemu, QDP_all);
//QDP_M_meq_M_times_Ma(force, top, botnusidemu, QDP_all);
QDP_M_peq_M_times_Ma(force, top, botnusidemu, QDP_all);
QDP_M_eq_M_times_M(botsidemu, bot, sidemu, QDP_all);
QDP_discard_M(sidemu);
QDP_M_peq_M_times_Ma(force, botsidemu, topnu, QDP_all);
QDP_discard_M(topnu);
//QDP_M_meq_Ma(force, fbmu, QDP_all);
QDP_M_peq_Ma(force, fbmu, QDP_all);
QDP_discard_M(fbmu);
QDP_M_peq_Ma(force, fmbmu, QDP_all);
QDP_discard_M(fmbmu);
QDP_M_peq_M(stpl, stm, QDP_all);
QDP_discard_M(stm);
#define SIDE_FORCE_FLOPS (7*198+3*216+3*18)
#undef side
#undef sidemu
#undef top
#undef topnu
#undef bot
#undef botnu
#undef sidebot
#undef sidetop
#undef topnusidebot
#undef fbmu
#undef botnusidetop
#undef fmbmu
#undef sidebotsidemu
#undef stm
#undef botnusidemu
#undef botsidemu
}
]#

proc fat7lderiv(perf: var PerfInfo, gauge: auto, deriv: auto,
coef: Fat7lCoefs, mid: auto) =
tic()
var nflops = 0
let coefL = coef.lepage
let coef1 = coef.oneLink - 6*coefL
let coef3 = coef.threeStaple
let coef5 = coef.fiveStaple
let coef7 = coef.sevenStaple
let have5 = (coef5!=0.0) or (coef7!=0.0) or (coefL!=0.0)
let have3 = (coef3!=0.0) or have5
type lcm = type(gauge[0])
let lo = gauge[0].l
proc newlcm: lcm = result.new(gauge[0].l)
var
stpl3: array[4,lcm]
mid5: array[4,lcm]
stpl5,mid3: lcm
s1: array[4,array[4,Shifter[lcm,type(gauge[0][0])]]]
s1a,s1b: array[4,Shifter[lcm,type(gauge[0][0])]]
tm1,tm2: lcm
sm1: array[4,Shifter[lcm,type(gauge[0][0])]]
if have3:
if have5:
for mu in 0..3:
stpl3[mu] = newlcm()
mid5[mu] = newlcm()
s1b[mu] = newShifter(gauge[0], mu, 1)
stpl5 = newlcm()
mid3 = newlcm()
for mu in 0..3:
for nu in 0..3:
if nu!=mu:
s1[mu][nu] = newShifter(gauge[mu], nu, 1)
s1a[mu] = newShifter(gauge[0], mu, 1)
sm1[mu] = newShifter(gauge[0], mu, -1)
tm1 = newlcm()
tm2 = newlcm()
threads:
for mu in 0..<4:
for nu in 0..<4:
if nu!=mu:
discard s1[mu][nu] ^* gauge[mu]
for sig in 0..3:
if have5:
for mu in 0..3:
if mu==sig: continue
#QDP_M_eq_zero(stpl3[mu], QDP_all);
stpl3[mu] := 0
#staple(stpl3[mu], gauge[sig], gauge[mu], sig, mu);
symStaple(stpl3[mu], 1.0, gauge[mu], gauge[sig],
s1[mu][sig], s1[sig][mu], tm1, sm1[mu])
#QDP_M_eq_zero(mid5[mu], QDP_all);
mid5[mu] := 0
nflops += STAPLE_FLOPS
for rho in 0..3:
if rho==sig: continue
#QDP_M_eq_zero(stpl5, QDP_all);
stpl5 := 0
if coef7!=0.0:
for mu in 0..3:
if mu==sig or mu==rho: continue
for nu in 0..3:
if nu==sig or nu==rho or nu==mu: continue
#staple(stpl5, stpl3[mu], gauge[nu], sig, nu);
discard s1a[nu] ^* stpl3[mu]
symStaple(stpl5, 1.0, gauge[nu], stpl3[mu],
s1[nu][sig], s1a[nu], tm1, sm1[nu])
nflops += STAPLE_FLOPS
#QDP_M_eq_r_times_M(stpl5, &coef7, stpl5, QDP_all);
stpl5 *= coef7
nflops += 18
#QDP_M_eq_zero(mid3, QDP_all);
mid3 := 0
if coefL!=0.0:
#QDP_M_peq_r_times_M(stpl5, &coefL, stpl3[rho], QDP_all);
stpl5 += coefL * stpl3[rho]
nflops += 36
if coefL!=0.0 or coef7!=0.0:
#side_force(deriv[rho], mid[sig], gauge[rho], stpl5, sig, rho, mid3);
discard s1a[rho] ^* stpl5
discard s1b[rho] ^* mid[sig]
symStapleDeriv(deriv[rho], mid3,
gauge[rho], stpl5, s1[rho][sig], s1a[rho],
mid[sig], s1b[rho], tm1, tm2, sm1[rho], sm1[sig])
nflops += SIDE_FORCE_FLOPS
if coefL!=0.0:
#QDP_M_peq_r_times_M(mid5[rho], &coefL, mid3, QDP_all);
mid5[rho] += coefL * mid3
nflops += 36
#//QDP_M_eqm_r_times_M(mid3, &coef7, mid3, QDP_all);
#QDP_M_eq_r_times_M(mid3, &coef7, mid3, QDP_all);
mid3 *= coef7
#QDP_M_peq_r_times_M(mid3, &coef5, mid[sig], QDP_all);
mid3 += coef5 * mid[sig]
nflops += 18+36
for mu in 0..3:
if mu==sig or mu==rho: continue
for nu in 0..3:
if nu==sig or nu==rho or nu==mu: continue
#side_force(deriv[mu], mid3, gauge[mu], stpl3[nu], sig, mu, mid5[nu]);
discard s1a[mu] ^* stpl3[nu]
discard s1b[mu] ^* mid3
symStapleDeriv(deriv[mu], mid5[nu],
gauge[mu], stpl3[nu], s1[mu][sig], s1a[mu],
mid3, s1b[mu], tm1, tm2, sm1[mu], sm1[sig])
nflops += SIDE_FORCE_FLOPS

if have3:
#QDP_M_eq_zero(mid3, QDP_all);
mid3 := 0
for mu in 0..3:
if mu==sig: continue
if have5:
#//QDP_M_eq_r_times_M_minus_M(stpl5, &coef3, mid[sig], mid5[mu], QDP_all);
#QDP_M_eq_r_times_M_plus_M(stpl5, &coef3, mid[sig], mid5[mu], QDP_all);
stpl5 := coef3*mid[sig] + mid5[mu]
nflops += 36
else:
#QDP_M_eq_r_times_M(stpl5, &coef3, mid[sig], QDP_all);
stpl5 := coef3*mid[sig]
nflops += 18
#side_force(deriv[mu], stpl5, gauge[mu], gauge[sig], sig, mu, mid3);
discard s1a[mu] ^* stpl5
symStapleDeriv(deriv[mu], mid3,
gauge[mu], gauge[sig], s1[mu][sig], s1[sig][mu],
stpl5, s1a[mu], tm1, tm2, sm1[mu], sm1[sig])
nflops += SIDE_FORCE_FLOPS
#//QDP_M_meq_M(deriv[sig], mid3, QDP_all);
#QDP_M_peq_M(deriv[sig], mid3, QDP_all);
deriv[sig] += mid3
nflops += 18
if coef1!=0.0:
#QDP_M_peq_r_times_M(deriv[sig], &coef1, mid[sig], QDP_all);
deriv[sig] += coef1 * mid[sig]
nflops += 36

if coefL!=0.0:
# fix up Lepage term
let fixL = -coefL
for mu in 0..3:
#QDP_M_eq_zero(ftmp0[0], QDP_all);
tm1 := 0
for nu in 0..3:
if nu==mu: continue
#QDP_M_eq_Ma_times_M(btmp0[0], mid[nu], gauge[nu], QDP_all);
tm2 := mid[nu].adj * gauge[nu]
#QDP_M_eq_sM(btmp[0][nu], btmp0[0], QDP_neighbor[nu], QDP_backward, QDP_all);
discard sm1[nu] ^* tm2
#QDP_M_eq_M_times_Ma(stpl5, mid[nu], gauge[nu], QDP_all);
stpl5 := mid[nu] * gauge[nu].adj
#QDP_M_meq_M(stpl5, btmp[0][nu], QDP_all);
stpl5 -= sm1[nu].field
#QDP_discard_M(btmp[0][nu]);
#QDP_M_peq_M(ftmp0[0], stpl5, QDP_all);
#QDP_M_peq_Ma(ftmp0[0], stpl5, QDP_all);
tm1 += stpl5 + stpl5.adj
#QDP_M_eq_sM(ftmp[0][mu], ftmp0[0], QDP_neighbor[mu], QDP_forward, QDP_all);
discard s1a[mu] ^* tm1
#QDP_M_eq_M_times_M(stpl5, ftmp0[0], gauge[mu], QDP_all);
stpl5 := tm1 * gauge[mu]
#QDP_M_meq_M_times_M(stpl5, gauge[mu], ftmp[0][mu], QDP_all);
stpl5 -= gauge[mu] * s1a[mu].field
#QDP_discard_M(ftmp[0][mu]);
#QDP_M_peq_r_times_M(deriv[mu], &fixL, stpl5, QDP_all);
deriv[mu] += fixL * stpl5
nflops += 4*(3*(2*198+3*18)+198+216+36)

#info->final_sec = QOP_time() - dtime;
#info->final_flop = nflops*QDP_sites_on_node;
#info->status = QOP_SUCCESS;

when isMainModule:
import qex
import physics/qcdTypes
import gauge
qexInit()
#var defaultGaugeFile = "l88.scidac"
let defaultLat = @[8,8,8,8]
defaultSetup()
for mu in 0..<g.len: g[mu] := 1
#g.random

var info: PerfInfo
var coef: Fat7lCoefs
coef.oneLink = 1.0
coef.threeStaple = 1.0
coef.fiveStaple = 1.0
coef.sevenStaple = 1.0
coef.lepage = 1.0

var fl = lo.newGauge()
var fl2 = lo.newGauge()
var dfl = lo.newGauge()
var g2 = lo.newGauge()
var dg = lo.newGauge()
var fd = lo.newGauge()
for mu in 0..<dg.len:
dg[mu] := 0.00001 * g[mu]
g2[mu] := g[mu] + dg[mu]
fd[mu] := 0

echo g.plaq
echo g2.plaq
makeImpLinks(info, fl, g, coef)
makeImpLinks(info, fl2, g2, coef)
echo fl.plaq
echo fl2.plaq
#echo ll.plaq
#echo pow(1.0,4)/6.0
#echo pow(1.0+6.0,4)/6.0
#echo pow(1.0+6.0+6.0*4.0,4)/6.0
#echo pow(1.0+6.0+6.0*4.0+6.0*4.0*2.0,4)/6.0
#echo pow(1.0+6.0+6.0*4.0+6.0*4.0*2.0+6.0,4)/6.0

fat7lderiv(info, g, fd, coef, dg)
for mu in 0..3:
dfl[mu] := fl2[mu] - fl[mu]
echo dfl[mu].norm2
echo fd[mu].norm2
dfl[mu] -= fd[mu]
echo dfl[mu].norm2

qexFinalize()
Loading

0 comments on commit 235ed88

Please sign in to comment.