From 235ed88306aab6c0236c7cd6394a20bc2aa00d60 Mon Sep 17 00:00:00 2001 From: James Osborn Date: Thu, 5 Dec 2024 18:14:05 -0600 Subject: [PATCH] added fat7 derivative --- src/gauge/fat7l.nim | 5 + src/gauge/fat7lderiv.nim | 345 +++++++++++++++++++++++++++++++++++++++ src/gauge/hypsmear.nim | 37 +---- src/gauge/smearutil.nim | 50 ++++++ 4 files changed, 401 insertions(+), 36 deletions(-) create mode 100644 src/gauge/fat7lderiv.nim create mode 100644 src/gauge/smearutil.nim diff --git a/src/gauge/fat7l.nim b/src/gauge/fat7l.nim index 2aae3561..9fdaea10 100644 --- a/src/gauge/fat7l.nim +++ b/src/gauge/fat7l.nim @@ -13,6 +13,11 @@ type Fat7lCoefs* = object sevenStaple*: float lepage*: float +#type Fat7lState*[I,O] = object +# coefs*: Fat7lCoefs +# in*: I +# out*: O + proc `$`*(c: Fat7lCoefs): string = result = "oneLink: " & $c.oneLink & "\n" result &= "threeStaple: " & $c.threeStaple & "\n" diff --git a/src/gauge/fat7lderiv.nim b/src/gauge/fat7lderiv.nim new file mode 100644 index 00000000..f77b3095 --- /dev/null +++ b/src/gauge/fat7lderiv.nim @@ -0,0 +1,345 @@ +import fat7l, smearutil +import base/profile, layout + +let STAPLE_FLOPS = 3*198+216+18 +let SIDE_FORCE_FLOPS = 7*198+3*216+3*18 + +#[ +proc staple(auto out, auto in0, auto link0, int mu, int nu) = +#define link ftmp0[0] +#define linkmu ftmp[0][mu] +#define in ftmp0[1] +#define innu ftmp[1][nu] +#define linkin mtmp[0] +#define back btmp0[0] +#define backnu btmp[0][nu] +#define linkinnu mtmp[1] + + QDP_M_eq_M(link, link0, QDP_all); + QDP_M_eq_sM(linkmu, link, QDP_neighbor[mu], QDP_forward, QDP_all); + QDP_M_eq_M(in, in0, QDP_all); + QDP_M_eq_sM(innu, in, QDP_neighbor[nu], QDP_forward, QDP_all); + QDP_M_eq_Ma_times_M(linkin, link, in, QDP_all); + QDP_M_eq_M_times_M(back, linkin, linkmu, QDP_all); + QDP_M_eq_sM(backnu, back, QDP_neighbor[nu], QDP_backward, QDP_all); + QDP_M_eq_M_times_M(linkinnu, link, innu, QDP_all); + QDP_discard_M(innu); + QDP_M_peq_M_times_Ma(out, linkinnu, linkmu, QDP_all); + QDP_discard_M(linkmu); + QDP_M_peq_M(out, backnu, QDP_all); + QDP_discard_M(backnu); +#define STAPLE_FLOPS (3*198+216+18) + +#undef link +#undef linkmu +#undef in +#undef innu +#undef linkin +#undef back +#undef backnu +#undef linkinnu +} +]# + +#[ +static void +side_force(QDP_ColorMatrix *force, QDP_ColorMatrix *bot0, QDP_ColorMatrix *side0, + QDP_ColorMatrix *top0, int mu, int nu, QDP_ColorMatrix *stpl) +{ +#define side ftmp0[0] +#define sidemu ftmp[0][mu] +#define top ftmp0[1] +#define topnu ftmp[1][nu] +#define bot ftmp0[2] +#define botnu ftmp[2][nu] +#define sidebot mtmp[0] +#define sidetop mtmp[1] +#define topnusidebot btmp0[0] +#define fbmu btmp[0][mu] +#define botnusidetop btmp0[1] +#define fmbmu btmp[1][mu] +#define sidebotsidemu btmp0[2] +#define stm btmp[2][nu] +#define botnusidemu mtmp[2] +#define botsidemu mtmp[3] + + // force += bot * sidemu * topnu+ + // force -= bot-mu+ * side-mu * topnu-mu + // -= top <-> bot + // stpl += side * botnu * sidemu+ + // stpl += side-nu+ * bot-nu * sidemu-nu + QDP_M_eq_M(side, side0, QDP_all); + QDP_M_eq_sM(sidemu, side, QDP_neighbor[mu], QDP_forward, QDP_all); + QDP_M_eq_M(top, top0, QDP_all); + QDP_M_eq_sM(topnu, top, QDP_neighbor[nu], QDP_forward, QDP_all); + QDP_M_eq_M(bot, bot0, QDP_all); + QDP_M_eq_sM(botnu, bot, QDP_neighbor[nu], QDP_forward, QDP_all); + QDP_M_eq_Ma_times_M(sidebot, side, bot, QDP_all); + QDP_M_eq_Ma_times_M(sidetop, side, top, QDP_all); + QDP_M_eq_Ma_times_M(topnusidebot, topnu, sidebot, QDP_all); + QDP_M_eq_sM(fbmu, topnusidebot, QDP_neighbor[mu], QDP_backward, QDP_all); + QDP_M_eq_Ma_times_M(botnusidetop, botnu, sidetop, QDP_all); + QDP_M_eq_sM(fmbmu, botnusidetop, QDP_neighbor[mu], QDP_backward, QDP_all); + QDP_M_eq_M_times_M(sidebotsidemu, sidebot, sidemu, QDP_all); + QDP_M_eq_sM(stm, sidebotsidemu, QDP_neighbor[nu], QDP_backward, QDP_all); + QDP_M_eq_M_times_Ma(botnusidemu, botnu, sidemu, QDP_all); + QDP_discard_M(botnu); + QDP_M_peq_M_times_M(stpl, side, botnusidemu, QDP_all); + //QDP_M_meq_M_times_Ma(force, top, botnusidemu, QDP_all); + QDP_M_peq_M_times_Ma(force, top, botnusidemu, QDP_all); + QDP_M_eq_M_times_M(botsidemu, bot, sidemu, QDP_all); + QDP_discard_M(sidemu); + QDP_M_peq_M_times_Ma(force, botsidemu, topnu, QDP_all); + QDP_discard_M(topnu); + //QDP_M_meq_Ma(force, fbmu, QDP_all); + QDP_M_peq_Ma(force, fbmu, QDP_all); + QDP_discard_M(fbmu); + QDP_M_peq_Ma(force, fmbmu, QDP_all); + QDP_discard_M(fmbmu); + QDP_M_peq_M(stpl, stm, QDP_all); + QDP_discard_M(stm); +#define SIDE_FORCE_FLOPS (7*198+3*216+3*18) + +#undef side +#undef sidemu +#undef top +#undef topnu +#undef bot +#undef botnu +#undef sidebot +#undef sidetop +#undef topnusidebot +#undef fbmu +#undef botnusidetop +#undef fmbmu +#undef sidebotsidemu +#undef stm +#undef botnusidemu +#undef botsidemu +} +]# + +proc fat7lderiv(perf: var PerfInfo, gauge: auto, deriv: auto, + coef: Fat7lCoefs, mid: auto) = + tic() + var nflops = 0 + let coefL = coef.lepage + let coef1 = coef.oneLink - 6*coefL + let coef3 = coef.threeStaple + let coef5 = coef.fiveStaple + let coef7 = coef.sevenStaple + let have5 = (coef5!=0.0) or (coef7!=0.0) or (coefL!=0.0) + let have3 = (coef3!=0.0) or have5 + type lcm = type(gauge[0]) + let lo = gauge[0].l + proc newlcm: lcm = result.new(gauge[0].l) + var + stpl3: array[4,lcm] + mid5: array[4,lcm] + stpl5,mid3: lcm + s1: array[4,array[4,Shifter[lcm,type(gauge[0][0])]]] + s1a,s1b: array[4,Shifter[lcm,type(gauge[0][0])]] + tm1,tm2: lcm + sm1: array[4,Shifter[lcm,type(gauge[0][0])]] + if have3: + if have5: + for mu in 0..3: + stpl3[mu] = newlcm() + mid5[mu] = newlcm() + s1b[mu] = newShifter(gauge[0], mu, 1) + stpl5 = newlcm() + mid3 = newlcm() + for mu in 0..3: + for nu in 0..3: + if nu!=mu: + s1[mu][nu] = newShifter(gauge[mu], nu, 1) + s1a[mu] = newShifter(gauge[0], mu, 1) + sm1[mu] = newShifter(gauge[0], mu, -1) + tm1 = newlcm() + tm2 = newlcm() + threads: + for mu in 0..<4: + for nu in 0..<4: + if nu!=mu: + discard s1[mu][nu] ^* gauge[mu] + for sig in 0..3: + if have5: + for mu in 0..3: + if mu==sig: continue + #QDP_M_eq_zero(stpl3[mu], QDP_all); + stpl3[mu] := 0 + #staple(stpl3[mu], gauge[sig], gauge[mu], sig, mu); + symStaple(stpl3[mu], 1.0, gauge[mu], gauge[sig], + s1[mu][sig], s1[sig][mu], tm1, sm1[mu]) + #QDP_M_eq_zero(mid5[mu], QDP_all); + mid5[mu] := 0 + nflops += STAPLE_FLOPS + for rho in 0..3: + if rho==sig: continue + #QDP_M_eq_zero(stpl5, QDP_all); + stpl5 := 0 + if coef7!=0.0: + for mu in 0..3: + if mu==sig or mu==rho: continue + for nu in 0..3: + if nu==sig or nu==rho or nu==mu: continue + #staple(stpl5, stpl3[mu], gauge[nu], sig, nu); + discard s1a[nu] ^* stpl3[mu] + symStaple(stpl5, 1.0, gauge[nu], stpl3[mu], + s1[nu][sig], s1a[nu], tm1, sm1[nu]) + nflops += STAPLE_FLOPS + #QDP_M_eq_r_times_M(stpl5, &coef7, stpl5, QDP_all); + stpl5 *= coef7 + nflops += 18 + #QDP_M_eq_zero(mid3, QDP_all); + mid3 := 0 + if coefL!=0.0: + #QDP_M_peq_r_times_M(stpl5, &coefL, stpl3[rho], QDP_all); + stpl5 += coefL * stpl3[rho] + nflops += 36 + if coefL!=0.0 or coef7!=0.0: + #side_force(deriv[rho], mid[sig], gauge[rho], stpl5, sig, rho, mid3); + discard s1a[rho] ^* stpl5 + discard s1b[rho] ^* mid[sig] + symStapleDeriv(deriv[rho], mid3, + gauge[rho], stpl5, s1[rho][sig], s1a[rho], + mid[sig], s1b[rho], tm1, tm2, sm1[rho], sm1[sig]) + nflops += SIDE_FORCE_FLOPS + if coefL!=0.0: + #QDP_M_peq_r_times_M(mid5[rho], &coefL, mid3, QDP_all); + mid5[rho] += coefL * mid3 + nflops += 36 + #//QDP_M_eqm_r_times_M(mid3, &coef7, mid3, QDP_all); + #QDP_M_eq_r_times_M(mid3, &coef7, mid3, QDP_all); + mid3 *= coef7 + #QDP_M_peq_r_times_M(mid3, &coef5, mid[sig], QDP_all); + mid3 += coef5 * mid[sig] + nflops += 18+36 + for mu in 0..3: + if mu==sig or mu==rho: continue + for nu in 0..3: + if nu==sig or nu==rho or nu==mu: continue + #side_force(deriv[mu], mid3, gauge[mu], stpl3[nu], sig, mu, mid5[nu]); + discard s1a[mu] ^* stpl3[nu] + discard s1b[mu] ^* mid3 + symStapleDeriv(deriv[mu], mid5[nu], + gauge[mu], stpl3[nu], s1[mu][sig], s1a[mu], + mid3, s1b[mu], tm1, tm2, sm1[mu], sm1[sig]) + nflops += SIDE_FORCE_FLOPS + + if have3: + #QDP_M_eq_zero(mid3, QDP_all); + mid3 := 0 + for mu in 0..3: + if mu==sig: continue + if have5: + #//QDP_M_eq_r_times_M_minus_M(stpl5, &coef3, mid[sig], mid5[mu], QDP_all); + #QDP_M_eq_r_times_M_plus_M(stpl5, &coef3, mid[sig], mid5[mu], QDP_all); + stpl5 := coef3*mid[sig] + mid5[mu] + nflops += 36 + else: + #QDP_M_eq_r_times_M(stpl5, &coef3, mid[sig], QDP_all); + stpl5 := coef3*mid[sig] + nflops += 18 + #side_force(deriv[mu], stpl5, gauge[mu], gauge[sig], sig, mu, mid3); + discard s1a[mu] ^* stpl5 + symStapleDeriv(deriv[mu], mid3, + gauge[mu], gauge[sig], s1[mu][sig], s1[sig][mu], + stpl5, s1a[mu], tm1, tm2, sm1[mu], sm1[sig]) + nflops += SIDE_FORCE_FLOPS + #//QDP_M_meq_M(deriv[sig], mid3, QDP_all); + #QDP_M_peq_M(deriv[sig], mid3, QDP_all); + deriv[sig] += mid3 + nflops += 18 + if coef1!=0.0: + #QDP_M_peq_r_times_M(deriv[sig], &coef1, mid[sig], QDP_all); + deriv[sig] += coef1 * mid[sig] + nflops += 36 + + if coefL!=0.0: + # fix up Lepage term + let fixL = -coefL + for mu in 0..3: + #QDP_M_eq_zero(ftmp0[0], QDP_all); + tm1 := 0 + for nu in 0..3: + if nu==mu: continue + #QDP_M_eq_Ma_times_M(btmp0[0], mid[nu], gauge[nu], QDP_all); + tm2 := mid[nu].adj * gauge[nu] + #QDP_M_eq_sM(btmp[0][nu], btmp0[0], QDP_neighbor[nu], QDP_backward, QDP_all); + discard sm1[nu] ^* tm2 + #QDP_M_eq_M_times_Ma(stpl5, mid[nu], gauge[nu], QDP_all); + stpl5 := mid[nu] * gauge[nu].adj + #QDP_M_meq_M(stpl5, btmp[0][nu], QDP_all); + stpl5 -= sm1[nu].field + #QDP_discard_M(btmp[0][nu]); + #QDP_M_peq_M(ftmp0[0], stpl5, QDP_all); + #QDP_M_peq_Ma(ftmp0[0], stpl5, QDP_all); + tm1 += stpl5 + stpl5.adj + #QDP_M_eq_sM(ftmp[0][mu], ftmp0[0], QDP_neighbor[mu], QDP_forward, QDP_all); + discard s1a[mu] ^* tm1 + #QDP_M_eq_M_times_M(stpl5, ftmp0[0], gauge[mu], QDP_all); + stpl5 := tm1 * gauge[mu] + #QDP_M_meq_M_times_M(stpl5, gauge[mu], ftmp[0][mu], QDP_all); + stpl5 -= gauge[mu] * s1a[mu].field + #QDP_discard_M(ftmp[0][mu]); + #QDP_M_peq_r_times_M(deriv[mu], &fixL, stpl5, QDP_all); + deriv[mu] += fixL * stpl5 + nflops += 4*(3*(2*198+3*18)+198+216+36) + + #info->final_sec = QOP_time() - dtime; + #info->final_flop = nflops*QDP_sites_on_node; + #info->status = QOP_SUCCESS; + +when isMainModule: + import qex + import physics/qcdTypes + import gauge + qexInit() + #var defaultGaugeFile = "l88.scidac" + let defaultLat = @[8,8,8,8] + defaultSetup() + for mu in 0..