Skip to content

Commit

Permalink
Separate the GPU and CPU code in the .H header files under the dfLowM…
Browse files Browse the repository at this point in the history
…achFoam solver.
  • Loading branch information
uptonwu committed Apr 7, 2024
1 parent 51d8df4 commit 44217db
Show file tree
Hide file tree
Showing 5 changed files with 686 additions and 0 deletions.
101 changes: 101 additions & 0 deletions applications/solvers/dfLowMachFoam/EEqn_GPU.H
Original file line number Diff line number Diff line change
@@ -0,0 +1,101 @@
{
volScalarField& he = thermo.he();
double *h_he = dfDataBase.getFieldPointer("he", location::cpu, position::internal);
double *h_boundary_he = dfDataBase.getFieldPointer("he", location::cpu, position::boundary);

EEqn_GPU.process();
EEqn_GPU.sync();
// EEqn_GPU.postProcess(h_he, h_boundary_he);

// copy h_he to he(cpu)
// memcpy(&he[0], h_he, dfDataBase.cell_value_bytes);

//DEBUG_TRACE;
//he.correctBoundaryConditions();
//DEBUG_TRACE;

#if defined DEBUG_
fvScalarMatrix EEqn
(

fvm::ddt(rho, he) + mvConvection->fvmDiv(phi, he)
+ fvc::ddt(rho, K) + fvc::div(phi, K)
- dpdt
==
(
turbName == "laminar"
?
(
fvm::laplacian(turbulence->alpha(), he)
- diffAlphaD
+ fvc::div(hDiffCorrFlux)
)
:
(
fvm::laplacian(turbulence->alphaEff(), he)
)
)
);
// EEqn.relax();
EEqn.solve("ha");
// checkResult
// TODO: for temp, now we compare ldu, finally we compare csr
std::vector<double> h_internal_coeffs(dfDataBase.num_boundary_surfaces);
std::vector<double> h_boundary_coeffs(dfDataBase.num_boundary_surfaces);

offset = 0;
forAll(he.boundaryField(), patchi)
{
const fvPatchScalarField& patchHe = he.boundaryField()[patchi];
int patchSize = patchHe.size();
const double* internal_coeff_ptr = &EEqn.internalCoeffs()[patchi][0];
const double* boundary_coeff_ptr = &EEqn.boundaryCoeffs()[patchi][0];
if (patchHe.type() == "processor"
|| patchHe.type() == "processorCyclic") {
memcpy(h_internal_coeffs.data() + offset, internal_coeff_ptr, patchSize * sizeof(double));
memset(h_internal_coeffs.data() + offset + patchSize, 0, patchSize * sizeof(double));
memcpy(h_boundary_coeffs.data() + offset, boundary_coeff_ptr, patchSize * sizeof(double));
memset(h_boundary_coeffs.data() + offset + patchSize, 0, patchSize * sizeof(double));
offset += patchSize * 2;
} else {
memcpy(h_internal_coeffs.data() + offset, internal_coeff_ptr, patchSize * sizeof(double));
memcpy(h_boundary_coeffs.data() + offset, boundary_coeff_ptr, patchSize * sizeof(double));
offset += patchSize;
}
}

double *h_boundary_he_tmp = new double[dfDataBase.num_boundary_surfaces];
offset = 0;
forAll(he.boundaryField(), patchi)
{
const fvPatchScalarField& patchHe = he.boundaryField()[patchi];
int patchSize = patchHe.size();
if (patchHe.type() == "processor"
|| patchHe.type() == "processorCyclic") {
const scalarField& patchHeInternal = dynamic_cast<const processorFvPatchField<scalar>&>(patchHe).patchInternalField()();
memcpy(h_boundary_he_tmp + offset, &patchHe[0], patchSize * sizeof(double));
memcpy(h_boundary_he_tmp + offset + patchSize, &patchHeInternal[0], patchSize * sizeof(double));
offset += patchSize * 2;
} else {
memcpy(h_boundary_he_tmp + offset, &patchHe[0], patchSize * sizeof(double));
offset += patchSize;
}
}

bool printFlag = false;
int rank = -1;
if (mpi_init_flag) {
MPI_Comm_rank(MPI_COMM_WORLD, &rank);
}
if (!mpi_init_flag || rank == 0) {
// DEBUG_TRACE;
// EEqn_GPU.compareResult(&EEqn.lower()[0], &EEqn.upper()[0], &EEqn.diag()[0], &EEqn.source()[0],
// h_internal_coeffs.data(), h_boundary_coeffs.data(), printFlag);
// DEBUG_TRACE;
// EEqn_GPU.compareHe(&he[0], h_boundary_he_tmp, printFlag);
}

delete h_boundary_he_tmp;

#endif
}
122 changes: 122 additions & 0 deletions applications/solvers/dfLowMachFoam/UEqn_GPU.H
Original file line number Diff line number Diff line change
@@ -0,0 +1,122 @@
// Solve the Momentum equation
#if defined DEBUG_
// run CPU, for temp
TICK_START;
tmp<fvVectorMatrix> tUEqn
(
fvm::ddt(rho, U)
+
fvm::div(phi, U)
+
turbulence->divDevRhoReff(U)
);
fvVectorMatrix& UEqn = tUEqn.ref();
TICK_STOP(CPU assembly time);

volTensorField gradU = fvc::grad(U);

double *h_boundary_gradU = new double[dfDataBase.num_boundary_surfaces * 9];
offset = 0;
forAll(U.boundaryField(), patchi)
{
const fvPatchTensorField& patchGradU = gradU.boundaryField()[patchi];
int patchsize = patchGradU.size();
if (patchGradU.type() == "processor"
|| patchGradU.type() == "processorCyclic") {
// print info
if (dynamic_cast<const processorFvPatchField<tensor>&>(patchGradU).doTransform()) {
Info << "gradU transform = true" << endl;
} else {
Info << "gradU transform = false" << endl;
}
Info << "rank = " << dynamic_cast<const processorFvPatchField<tensor>&>(patchGradU).rank() << endl;

memcpy(h_boundary_gradU + 9*offset, &patchGradU[0][0], patchsize * 9 * sizeof(double));
tensorField patchGradUInternal =
dynamic_cast<const processorFvPatchField<tensor>&>(patchGradU).patchInternalField()();
memcpy(h_boundary_gradU + 9*offset + patchsize * 9, &patchGradUInternal[0][0], patchsize * 9 * sizeof(double));
offset += patchsize * 2;
} else {
memcpy(h_boundary_gradU + 9*offset, &patchGradU[0][0], patchsize * 9 * sizeof(double));
offset += patchsize;
}
}
#endif

// process
TICK_START;
UEqn_GPU.process();
UEqn_GPU.sync();
TICK_STOP(GPU process time);

// postProcess
// TICK_START;
// UEqn_GPU.postProcess(h_u);
// memcpy(&U[0][0], h_u, dfDataBase.cell_value_vec_bytes);
// U.correctBoundaryConditions();
// K = 0.5*magSqr(U);
// DEBUG_TRACE;
// TICK_STOP(post process time);

#if defined DEBUG_
// UEqn.relax();
TICK_START;
solve(UEqn == -fvc::grad(p));
K.oldTime();
K = 0.5*magSqr(U);
TICK_STOP(CPU solve time);
// checkResult
// TODO: for temp, now we compare ldu, finally we compare csr
std::vector<double> h_internal_coeffs(dfDataBase.num_boundary_surfaces * 3);
std::vector<double> h_boundary_coeffs(dfDataBase.num_boundary_surfaces * 3);

offset = 0;
for (int patchi = 0; patchi < dfDataBase.num_patches; patchi++)
{
const fvPatchVectorField& patchU = U.boundaryField()[patchi];
int patchsize = dfDataBase.patch_size[patchi];
const double* internal_coeff_ptr = &UEqn.internalCoeffs()[patchi][0][0];
const double* boundary_coeff_ptr = &UEqn.boundaryCoeffs()[patchi][0][0];
memcpy(h_internal_coeffs.data() + offset * 3, internal_coeff_ptr, patchsize * 3 * sizeof(double));
memcpy(h_boundary_coeffs.data() + offset * 3, boundary_coeff_ptr, patchsize * 3 * sizeof(double));
if (patchU.type() == "processor" || patchU.type() == "processorCyclic") offset += 2 * patchsize;
else offset += patchsize;
}

double *h_boundary_u_tmp = new double[dfDataBase.num_boundary_surfaces * 3];
offset = 0;
forAll(U.boundaryField(), patchi)
{
const fvPatchVectorField& patchU = U.boundaryField()[patchi];
int patchsize = dfDataBase.patch_size[patchi];

if (patchU.type() == "processor"
|| patchU.type() == "processorCyclic") {
memcpy(h_boundary_u_tmp + 3*offset, &patchU[0][0], 3*patchsize * sizeof(double));
vectorField patchUInternal =
dynamic_cast<const processorFvPatchField<vector>&>(patchU).patchInternalField()();
memcpy(h_boundary_u_tmp + 3*offset + 3*patchsize, &patchUInternal[0][0], 3*patchsize * sizeof(double));
offset += 2 * patchsize;
} else {
memcpy(h_boundary_u_tmp + 3*offset, &patchU[0][0], 3*patchsize * sizeof(double));
offset += patchsize;
}
}

bool printFlag = false;

int rank = -1;
if (mpi_init_flag) {
MPI_Comm_rank(MPI_COMM_WORLD, &rank);
}

if (!mpi_init_flag || rank == 0) {
// UEqn_GPU.compareResult(&UEqn.lower()[0], &UEqn.upper()[0], &UEqn.diag()[0], &UEqn.source()[0][0],
// h_internal_coeffs.data(), h_boundary_coeffs.data(),
// // &gradU[0][0], h_boundary_gradU,
// printFlag);
// UEqn_GPU.compareU(&U[0][0], h_boundary_u_tmp, printFlag);
}
DEBUG_TRACE;
#endif

Loading

0 comments on commit 44217db

Please sign in to comment.