forked from deepflameCFD/deepflame-dev
-
Notifications
You must be signed in to change notification settings - Fork 63
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Separate the GPU and CPU code in the .H header files under the dfLowM…
…achFoam solver.
- Loading branch information
Showing
5 changed files
with
686 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,101 @@ | ||
{ | ||
volScalarField& he = thermo.he(); | ||
double *h_he = dfDataBase.getFieldPointer("he", location::cpu, position::internal); | ||
double *h_boundary_he = dfDataBase.getFieldPointer("he", location::cpu, position::boundary); | ||
|
||
EEqn_GPU.process(); | ||
EEqn_GPU.sync(); | ||
// EEqn_GPU.postProcess(h_he, h_boundary_he); | ||
|
||
// copy h_he to he(cpu) | ||
// memcpy(&he[0], h_he, dfDataBase.cell_value_bytes); | ||
|
||
//DEBUG_TRACE; | ||
//he.correctBoundaryConditions(); | ||
//DEBUG_TRACE; | ||
|
||
#if defined DEBUG_ | ||
fvScalarMatrix EEqn | ||
( | ||
|
||
fvm::ddt(rho, he) + mvConvection->fvmDiv(phi, he) | ||
+ fvc::ddt(rho, K) + fvc::div(phi, K) | ||
- dpdt | ||
== | ||
( | ||
turbName == "laminar" | ||
? | ||
( | ||
fvm::laplacian(turbulence->alpha(), he) | ||
- diffAlphaD | ||
+ fvc::div(hDiffCorrFlux) | ||
) | ||
: | ||
( | ||
fvm::laplacian(turbulence->alphaEff(), he) | ||
) | ||
) | ||
); | ||
// EEqn.relax(); | ||
EEqn.solve("ha"); | ||
// checkResult | ||
// TODO: for temp, now we compare ldu, finally we compare csr | ||
std::vector<double> h_internal_coeffs(dfDataBase.num_boundary_surfaces); | ||
std::vector<double> h_boundary_coeffs(dfDataBase.num_boundary_surfaces); | ||
|
||
offset = 0; | ||
forAll(he.boundaryField(), patchi) | ||
{ | ||
const fvPatchScalarField& patchHe = he.boundaryField()[patchi]; | ||
int patchSize = patchHe.size(); | ||
const double* internal_coeff_ptr = &EEqn.internalCoeffs()[patchi][0]; | ||
const double* boundary_coeff_ptr = &EEqn.boundaryCoeffs()[patchi][0]; | ||
if (patchHe.type() == "processor" | ||
|| patchHe.type() == "processorCyclic") { | ||
memcpy(h_internal_coeffs.data() + offset, internal_coeff_ptr, patchSize * sizeof(double)); | ||
memset(h_internal_coeffs.data() + offset + patchSize, 0, patchSize * sizeof(double)); | ||
memcpy(h_boundary_coeffs.data() + offset, boundary_coeff_ptr, patchSize * sizeof(double)); | ||
memset(h_boundary_coeffs.data() + offset + patchSize, 0, patchSize * sizeof(double)); | ||
offset += patchSize * 2; | ||
} else { | ||
memcpy(h_internal_coeffs.data() + offset, internal_coeff_ptr, patchSize * sizeof(double)); | ||
memcpy(h_boundary_coeffs.data() + offset, boundary_coeff_ptr, patchSize * sizeof(double)); | ||
offset += patchSize; | ||
} | ||
} | ||
|
||
double *h_boundary_he_tmp = new double[dfDataBase.num_boundary_surfaces]; | ||
offset = 0; | ||
forAll(he.boundaryField(), patchi) | ||
{ | ||
const fvPatchScalarField& patchHe = he.boundaryField()[patchi]; | ||
int patchSize = patchHe.size(); | ||
if (patchHe.type() == "processor" | ||
|| patchHe.type() == "processorCyclic") { | ||
const scalarField& patchHeInternal = dynamic_cast<const processorFvPatchField<scalar>&>(patchHe).patchInternalField()(); | ||
memcpy(h_boundary_he_tmp + offset, &patchHe[0], patchSize * sizeof(double)); | ||
memcpy(h_boundary_he_tmp + offset + patchSize, &patchHeInternal[0], patchSize * sizeof(double)); | ||
offset += patchSize * 2; | ||
} else { | ||
memcpy(h_boundary_he_tmp + offset, &patchHe[0], patchSize * sizeof(double)); | ||
offset += patchSize; | ||
} | ||
} | ||
|
||
bool printFlag = false; | ||
int rank = -1; | ||
if (mpi_init_flag) { | ||
MPI_Comm_rank(MPI_COMM_WORLD, &rank); | ||
} | ||
if (!mpi_init_flag || rank == 0) { | ||
// DEBUG_TRACE; | ||
// EEqn_GPU.compareResult(&EEqn.lower()[0], &EEqn.upper()[0], &EEqn.diag()[0], &EEqn.source()[0], | ||
// h_internal_coeffs.data(), h_boundary_coeffs.data(), printFlag); | ||
// DEBUG_TRACE; | ||
// EEqn_GPU.compareHe(&he[0], h_boundary_he_tmp, printFlag); | ||
} | ||
|
||
delete h_boundary_he_tmp; | ||
|
||
#endif | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,122 @@ | ||
// Solve the Momentum equation | ||
#if defined DEBUG_ | ||
// run CPU, for temp | ||
TICK_START; | ||
tmp<fvVectorMatrix> tUEqn | ||
( | ||
fvm::ddt(rho, U) | ||
+ | ||
fvm::div(phi, U) | ||
+ | ||
turbulence->divDevRhoReff(U) | ||
); | ||
fvVectorMatrix& UEqn = tUEqn.ref(); | ||
TICK_STOP(CPU assembly time); | ||
|
||
volTensorField gradU = fvc::grad(U); | ||
|
||
double *h_boundary_gradU = new double[dfDataBase.num_boundary_surfaces * 9]; | ||
offset = 0; | ||
forAll(U.boundaryField(), patchi) | ||
{ | ||
const fvPatchTensorField& patchGradU = gradU.boundaryField()[patchi]; | ||
int patchsize = patchGradU.size(); | ||
if (patchGradU.type() == "processor" | ||
|| patchGradU.type() == "processorCyclic") { | ||
// print info | ||
if (dynamic_cast<const processorFvPatchField<tensor>&>(patchGradU).doTransform()) { | ||
Info << "gradU transform = true" << endl; | ||
} else { | ||
Info << "gradU transform = false" << endl; | ||
} | ||
Info << "rank = " << dynamic_cast<const processorFvPatchField<tensor>&>(patchGradU).rank() << endl; | ||
|
||
memcpy(h_boundary_gradU + 9*offset, &patchGradU[0][0], patchsize * 9 * sizeof(double)); | ||
tensorField patchGradUInternal = | ||
dynamic_cast<const processorFvPatchField<tensor>&>(patchGradU).patchInternalField()(); | ||
memcpy(h_boundary_gradU + 9*offset + patchsize * 9, &patchGradUInternal[0][0], patchsize * 9 * sizeof(double)); | ||
offset += patchsize * 2; | ||
} else { | ||
memcpy(h_boundary_gradU + 9*offset, &patchGradU[0][0], patchsize * 9 * sizeof(double)); | ||
offset += patchsize; | ||
} | ||
} | ||
#endif | ||
|
||
// process | ||
TICK_START; | ||
UEqn_GPU.process(); | ||
UEqn_GPU.sync(); | ||
TICK_STOP(GPU process time); | ||
|
||
// postProcess | ||
// TICK_START; | ||
// UEqn_GPU.postProcess(h_u); | ||
// memcpy(&U[0][0], h_u, dfDataBase.cell_value_vec_bytes); | ||
// U.correctBoundaryConditions(); | ||
// K = 0.5*magSqr(U); | ||
// DEBUG_TRACE; | ||
// TICK_STOP(post process time); | ||
|
||
#if defined DEBUG_ | ||
// UEqn.relax(); | ||
TICK_START; | ||
solve(UEqn == -fvc::grad(p)); | ||
K.oldTime(); | ||
K = 0.5*magSqr(U); | ||
TICK_STOP(CPU solve time); | ||
// checkResult | ||
// TODO: for temp, now we compare ldu, finally we compare csr | ||
std::vector<double> h_internal_coeffs(dfDataBase.num_boundary_surfaces * 3); | ||
std::vector<double> h_boundary_coeffs(dfDataBase.num_boundary_surfaces * 3); | ||
|
||
offset = 0; | ||
for (int patchi = 0; patchi < dfDataBase.num_patches; patchi++) | ||
{ | ||
const fvPatchVectorField& patchU = U.boundaryField()[patchi]; | ||
int patchsize = dfDataBase.patch_size[patchi]; | ||
const double* internal_coeff_ptr = &UEqn.internalCoeffs()[patchi][0][0]; | ||
const double* boundary_coeff_ptr = &UEqn.boundaryCoeffs()[patchi][0][0]; | ||
memcpy(h_internal_coeffs.data() + offset * 3, internal_coeff_ptr, patchsize * 3 * sizeof(double)); | ||
memcpy(h_boundary_coeffs.data() + offset * 3, boundary_coeff_ptr, patchsize * 3 * sizeof(double)); | ||
if (patchU.type() == "processor" || patchU.type() == "processorCyclic") offset += 2 * patchsize; | ||
else offset += patchsize; | ||
} | ||
|
||
double *h_boundary_u_tmp = new double[dfDataBase.num_boundary_surfaces * 3]; | ||
offset = 0; | ||
forAll(U.boundaryField(), patchi) | ||
{ | ||
const fvPatchVectorField& patchU = U.boundaryField()[patchi]; | ||
int patchsize = dfDataBase.patch_size[patchi]; | ||
|
||
if (patchU.type() == "processor" | ||
|| patchU.type() == "processorCyclic") { | ||
memcpy(h_boundary_u_tmp + 3*offset, &patchU[0][0], 3*patchsize * sizeof(double)); | ||
vectorField patchUInternal = | ||
dynamic_cast<const processorFvPatchField<vector>&>(patchU).patchInternalField()(); | ||
memcpy(h_boundary_u_tmp + 3*offset + 3*patchsize, &patchUInternal[0][0], 3*patchsize * sizeof(double)); | ||
offset += 2 * patchsize; | ||
} else { | ||
memcpy(h_boundary_u_tmp + 3*offset, &patchU[0][0], 3*patchsize * sizeof(double)); | ||
offset += patchsize; | ||
} | ||
} | ||
|
||
bool printFlag = false; | ||
|
||
int rank = -1; | ||
if (mpi_init_flag) { | ||
MPI_Comm_rank(MPI_COMM_WORLD, &rank); | ||
} | ||
|
||
if (!mpi_init_flag || rank == 0) { | ||
// UEqn_GPU.compareResult(&UEqn.lower()[0], &UEqn.upper()[0], &UEqn.diag()[0], &UEqn.source()[0][0], | ||
// h_internal_coeffs.data(), h_boundary_coeffs.data(), | ||
// // &gradU[0][0], h_boundary_gradU, | ||
// printFlag); | ||
// UEqn_GPU.compareU(&U[0][0], h_boundary_u_tmp, printFlag); | ||
} | ||
DEBUG_TRACE; | ||
#endif | ||
|
Oops, something went wrong.