forked from deepflameCFD/deepflame-dev
-
Notifications
You must be signed in to change notification settings - Fork 63
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge remote-tracking branch 'origin/GPU-dev'
- Loading branch information
Showing
41 changed files
with
11,492 additions
and
4,393 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,36 +1,137 @@ | ||
{ | ||
volScalarField& he = thermo.he(); | ||
|
||
#if defined GPUSolverNew_ | ||
double *h_he = dfDataBase.getFieldPointer("he", location::cpu, position::internal); | ||
double *h_boundary_he = dfDataBase.getFieldPointer("he", location::cpu, position::boundary); | ||
|
||
EEqn_GPU.process(); | ||
EEqn_GPU.sync(); | ||
// EEqn_GPU.postProcess(h_he, h_boundary_he); | ||
|
||
// copy h_he to he(cpu) | ||
// memcpy(&he[0], h_he, dfDataBase.cell_value_bytes); | ||
|
||
//DEBUG_TRACE; | ||
//he.correctBoundaryConditions(); | ||
//DEBUG_TRACE; | ||
|
||
#if defined DEBUG_ | ||
fvScalarMatrix EEqn | ||
( | ||
|
||
fvm::ddt(rho, he) + mvConvection->fvmDiv(phi, he) | ||
+ fvc::ddt(rho, K) + fvc::div(phi, K) | ||
- dpdt | ||
== | ||
( | ||
turbName == "laminar" | ||
? | ||
( | ||
fvm::laplacian(turbulence->alpha(), he) | ||
- diffAlphaD | ||
+ fvc::div(hDiffCorrFlux) | ||
) | ||
: | ||
( | ||
fvm::laplacian(turbulence->alphaEff(), he) | ||
) | ||
) | ||
); | ||
// EEqn.relax(); | ||
EEqn.solve("ha"); | ||
// checkResult | ||
// TODO: for temp, now we compare ldu, finally we compare csr | ||
std::vector<double> h_internal_coeffs(dfDataBase.num_boundary_surfaces); | ||
std::vector<double> h_boundary_coeffs(dfDataBase.num_boundary_surfaces); | ||
|
||
offset = 0; | ||
forAll(he.boundaryField(), patchi) | ||
{ | ||
const fvPatchScalarField& patchHe = he.boundaryField()[patchi]; | ||
int patchSize = patchHe.size(); | ||
const double* internal_coeff_ptr = &EEqn.internalCoeffs()[patchi][0]; | ||
const double* boundary_coeff_ptr = &EEqn.boundaryCoeffs()[patchi][0]; | ||
if (patchHe.type() == "processor" | ||
|| patchHe.type() == "processorCyclic") { | ||
memcpy(h_internal_coeffs.data() + offset, internal_coeff_ptr, patchSize * sizeof(double)); | ||
memset(h_internal_coeffs.data() + offset + patchSize, 0, patchSize * sizeof(double)); | ||
memcpy(h_boundary_coeffs.data() + offset, boundary_coeff_ptr, patchSize * sizeof(double)); | ||
memset(h_boundary_coeffs.data() + offset + patchSize, 0, patchSize * sizeof(double)); | ||
offset += patchSize * 2; | ||
} else { | ||
memcpy(h_internal_coeffs.data() + offset, internal_coeff_ptr, patchSize * sizeof(double)); | ||
memcpy(h_boundary_coeffs.data() + offset, boundary_coeff_ptr, patchSize * sizeof(double)); | ||
offset += patchSize; | ||
} | ||
} | ||
|
||
double *h_boundary_he_tmp = new double[dfDataBase.num_boundary_surfaces]; | ||
offset = 0; | ||
forAll(he.boundaryField(), patchi) | ||
{ | ||
const fvPatchScalarField& patchHe = he.boundaryField()[patchi]; | ||
int patchSize = patchHe.size(); | ||
if (patchHe.type() == "processor" | ||
|| patchHe.type() == "processorCyclic") { | ||
const scalarField& patchHeInternal = dynamic_cast<const processorFvPatchField<scalar>&>(patchHe).patchInternalField()(); | ||
memcpy(h_boundary_he_tmp + offset, &patchHe[0], patchSize * sizeof(double)); | ||
memcpy(h_boundary_he_tmp + offset + patchSize, &patchHeInternal[0], patchSize * sizeof(double)); | ||
offset += patchSize * 2; | ||
} else { | ||
memcpy(h_boundary_he_tmp + offset, &patchHe[0], patchSize * sizeof(double)); | ||
offset += patchSize; | ||
} | ||
} | ||
|
||
bool printFlag = false; | ||
int rank = -1; | ||
if (mpi_init_flag) { | ||
MPI_Comm_rank(MPI_COMM_WORLD, &rank); | ||
} | ||
if (!mpi_init_flag || rank == 0) { | ||
// DEBUG_TRACE; | ||
// EEqn_GPU.compareResult(&EEqn.lower()[0], &EEqn.upper()[0], &EEqn.diag()[0], &EEqn.source()[0], | ||
// h_internal_coeffs.data(), h_boundary_coeffs.data(), printFlag); | ||
// DEBUG_TRACE; | ||
// EEqn_GPU.compareHe(&he[0], h_boundary_he_tmp, printFlag); | ||
} | ||
|
||
delete h_boundary_he_tmp; | ||
|
||
#endif | ||
|
||
#else | ||
start1 = std::clock(); | ||
fvScalarMatrix EEqn | ||
( | ||
|
||
fvm::ddt(rho, he) + mvConvection->fvmDiv(phi, he) | ||
+ fvc::ddt(rho, K) + fvc::div(phi, K) | ||
- dpdt | ||
== | ||
fvm::ddt(rho, he) + mvConvection->fvmDiv(phi, he) | ||
+ fvc::ddt(rho, K) + fvc::div(phi, K) | ||
- dpdt | ||
== | ||
( | ||
turbName == "laminar" | ||
? | ||
( | ||
fvm::laplacian(turbulence->alpha(), he) | ||
- diffAlphaD | ||
+ fvc::div(hDiffCorrFlux) | ||
) | ||
: | ||
( | ||
turbName == "laminar" | ||
? | ||
( | ||
fvm::laplacian(turbulence->alpha(), he) | ||
- diffAlphaD | ||
+ fvc::div(hDiffCorrFlux) | ||
) | ||
: | ||
( | ||
fvm::laplacian(turbulence->alphaEff(), he) | ||
) | ||
fvm::laplacian(turbulence->alphaEff(), he) | ||
) | ||
); | ||
) | ||
); | ||
end1 = std::clock(); | ||
time_monitor_EEqn += double(end1 - start1) / double(CLOCKS_PER_SEC); | ||
time_monitor_EEqn_mtxAssembly += double(end1 - start1) / double(CLOCKS_PER_SEC); | ||
|
||
EEqn.relax(); | ||
// EEqn.relax(); | ||
start1 = std::clock(); | ||
EEqn.solve("ha"); | ||
end1 = std::clock(); | ||
time_monitor_EEqn += double(end1 - start1) / double(CLOCKS_PER_SEC); | ||
time_monitor_EEqn_solve += double(end1 - start1) / double(CLOCKS_PER_SEC); | ||
#endif | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,24 +1,148 @@ | ||
start1 = std::clock(); | ||
tmp<fvVectorMatrix> tUEqn | ||
( | ||
fvm::ddt(rho, U) + fvm::div(phi, U) | ||
+ turbulence->divDevRhoReff(U) | ||
); | ||
fvVectorMatrix& UEqn = tUEqn.ref(); | ||
|
||
end1 = std::clock(); | ||
time_monitor_UEqn += double(end1 - start1) / double(CLOCKS_PER_SEC); | ||
time_monitor_UEqn_mtxAssembly += double(end1 - start1) / double(CLOCKS_PER_SEC); | ||
|
||
UEqn.relax(); | ||
start1 = std::clock(); | ||
if (pimple.momentumPredictor()) | ||
{ | ||
solve(UEqn == -fvc::grad(p)); | ||
// Solve the Momentum equation | ||
#ifdef GPUSolverNew_ | ||
|
||
#if defined DEBUG_ | ||
// run CPU, for temp | ||
TICK_START; | ||
tmp<fvVectorMatrix> tUEqn | ||
( | ||
fvm::ddt(rho, U) | ||
+ | ||
fvm::div(phi, U) | ||
+ | ||
turbulence->divDevRhoReff(U) | ||
); | ||
fvVectorMatrix& UEqn = tUEqn.ref(); | ||
TICK_STOP(CPU assembly time); | ||
|
||
volTensorField gradU = fvc::grad(U); | ||
|
||
double *h_boundary_gradU = new double[dfDataBase.num_boundary_surfaces * 9]; | ||
offset = 0; | ||
forAll(U.boundaryField(), patchi) | ||
{ | ||
const fvPatchTensorField& patchGradU = gradU.boundaryField()[patchi]; | ||
int patchsize = patchGradU.size(); | ||
if (patchGradU.type() == "processor" | ||
|| patchGradU.type() == "processorCyclic") { | ||
// print info | ||
if (dynamic_cast<const processorFvPatchField<tensor>&>(patchGradU).doTransform()) { | ||
Info << "gradU transform = true" << endl; | ||
} else { | ||
Info << "gradU transform = false" << endl; | ||
} | ||
Info << "rank = " << dynamic_cast<const processorFvPatchField<tensor>&>(patchGradU).rank() << endl; | ||
|
||
memcpy(h_boundary_gradU + 9*offset, &patchGradU[0][0], patchsize * 9 * sizeof(double)); | ||
tensorField patchGradUInternal = | ||
dynamic_cast<const processorFvPatchField<tensor>&>(patchGradU).patchInternalField()(); | ||
memcpy(h_boundary_gradU + 9*offset + patchsize * 9, &patchGradUInternal[0][0], patchsize * 9 * sizeof(double)); | ||
offset += patchsize * 2; | ||
} else { | ||
memcpy(h_boundary_gradU + 9*offset, &patchGradU[0][0], patchsize * 9 * sizeof(double)); | ||
offset += patchsize; | ||
} | ||
} | ||
#endif | ||
|
||
// process | ||
TICK_START; | ||
UEqn_GPU.process(); | ||
UEqn_GPU.sync(); | ||
TICK_STOP(GPU process time); | ||
|
||
// postProcess | ||
// TICK_START; | ||
// UEqn_GPU.postProcess(h_u); | ||
// memcpy(&U[0][0], h_u, dfDataBase.cell_value_vec_bytes); | ||
// U.correctBoundaryConditions(); | ||
// K = 0.5*magSqr(U); | ||
// DEBUG_TRACE; | ||
// TICK_STOP(post process time); | ||
|
||
#if defined DEBUG_ | ||
// UEqn.relax(); | ||
TICK_START; | ||
solve(UEqn == -fvc::grad(p)); | ||
K.oldTime(); | ||
K = 0.5*magSqr(U); | ||
} | ||
end1 = std::clock(); | ||
time_monitor_UEqn += double(end1 - start1) / double(CLOCKS_PER_SEC); | ||
time_monitor_UEqn_solve += double(end1 - start1) / double(CLOCKS_PER_SEC); | ||
TICK_STOP(CPU solve time); | ||
// checkResult | ||
// TODO: for temp, now we compare ldu, finally we compare csr | ||
std::vector<double> h_internal_coeffs(dfDataBase.num_boundary_surfaces * 3); | ||
std::vector<double> h_boundary_coeffs(dfDataBase.num_boundary_surfaces * 3); | ||
|
||
offset = 0; | ||
for (int patchi = 0; patchi < dfDataBase.num_patches; patchi++) | ||
{ | ||
const fvPatchVectorField& patchU = U.boundaryField()[patchi]; | ||
int patchsize = dfDataBase.patch_size[patchi]; | ||
const double* internal_coeff_ptr = &UEqn.internalCoeffs()[patchi][0][0]; | ||
const double* boundary_coeff_ptr = &UEqn.boundaryCoeffs()[patchi][0][0]; | ||
memcpy(h_internal_coeffs.data() + offset * 3, internal_coeff_ptr, patchsize * 3 * sizeof(double)); | ||
memcpy(h_boundary_coeffs.data() + offset * 3, boundary_coeff_ptr, patchsize * 3 * sizeof(double)); | ||
if (patchU.type() == "processor" || patchU.type() == "processorCyclic") offset += 2 * patchsize; | ||
else offset += patchsize; | ||
} | ||
|
||
double *h_boundary_u_tmp = new double[dfDataBase.num_boundary_surfaces * 3]; | ||
offset = 0; | ||
forAll(U.boundaryField(), patchi) | ||
{ | ||
const fvPatchVectorField& patchU = U.boundaryField()[patchi]; | ||
int patchsize = dfDataBase.patch_size[patchi]; | ||
|
||
if (patchU.type() == "processor" | ||
|| patchU.type() == "processorCyclic") { | ||
memcpy(h_boundary_u_tmp + 3*offset, &patchU[0][0], 3*patchsize * sizeof(double)); | ||
vectorField patchUInternal = | ||
dynamic_cast<const processorFvPatchField<vector>&>(patchU).patchInternalField()(); | ||
memcpy(h_boundary_u_tmp + 3*offset + 3*patchsize, &patchUInternal[0][0], 3*patchsize * sizeof(double)); | ||
offset += 2 * patchsize; | ||
} else { | ||
memcpy(h_boundary_u_tmp + 3*offset, &patchU[0][0], 3*patchsize * sizeof(double)); | ||
offset += patchsize; | ||
} | ||
} | ||
|
||
bool printFlag = false; | ||
|
||
int rank = -1; | ||
if (mpi_init_flag) { | ||
MPI_Comm_rank(MPI_COMM_WORLD, &rank); | ||
} | ||
|
||
if (!mpi_init_flag || rank == 0) { | ||
// UEqn_GPU.compareResult(&UEqn.lower()[0], &UEqn.upper()[0], &UEqn.diag()[0], &UEqn.source()[0][0], | ||
// h_internal_coeffs.data(), h_boundary_coeffs.data(), | ||
// // &gradU[0][0], h_boundary_gradU, | ||
// printFlag); | ||
// UEqn_GPU.compareU(&U[0][0], h_boundary_u_tmp, printFlag); | ||
} | ||
DEBUG_TRACE; | ||
#endif | ||
|
||
#else | ||
start1 = std::clock(); | ||
tmp<fvVectorMatrix> tUEqn | ||
( | ||
fvm::ddt(rho, U) + fvm::div(phi, U) | ||
+ turbulence->divDevRhoReff(U) | ||
); | ||
fvVectorMatrix& UEqn = tUEqn.ref(); | ||
end1 = std::clock(); | ||
time_monitor_UEqn += double(end1 - start1) / double(CLOCKS_PER_SEC); | ||
time_monitor_UEqn_mtxAssembly += double(end1 - start1) / double(CLOCKS_PER_SEC); | ||
|
||
// UEqn.relax(); | ||
start1 = std::clock(); | ||
if (pimple.momentumPredictor()) | ||
{ | ||
solve(UEqn == -fvc::grad(p)); | ||
K.oldTime(); | ||
K = 0.5*magSqr(U); | ||
} | ||
end1 = std::clock(); | ||
time_monitor_UEqn += double(end1 - start1) / double(CLOCKS_PER_SEC); | ||
time_monitor_UEqn_solve += double(end1 - start1) / double(CLOCKS_PER_SEC); | ||
#endif |
Oops, something went wrong.