From 44217dbc3d5c58fb0135d2dd3d8d3f57f27c5c6e Mon Sep 17 00:00:00 2001 From: uptonwu Date: Sun, 7 Apr 2024 15:13:09 +0800 Subject: [PATCH] Separate the GPU and CPU code in the .H header files under the dfLowMachFoam solver. --- applications/solvers/dfLowMachFoam/EEqn_GPU.H | 101 +++++++ applications/solvers/dfLowMachFoam/UEqn_GPU.H | 122 +++++++++ applications/solvers/dfLowMachFoam/YEqn_GPU.H | 247 ++++++++++++++++++ applications/solvers/dfLowMachFoam/pEqn.H | 129 +++++++++ .../solvers/dfLowMachFoam/rhoEqn_GPU.H | 87 ++++++ 5 files changed, 686 insertions(+) create mode 100644 applications/solvers/dfLowMachFoam/EEqn_GPU.H create mode 100644 applications/solvers/dfLowMachFoam/UEqn_GPU.H create mode 100644 applications/solvers/dfLowMachFoam/YEqn_GPU.H create mode 100644 applications/solvers/dfLowMachFoam/pEqn.H create mode 100644 applications/solvers/dfLowMachFoam/rhoEqn_GPU.H diff --git a/applications/solvers/dfLowMachFoam/EEqn_GPU.H b/applications/solvers/dfLowMachFoam/EEqn_GPU.H new file mode 100644 index 000000000..ad118b3a9 --- /dev/null +++ b/applications/solvers/dfLowMachFoam/EEqn_GPU.H @@ -0,0 +1,101 @@ +{ + volScalarField& he = thermo.he(); + double *h_he = dfDataBase.getFieldPointer("he", location::cpu, position::internal); + double *h_boundary_he = dfDataBase.getFieldPointer("he", location::cpu, position::boundary); + + EEqn_GPU.process(); + EEqn_GPU.sync(); + // EEqn_GPU.postProcess(h_he, h_boundary_he); + + // copy h_he to he(cpu) + // memcpy(&he[0], h_he, dfDataBase.cell_value_bytes); + + //DEBUG_TRACE; + //he.correctBoundaryConditions(); + //DEBUG_TRACE; + +#if defined DEBUG_ + fvScalarMatrix EEqn + ( + + fvm::ddt(rho, he) + mvConvection->fvmDiv(phi, he) + + fvc::ddt(rho, K) + fvc::div(phi, K) + - dpdt + == + ( + turbName == "laminar" + ? + ( + fvm::laplacian(turbulence->alpha(), he) + - diffAlphaD + + fvc::div(hDiffCorrFlux) + ) + : + ( + fvm::laplacian(turbulence->alphaEff(), he) + ) + ) + ); + // EEqn.relax(); + EEqn.solve("ha"); + // checkResult + // TODO: for temp, now we compare ldu, finally we compare csr + std::vector h_internal_coeffs(dfDataBase.num_boundary_surfaces); + std::vector h_boundary_coeffs(dfDataBase.num_boundary_surfaces); + + offset = 0; + forAll(he.boundaryField(), patchi) + { + const fvPatchScalarField& patchHe = he.boundaryField()[patchi]; + int patchSize = patchHe.size(); + const double* internal_coeff_ptr = &EEqn.internalCoeffs()[patchi][0]; + const double* boundary_coeff_ptr = &EEqn.boundaryCoeffs()[patchi][0]; + if (patchHe.type() == "processor" + || patchHe.type() == "processorCyclic") { + memcpy(h_internal_coeffs.data() + offset, internal_coeff_ptr, patchSize * sizeof(double)); + memset(h_internal_coeffs.data() + offset + patchSize, 0, patchSize * sizeof(double)); + memcpy(h_boundary_coeffs.data() + offset, boundary_coeff_ptr, patchSize * sizeof(double)); + memset(h_boundary_coeffs.data() + offset + patchSize, 0, patchSize * sizeof(double)); + offset += patchSize * 2; + } else { + memcpy(h_internal_coeffs.data() + offset, internal_coeff_ptr, patchSize * sizeof(double)); + memcpy(h_boundary_coeffs.data() + offset, boundary_coeff_ptr, patchSize * sizeof(double)); + offset += patchSize; + } + } + + double *h_boundary_he_tmp = new double[dfDataBase.num_boundary_surfaces]; + offset = 0; + forAll(he.boundaryField(), patchi) + { + const fvPatchScalarField& patchHe = he.boundaryField()[patchi]; + int patchSize = patchHe.size(); + if (patchHe.type() == "processor" + || patchHe.type() == "processorCyclic") { + const scalarField& patchHeInternal = dynamic_cast&>(patchHe).patchInternalField()(); + memcpy(h_boundary_he_tmp + offset, &patchHe[0], patchSize * sizeof(double)); + memcpy(h_boundary_he_tmp + offset + patchSize, &patchHeInternal[0], patchSize * sizeof(double)); + offset += patchSize * 2; + } else { + memcpy(h_boundary_he_tmp + offset, &patchHe[0], patchSize * sizeof(double)); + offset += patchSize; + } + } + + bool printFlag = false; + int rank = -1; + if (mpi_init_flag) { + MPI_Comm_rank(MPI_COMM_WORLD, &rank); + } + if (!mpi_init_flag || rank == 0) { + // DEBUG_TRACE; + // EEqn_GPU.compareResult(&EEqn.lower()[0], &EEqn.upper()[0], &EEqn.diag()[0], &EEqn.source()[0], + // h_internal_coeffs.data(), h_boundary_coeffs.data(), printFlag); + // DEBUG_TRACE; + // EEqn_GPU.compareHe(&he[0], h_boundary_he_tmp, printFlag); + } + + delete h_boundary_he_tmp; + +#endif +} diff --git a/applications/solvers/dfLowMachFoam/UEqn_GPU.H b/applications/solvers/dfLowMachFoam/UEqn_GPU.H new file mode 100644 index 000000000..8859f41ba --- /dev/null +++ b/applications/solvers/dfLowMachFoam/UEqn_GPU.H @@ -0,0 +1,122 @@ +// Solve the Momentum equation +#if defined DEBUG_ + // run CPU, for temp + TICK_START; + tmp tUEqn + ( + fvm::ddt(rho, U) + + + fvm::div(phi, U) + + + turbulence->divDevRhoReff(U) + ); + fvVectorMatrix& UEqn = tUEqn.ref(); + TICK_STOP(CPU assembly time); + + volTensorField gradU = fvc::grad(U); + + double *h_boundary_gradU = new double[dfDataBase.num_boundary_surfaces * 9]; + offset = 0; + forAll(U.boundaryField(), patchi) + { + const fvPatchTensorField& patchGradU = gradU.boundaryField()[patchi]; + int patchsize = patchGradU.size(); + if (patchGradU.type() == "processor" + || patchGradU.type() == "processorCyclic") { + // print info + if (dynamic_cast&>(patchGradU).doTransform()) { + Info << "gradU transform = true" << endl; + } else { + Info << "gradU transform = false" << endl; + } + Info << "rank = " << dynamic_cast&>(patchGradU).rank() << endl; + + memcpy(h_boundary_gradU + 9*offset, &patchGradU[0][0], patchsize * 9 * sizeof(double)); + tensorField patchGradUInternal = + dynamic_cast&>(patchGradU).patchInternalField()(); + memcpy(h_boundary_gradU + 9*offset + patchsize * 9, &patchGradUInternal[0][0], patchsize * 9 * sizeof(double)); + offset += patchsize * 2; + } else { + memcpy(h_boundary_gradU + 9*offset, &patchGradU[0][0], patchsize * 9 * sizeof(double)); + offset += patchsize; + } + } +#endif + + // process + TICK_START; + UEqn_GPU.process(); + UEqn_GPU.sync(); + TICK_STOP(GPU process time); + + // postProcess + // TICK_START; + // UEqn_GPU.postProcess(h_u); + // memcpy(&U[0][0], h_u, dfDataBase.cell_value_vec_bytes); + // U.correctBoundaryConditions(); + // K = 0.5*magSqr(U); + // DEBUG_TRACE; + // TICK_STOP(post process time); + +#if defined DEBUG_ + // UEqn.relax(); + TICK_START; + solve(UEqn == -fvc::grad(p)); + K.oldTime(); + K = 0.5*magSqr(U); + TICK_STOP(CPU solve time); + // checkResult + // TODO: for temp, now we compare ldu, finally we compare csr + std::vector h_internal_coeffs(dfDataBase.num_boundary_surfaces * 3); + std::vector h_boundary_coeffs(dfDataBase.num_boundary_surfaces * 3); + + offset = 0; + for (int patchi = 0; patchi < dfDataBase.num_patches; patchi++) + { + const fvPatchVectorField& patchU = U.boundaryField()[patchi]; + int patchsize = dfDataBase.patch_size[patchi]; + const double* internal_coeff_ptr = &UEqn.internalCoeffs()[patchi][0][0]; + const double* boundary_coeff_ptr = &UEqn.boundaryCoeffs()[patchi][0][0]; + memcpy(h_internal_coeffs.data() + offset * 3, internal_coeff_ptr, patchsize * 3 * sizeof(double)); + memcpy(h_boundary_coeffs.data() + offset * 3, boundary_coeff_ptr, patchsize * 3 * sizeof(double)); + if (patchU.type() == "processor" || patchU.type() == "processorCyclic") offset += 2 * patchsize; + else offset += patchsize; + } + + double *h_boundary_u_tmp = new double[dfDataBase.num_boundary_surfaces * 3]; + offset = 0; + forAll(U.boundaryField(), patchi) + { + const fvPatchVectorField& patchU = U.boundaryField()[patchi]; + int patchsize = dfDataBase.patch_size[patchi]; + + if (patchU.type() == "processor" + || patchU.type() == "processorCyclic") { + memcpy(h_boundary_u_tmp + 3*offset, &patchU[0][0], 3*patchsize * sizeof(double)); + vectorField patchUInternal = + dynamic_cast&>(patchU).patchInternalField()(); + memcpy(h_boundary_u_tmp + 3*offset + 3*patchsize, &patchUInternal[0][0], 3*patchsize * sizeof(double)); + offset += 2 * patchsize; + } else { + memcpy(h_boundary_u_tmp + 3*offset, &patchU[0][0], 3*patchsize * sizeof(double)); + offset += patchsize; + } + } + + bool printFlag = false; + + int rank = -1; + if (mpi_init_flag) { + MPI_Comm_rank(MPI_COMM_WORLD, &rank); + } + + if (!mpi_init_flag || rank == 0) { + // UEqn_GPU.compareResult(&UEqn.lower()[0], &UEqn.upper()[0], &UEqn.diag()[0], &UEqn.source()[0][0], + // h_internal_coeffs.data(), h_boundary_coeffs.data(), + // // &gradU[0][0], h_boundary_gradU, + // printFlag); + // UEqn_GPU.compareU(&U[0][0], h_boundary_u_tmp, printFlag); + } + DEBUG_TRACE; +#endif + diff --git a/applications/solvers/dfLowMachFoam/YEqn_GPU.H b/applications/solvers/dfLowMachFoam/YEqn_GPU.H new file mode 100644 index 000000000..85d1bb779 --- /dev/null +++ b/applications/solvers/dfLowMachFoam/YEqn_GPU.H @@ -0,0 +1,247 @@ +#if defined DEBUG_ + hDiffCorrFlux = Zero; + diffAlphaD = Zero; + sumYDiffError = Zero; + + tmp> mvConvection + ( + fv::convectionScheme::New + ( + mesh, + fields, + phi, + mesh.divScheme("div(phi,Yi_h)") + ) + ); + // auto& mgcs = dynamic_cast&>(mvConvection.ref()); + // tmp> tinterpScheme_ = mgcs.interpolationScheme()()(Y[0]); + // tmp tweights = tinterpScheme_().weights(Y[0]); + // const surfaceScalarField& weights = tweights(); + // Info << "CPU weights\n" << weights << endl; + + // auto& limitedScheme_ = dynamic_cast&>(tinterpScheme_()); + // Info << "CPU limiter\n" << limitedScheme_.limiter(Y[0]) << endl; + + forAll(Y, i) + { + sumYDiffError += chemistry->rhoD(i)*fvc::grad(Y[i]); + } + const surfaceScalarField phiUc = linearInterpolate(sumYDiffError) & mesh.Sf(); +#endif + +//MPI_Barrier(PstreamGlobals::MPI_COMM_FOAM); +label flag_mpi_init; +MPI_Initialized(&flag_mpi_init); +if(flag_mpi_init) MPI_Barrier(PstreamGlobals::MPI_COMM_FOAM); + +{ +#if defined DEBUG_ + // run CPU + volScalarField Yt(0.0*Y[0]); + int speciesIndex = 0; + forAll(Y, i) + { + volScalarField& Yi = Y[i]; + hDiffCorrFlux += chemistry->hai(i)*(chemistry->rhoD(i)*fvc::grad(Yi) - Yi*sumYDiffError); + diffAlphaD += fvc::laplacian(thermo.alpha()*chemistry->hai(i), Yi); + if (i != inertIndex) + { + start1 = std::clock(); + tmp DEff = chemistry->rhoD(i) + turbulence->mut()/Sct; + + #ifdef ODE_GPU_SOLVER + fvScalarMatrix YiEqn + ( + fvm::ddt(rho, Yi) + + + ( + turbName == "laminar" + ? (mvConvection->fvmDiv(phi, Yi) + mvConvection->fvmDiv(phiUc, Yi)) + : mvConvection->fvmDiv(phi, Yi) + ) + == + ( + splitting + ? fvm::laplacian(DEff(), Yi) + : (fvm::laplacian(DEff(), Yi) + RR_GPU[i]) + ) + ); + #else + fvScalarMatrix YiEqn + ( + fvm::ddt(rho, Yi) + + + ( + turbName == "laminar" + ? (mvConvection->fvmDiv(phi, Yi) + mvConvection->fvmDiv(phiUc, Yi)) + : mvConvection->fvmDiv(phi, Yi) + ) + == + ( + splitting + ? fvm::laplacian(DEff(), Yi) + : (fvm::laplacian(DEff(), Yi) + combustion->R(Yi)) + ) + ); + #endif + + end1 = std::clock(); + time_monitor_YEqn_mtxAssembly += double(end1 - start1) / double(CLOCKS_PER_SEC); + // YiEqn.relax(); + + start1 = std::clock(); + YiEqn.solve("Yi"); + end1 = std::clock(); + time_monitor_YEqn_solve += double(end1 - start1) / double(CLOCKS_PER_SEC); + + Yi.max(0.0); + Yt += Yi; + ++speciesIndex; + } + } + Y[inertIndex] = scalar(1) - Yt; + Y[inertIndex].max(0.0); + + int specie_index = 0; + + // should compute grad_yi before YiEqn.solve() + const volVectorField grad_yi = fvc::grad(Y[specie_index]); + + tmp DEff = chemistry->rhoD(specie_index) + turbulence->mut()/Sct; + fvScalarMatrix YiEqn + ( + fvm::ddt(rho, Y[specie_index]) + + mvConvection->fvmDiv(phi, Y[specie_index]) + + mvConvection->fvmDiv(phiUc, Y[specie_index]) + == + fvm::laplacian(DEff(), Y[specie_index]) + ); + // YiEqn.relax(); + // YiEqn.solve("Yi"); + // Y[specie_index].max(0.0); +#endif + + // process + YEqn_GPU.process(); + YEqn_GPU.sync(); + +#if defined DEBUG_ + std::vector h_boundary_diffAlphaD; + std::vector h_boundary_grad_yi; + std::vector h_boundary_sumYDiffError; + std::vector h_boundary_hDiffCorrFlux; + std::vector h_boundary_phiUc; + h_boundary_diffAlphaD.resize(dfDataBase.num_boundary_surfaces); + h_boundary_grad_yi.resize(dfDataBase.num_boundary_surfaces * 3); + h_boundary_sumYDiffError.resize(dfDataBase.num_boundary_surfaces * 3); + h_boundary_hDiffCorrFlux.resize(dfDataBase.num_boundary_surfaces * 3); + h_boundary_phiUc.resize(dfDataBase.num_boundary_surfaces); + offset = 0; + forAll(diffAlphaD.boundaryField(), patchi) + { + //const scalarField& patchdiffAlphaD = diffAlphaD.boundaryField()[patchi]; + const fvPatchScalarField& patchdiffAlphaD = diffAlphaD.boundaryField()[patchi]; + const fvPatchVectorField& patchgradyi = grad_yi.boundaryField()[patchi]; + const fvPatchVectorField& patchsumYDiffError = sumYDiffError.boundaryField()[patchi]; + const fvPatchVectorField& patchhDiffCorrFlux = hDiffCorrFlux.boundaryField()[patchi]; + const fvsPatchScalarField& patchphiUc = phiUc.boundaryField()[patchi]; + int patchSize = patchdiffAlphaD.size(); + if (patchdiffAlphaD.type() == "processor" + || patchdiffAlphaD.type() == "processorCyclic") { + scalarField patchdiffAlphaDInternal = dynamic_cast&>(patchdiffAlphaD).patchInternalField()(); + vectorField patchgradyiInternal = dynamic_cast&>(patchgradyi).patchInternalField()(); + vectorField patchsumYDiffErrorInternal = dynamic_cast&>(patchsumYDiffError).patchInternalField()(); + vectorField patchhDiffCorrFluxInternal = dynamic_cast&>(patchhDiffCorrFlux).patchInternalField()(); + memcpy(h_boundary_diffAlphaD.data() + offset, &patchdiffAlphaD[0], patchSize*sizeof(double)); + memcpy(h_boundary_diffAlphaD.data() + offset + patchSize, &patchdiffAlphaDInternal[0], patchSize*sizeof(double)); + memcpy(h_boundary_grad_yi.data() + offset * 3, &patchgradyi[0][0], patchSize * 3 *sizeof(double)); + memcpy(h_boundary_grad_yi.data() + (offset + patchSize) * 3, &patchgradyiInternal[0][0], patchSize * 3 *sizeof(double)); + memcpy(h_boundary_sumYDiffError.data() + offset * 3, &patchsumYDiffError[0][0], patchSize * 3 *sizeof(double)); + memcpy(h_boundary_sumYDiffError.data() + (offset + patchSize) * 3, &patchsumYDiffErrorInternal[0][0], patchSize * 3 *sizeof(double)); + memcpy(h_boundary_hDiffCorrFlux.data() + offset * 3, &patchhDiffCorrFlux[0][0], patchSize * 3 *sizeof(double)); + memcpy(h_boundary_hDiffCorrFlux.data() + (offset + patchSize) * 3, &patchhDiffCorrFluxInternal[0][0], patchSize * 3 *sizeof(double)); + memcpy(h_boundary_phiUc.data() + offset, &patchphiUc[0], patchSize*sizeof(double)); + memcpy(h_boundary_phiUc.data() + offset, &patchphiUc[0], patchSize*sizeof(double)); + offset += patchSize * 2; + } else { + memcpy(h_boundary_diffAlphaD.data() + offset, &patchdiffAlphaD[0], patchSize*sizeof(double)); + memcpy(h_boundary_grad_yi.data() + offset * 3, &patchgradyi[0][0], patchSize * 3 *sizeof(double)); + memcpy(h_boundary_sumYDiffError.data() + offset * 3, &patchsumYDiffError[0][0], patchSize * 3 *sizeof(double)); + memcpy(h_boundary_hDiffCorrFlux.data() + offset * 3, &patchhDiffCorrFlux[0][0], patchSize * 3 *sizeof(double)); + memcpy(h_boundary_phiUc.data() + offset, &patchphiUc[0], patchSize*sizeof(double)); + offset += patchSize; + } + } + DEBUG_TRACE; + // YEqn_GPU.comparediffAlphaD(&diffAlphaD[0], h_boundary_diffAlphaD.data(), false); + // YEqn_GPU.comparegradyi(&grad_yi[0][0], h_boundary_grad_yi.data(), specie_index, false); + // YEqn_GPU.comparesumYDiffError(&sumYDiffError[0][0], h_boundary_sumYDiffError.data(), false); + // YEqn_GPU.comparehDiffCorrFlux(&hDiffCorrFlux[0][0], h_boundary_hDiffCorrFlux.data(), false); + // YEqn_GPU.comparephiUc(&phiUc[0], h_boundary_phiUc.data(), false); + DEBUG_TRACE; + + // checkResult + // TODO: for temp, now we compare ldu, finally we compare csr + std::vector yeqn_h_internal_coeffs(dfDataBase.num_boundary_surfaces); + std::vector yeqn_h_boundary_coeffs(dfDataBase.num_boundary_surfaces); + + offset = 0; + forAll(Y[specie_index].boundaryField(), patchi) + { + const fvPatchScalarField& patchYi = Y[specie_index].boundaryField()[patchi]; + int patchsize = patchYi.size(); + const double* internal_coeff_ptr = &YiEqn.internalCoeffs()[patchi][0]; + const double* boundary_coeff_ptr = &YiEqn.boundaryCoeffs()[patchi][0]; + if (patchYi.type() == "processor" + || patchYi.type() == "processorCyclic") { + memcpy(yeqn_h_internal_coeffs.data() + offset, internal_coeff_ptr, patchsize * sizeof(double)); + memset(yeqn_h_internal_coeffs.data() + offset + patchsize, 0, patchsize * sizeof(double)); + memcpy(yeqn_h_boundary_coeffs.data() + offset, boundary_coeff_ptr, patchsize * sizeof(double)); + memset(yeqn_h_boundary_coeffs.data() + offset + patchsize, 0, patchsize * sizeof(double)); + offset += patchsize * 2; + } else { + memcpy(yeqn_h_internal_coeffs.data() + offset, internal_coeff_ptr, patchsize * sizeof(double)); + memcpy(yeqn_h_boundary_coeffs.data() + offset, boundary_coeff_ptr, patchsize * sizeof(double)); + offset += patchsize; + } + } + // NOTE: ldu and yi can't be compared at the same time + // to compare ldu data, you should open both DEBUG_ and DEBUG_CHECK_LDU in src_gpu + // to compare yi, you should only open DEBUG_ in src_gpu. + // Besides, if you compare ldu data, be patient to keep specie_index in YEqn.H and dfYEqn.cu the same. + //DEBUG_TRACE; + bool printFlag = false; + int rank = -1; + if (mpi_init_flag) { + MPI_Comm_rank(MPI_COMM_WORLD, &rank); + } + if (!mpi_init_flag || rank == 0) { + // YEqn_GPU.compareResult(&YiEqn.lower()[0], &YiEqn.upper()[0], &YiEqn.diag()[0], &YiEqn.source()[0], + // yeqn_h_internal_coeffs.data(), yeqn_h_boundary_coeffs.data(), printFlag); + } + + DEBUG_TRACE; + // YEqn_GPU.compareYi(&Y[specie_index][0], specie_index, false); + // DEBUG_TRACE; +#endif + + // postProcess + double *h_y = dfDataBase.getFieldPointer("y", location::cpu, position::internal); + double *h_boundary_y = dfDataBase.getFieldPointer("y", location::cpu, position::boundary); + // YEqn_GPU.postProcess(h_y, h_boundary_y); + DEBUG_TRACE; + + // copy h_y to Y(cpu) + // offset = 0; + // forAll(Y, i) + // { + // volScalarField& Yi = Y[i]; + // memcpy(&Yi[0], h_y + offset, Yi.size() * sizeof(double)); + // offset += Yi.size(); + // Yi.correctBoundaryConditions(); + // } + DEBUG_TRACE; + + fflush(stderr); + +} \ No newline at end of file diff --git a/applications/solvers/dfLowMachFoam/pEqn.H b/applications/solvers/dfLowMachFoam/pEqn.H new file mode 100644 index 000000000..aa379dde7 --- /dev/null +++ b/applications/solvers/dfLowMachFoam/pEqn.H @@ -0,0 +1,129 @@ +if (!pimple.simpleRho()) +{ + rho = thermo.rho(); +} + +// Thermodynamic density needs to be updated by psi*d(p) after the +// pressure solution +const volScalarField psip0(psi*p); + +volScalarField rAU(1.0/UEqn.A()); +surfaceScalarField rhorAUf("rhorAUf", fvc::interpolate(rho*rAU)); +volVectorField HbyA(constrainHbyA(rAU*UEqn.H(), U, p)); + +if (pimple.nCorrPiso() <= 1) +{ + tUEqn.clear(); +} + +surfaceScalarField phiHbyA +( + "phiHbyA", + fvc::interpolate(rho)*fvc::flux(HbyA) + + rhorAUf*fvc::ddtCorr(rho, U, phi, rhoUf) +); + +fvc::makeRelative(phiHbyA, rho, U); + +label flag_mpi_init; +MPI_Initialized(&flag_mpi_init); +if(flag_mpi_init) MPI_Barrier(PstreamGlobals::MPI_COMM_FOAM); + +// Update the pressure BCs to ensure flux consistency +constrainPressure(p, rho, U, phiHbyA, rhorAUf); +//start = std::clock(); +if (pimple.transonic()) +{ + surfaceScalarField phid + ( + "phid", + (fvc::interpolate(psi)/fvc::interpolate(rho))*phiHbyA + ); + + phiHbyA -= fvc::interpolate(psi*p)*phiHbyA/fvc::interpolate(rho); + + fvScalarMatrix pDDtEqn + ( + fvc::ddt(rho) + psi*correction(fvm::ddt(p)) + + fvc::div(phiHbyA) + fvm::div(phid, p) + ); + + while (pimple.correctNonOrthogonal()) + { + fvScalarMatrix pEqn(pDDtEqn - fvm::laplacian(rhorAUf, p)); + + // Relax the pressure equation to ensure diagonal-dominance + pEqn.relax(); + + pEqn.solve(); + + if (pimple.finalNonOrthogonalIter()) + { + phi = phiHbyA + pEqn.flux(); + } + } +} +else +{ + fvScalarMatrix pDDtEqn + ( + fvc::ddt(rho) + + psi*correction(fvm::ddt(p)) + + fvc::div(phiHbyA) + ); + + while (pimple.correctNonOrthogonal()) + { + fvScalarMatrix pEqn(pDDtEqn - fvm::laplacian(rhorAUf, p)); + + + pEqn.solve(); + + + if (pimple.finalNonOrthogonalIter()) + { + phi = phiHbyA + pEqn.flux(); + } + } + +} + + +bool limitedp = pressureControl.limit(p); + +// Thermodynamic density update +thermo.correctRho(psi*p - psip0); + + +if (limitedp) +{ + rho = thermo.rho(); +} + +#include "rhoEqn.H" +#include "compressibleContinuityErrs.H" + +// Explicitly relax pressure for momentum corrector +// p.relax(); + +U = HbyA - rAU*fvc::grad(p); +U.correctBoundaryConditions(); +K = 0.5*magSqr(U); + +if (pimple.simpleRho()) +{ + rho = thermo.rho(); +} + +// Correct rhoUf if the mesh is moving +fvc::correctRhoUf(rhoUf, rho, U, phi); + +if (thermo.dpdt()) +{ + dpdt = fvc::ddt(p); + + if (mesh.moving()) + { + dpdt -= fvc::div(fvc::meshPhi(rho, U), p); + } +} \ No newline at end of file diff --git a/applications/solvers/dfLowMachFoam/rhoEqn_GPU.H b/applications/solvers/dfLowMachFoam/rhoEqn_GPU.H new file mode 100644 index 000000000..4aa794f8a --- /dev/null +++ b/applications/solvers/dfLowMachFoam/rhoEqn_GPU.H @@ -0,0 +1,87 @@ +/*---------------------------------------------------------------------------*\ + ========= | + \\ / F ield | OpenFOAM: The Open Source CFD Toolbox + \\ / O peration | Website: https://openfoam.org + \\ / A nd | Copyright (C) 2011-2018 OpenFOAM Foundation + \\/ M anipulation | +------------------------------------------------------------------------------- +License + This file is part of OpenFOAM. + + OpenFOAM is free software: you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + OpenFOAM is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + for more details. + + You should have received a copy of the GNU General Public License + along with OpenFOAM. If not, see . + +Global + rhoEqn + +Description + Solve the continuity for density. + +\*---------------------------------------------------------------------------*/ + double *h_rho = dfDataBase.getFieldPointer("rho", location::cpu, position::internal); + double *h_boundary_rho = dfDataBase.getFieldPointer("rho", location::cpu, position::boundary); + + TICK_START; + rhoEqn_GPU.process(); + rhoEqn_GPU.sync(); + TICK_STOP(GPU process time); + + // rhoEqn_GPU.postProcess(h_rho); + // rho.oldTime(); + // memcpy(&rho[0], h_rho, dfDataBase.cell_value_bytes); + // rho.correctBoundaryConditions(); + + +#ifdef DEBUG_ + // checkValue + TICK_START; + fvScalarMatrix rhoEqn + ( + fvm::ddt(rho) + + fvc::div(phi) + ); + rhoEqn.solve(); + TICK_STOP(CPU process time); + + int rank = -1; + if (mpi_init_flag) { + MPI_Comm_rank(MPI_COMM_WORLD, &rank); + } + + // if (!mpi_init_flag || rank == 0) { + // rhoEqn_GPU.compareResult(&rhoEqn.diag()[0], &rhoEqn.source()[0], false); + // } + + offset = 0; + forAll(rho.boundaryField(), patchi) + { + const fvPatchScalarField& patchRho = rho.boundaryField()[patchi]; + int patchsize = patchRho.size(); + if (patchRho.type() == "processor" + || patchRho.type() == "processorCyclic") { + memcpy(h_boundary_rho + offset, &patchRho[0], patchsize * sizeof(double)); + scalarField patchRhoInternal = + dynamic_cast&>(patchRho).patchInternalField()(); + memcpy(h_boundary_rho + offset + patchsize, &patchRhoInternal[0], patchsize * sizeof(double)); + offset += patchsize * 2; + } else { + memcpy(h_boundary_rho + offset, &patchRho[0], patchsize * sizeof(double)); + offset += patchsize; + } + } + // if (!mpi_init_flag || rank == 0) { + // rhoEqn_GPU.compareRho(&rho[0], h_boundary_rho, false); + // } +#endif + +// ************************************************************************* //