You signed in with another tab or window. Reload to refresh your session.You signed out in another tab or window. Reload to refresh your session.You switched accounts on another tab or window. Reload to refresh your session.Dismiss alert
the black boxes are blocks and the red points are non-zero values.
the values of the matrix points are like this:
The challenge in solving this linear equation lies in the small diagonal values of the matrix. To address this, I attempted to use a block matrix approach.
Reproduction steps
Below is the code snippet. You can directly copy the entire code into a .c file and run it. In the file, you only need to adjust the value of nx and ny
#include <math.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <mpi.h>
#include "cuda_runtime.h"
#include <cusolverSp.h>
#include <stdint.h>
#define M_PI 3.14159265358979323846
/* CUDA error macro */
#define CUDA_SAFE_CALL(call) do { \
cudaError_t err = call; \
if(cudaSuccess != err) { \
fprintf(stderr, "Cuda error in file '%s' in line %i : %s.\n", \
__FILE__, __LINE__, cudaGetErrorString( err) ); \
exit(EXIT_FAILURE); \
} } while (0)
#define CUSOLVER_SAFE_CALL(call) do { \
cusolverStatus_t status = call; \
if (status != CUSOLVER_STATUS_SUCCESS) { \
fprintf(stderr, "CUSOLVER error in %s:%d. Code:%d\n", __FILE__, __LINE__, status); \
exit(1); \
} \
} while(0)
//#define AMGX_DYNAMIC_LOADING
//#undef AMGX_DYNAMIC_LOADING
#define MAX_MSG_LEN 4096
/* standard or dynamically load library */
#ifdef AMGX_DYNAMIC_LOADING
#include "amgx_capi.h"
#else
#include "amgx_c.h"
#endif
/* print error message and exit */
void errAndExit(const char* err)
{
printf("%s\n", err);
fflush(stdout);
MPI_Abort(MPI_COMM_WORLD, 1);
exit(1);
}
/* print callback (could be customized) */
void print_callback(const char* msg, int length)
{
int rank;
MPI_Comm_rank(MPI_COMM_WORLD, &rank);
if (rank == 0) { printf("%s", msg); }
}
/* print usage and exit */
void printUsageAndExit()
{
char msg[MAX_MSG_LEN] = "Usage: mpirun [-n nranks] ./example [-mode [dDDI | dDFI | dFFI]] [-p nx ny] [-c config_file] [-amg \"variable1=value1 ... variable3=value3\"] [-gpu] [-it k]\n";
strcat(msg, " -mode: select the solver mode\n");
strcat(msg, " -p nx ny: select x- and y-dimensions of the 2D (5-points) local discretization of the Poisson operator (the global problem size will be nranks*nx*ny)\n");
strcat(msg, " -c: set the amg solver options from the config file\n");
strcat(msg, " -amg: set the amg solver options from the command line\n");
print_callback(msg, MAX_MSG_LEN);
MPI_Finalize();
exit(0);
}
/* parse parameters */
int findParamIndex(char** argv, int argc, const char* parm)
{
int count = 0;
int index = -1;
for (int i = 0; i < argc; i++)
{
if (strncmp(argv[i], parm, 100) == 0)
{
index = i;
count++;
}
}
if (count == 0 || count == 1)
{
return index;
}
else
{
char msg[MAX_MSG_LEN];
sprintf(msg, "ERROR: parameter %s has been specified more than once, exiting\n", parm);
print_callback(msg, MAX_MSG_LEN);
exit(1);
}
return -1;
}
double dhudx(double hw, double he, double uw, double ue, double dx)
{
return (he * ue - hw * uw) / dx;
}
double dhvdy(double hb, double ht, double vb, double vt, double dx)
{
return (ht * vt - hb * vb) / dx;
}
double gdetadx(double gra, double etaw, double etae, double dx)
{
return gra * (etae - etaw) / dx;
}
double friction(double gra, double mann, double h, double u, double v)
{
return gra * mann * mann / pow(h, 4.0 / 3.0) * sqrt(u * u + v * v) * u;
}
void SWEValKernel(
int n,
int nx,
int ny,
int block_dim,
double dx,
double gra,
double mann,
double eddy,
int* ndd,
int* nddu,
int* nddv,
double* eta,
double* u,
double* v,
double* h,
double* detadt,
void* values,
void* rhs,
int* row_ptrs,
int* pivot_offset) {
for (int i = 0; i < n; i++) {
int row = i / nx;
int col = i % nx;
int nnz = row_ptrs[i];
int offset = pivot_offset[i];
int nnz0 = 0;
int nnz1 = 0;
int nnz2 = 0;
int nnz3 = 0;
int nnz4 = 0;
/* continuity equation start*/
nnz2 = block_dim * block_dim * (nnz + offset);
if(col > 0){
nnz0 = block_dim * block_dim * (nnz + offset - 2);
nnz1 = block_dim * block_dim * (nnz + offset - 1);
}
else {
nnz0 = block_dim * block_dim * (nnz + offset - 2);
}
int idx = block_dim * i;
if (ndd[i] == 2) { // open boundary
((double*)values)[nnz2] = 1.0;
((double*)rhs)[idx] = -eta[i];
}
else {
double he = (col < nx - 1) ? 0.5 * (h[i] + h[i + 1]) : 0.0;
double hw = (col > 0) ? 0.5 * (h[i - 1] + h[i]) : 0.0;
double ht = (row < ny - 1) ? 0.5 * (h[i] + h[i + nx]) : 0.0;
double hb = (row > 0) ? 0.5 * (h[i - nx] + h[i]) : 0.0;
double ue = (col < nx - 1) ? u[i] : 0.0;
double uw = (col > 0) ? u[i - 1] : 0.0;
double vn = (row < ny - 1) ? v[i] : 0.0;
double vs = (row > 0) ? v[i - nx] : 0.0;
double dhudx0 = dhudx(hw, he, uw, ue, dx);
double dhvdy0 = dhvdy(hb, ht, vs, vn, dx);
if (row > 0) {
((double*)values)[nnz0 + 2] = -hb / dx;
}
if (col > 0) {
((double*)values)[nnz1 + 1] = -hw / dx;
}
if (col < nx - 1) {
((double*)values)[nnz2 + 1] = he / dx;
}
if (row < ny - 1) {
((double*)values)[nnz2 + 2] = ht / dx;
}
((double*)rhs)[idx] = -dhudx0 - dhvdy0 + detadt[i];
}// continuity equation end
/* X momentom equation start */
double du = 1e-8;
nnz2 = block_dim * block_dim * (nnz + offset) + block_dim;
if (col > 0) {
nnz0 = block_dim * block_dim * (nnz + offset - 2) + block_dim;
nnz1 = block_dim * block_dim * (nnz + offset - 1) + block_dim;
}
else {
nnz0 = block_dim * block_dim * (nnz + offset - 1) + block_dim;
}
if (col < nx - 1) {
nnz3 = block_dim * block_dim * (nnz + offset + 1) + block_dim;
nnz4 = block_dim * block_dim * (nnz + offset + 2) + block_dim;
}
else {
nnz4 = block_dim * block_dim * (nnz + offset + 1) + block_dim;
}
idx = block_dim * i + 1;
if (col == nx - 1) {
((double*)values)[nnz2 + 1] = 1.0;
((double*)rhs)[idx] = 0.0;
}
else {
double h0 = 0.5 * (h[i] + h[i + 1]);
double v0, v00, v01;
double gdeta0 = gdetadx(gra, eta[i + 1], eta[i], dx);
double frc0, frcu, dfrcu;
double ddu0;
int iup = i + nx;
int idown = i - nx;
if (row == 0) {
v00 = nddv[i] < 0 ? 0.0 : 2.0 * v[i] - v[i + nx];
v01 = nddv[i + 1] < 0 ? 0.0 : 2.0 * v[i + 1] - v[i + nx + 1];
v0 = 0.25 * (v[i] + v[i + 1] + v00 + v01);
idown = iup;
((double*)values)[nnz4 + 1] = -2.0 * eddy / dx / dx;
}
else if (row == ny - 1) {
v00 = nddv[i] < 0 ? 0.0 : 2.0 * v[i - nx] - v[i - 2 * nx];
v01 = nddv[i + 1] < 0 ? 0.0 : 2.0 * v[i - nx + 1] - v[i - 2 * nx + 1];
v0 = 0.25 * (v00 + v01 + v[i - nx] + v[i - nx + 1]);
iup = idown;
((double*)values)[nnz0 + 1] = -2.0 * eddy / dx / dx;
}
else {
v0 = 0.25 * (v[i] + v[i + 1] + v[i - nx] + v[i - nx + 1]);
((double*)values)[nnz0 + 1] = -eddy / dx / dx;
((double*)values)[nnz4 + 1] = -eddy / dx / dx;
}
frc0 = friction(gra, mann, h0, u[i], v0);
frcu = friction(gra, mann, h0, u[i] + du, v0);
dfrcu = (frcu - frc0) / du;
if (nddu[i] == 2 || nddu[i + 1] == 2) // open boundary
{
ddu0 = eddy * (2.0 * u[i] - u[idown] - u[iup]) / dx / dx;
((double*)values)[nnz2 + 1] = 2.0 * eddy / dx / dx + dfrcu;
}
else {
((double*)values)[nnz2 + 1] = 4.0 * eddy / dx / dx + dfrcu;
if (col == 0) {
ddu0 = eddy * (4.0 * u[i] - u[i + 1] - u[idown] - u[iup]) / dx / dx;
((double*)values)[nnz3 + 1] = -2.0 * eddy / dx / dx;
}
else if (col == nx - 2) {
ddu0 = eddy * (4.0 * u[i] - u[i - 1] - u[idown] - u[iup]) / dx / dx;
((double*)values)[nnz1 + 1] = -2.0 * eddy / dx / dx;
}
else {
ddu0 = eddy * (4.0 * u[i] - u[i - 1] - u[i + 1] - u[idown] - u[iup]) / dx / dx;
((double*)values)[nnz1 + 1] = -eddy / dx / dx;
((double*)values)[nnz3 + 1] = -eddy / dx / dx;
}
}
((double*)values)[nnz2] = -gra / dx;
((double*)values)[nnz3] = gra / dx;
((double*)rhs)[idx] = -gdeta0 - frc0 - ddu0;
}// X momentum equation end
/* Y momentom equation start */
nnz2 = block_dim * block_dim * (nnz + offset) + block_dim * 2;
if (col > 0) {
nnz1 = block_dim * block_dim * (nnz + offset - 2) + block_dim * 2;
nnz0 = block_dim * block_dim * (nnz + offset - 1) + block_dim * 2;
}
else {
nnz1 = block_dim * block_dim * (nnz + offset - 1) + block_dim * 2;
}
if (col < nx - 1) {
nnz4 = block_dim * block_dim * (nnz + offset + 1) + block_dim * 2;
nnz3 = block_dim * block_dim * (nnz + offset + 2) + block_dim * 2;
}
else {
nnz3 = block_dim * block_dim * (nnz + offset + 1) + block_dim * 2;
}
idx = block_dim * i + 2;
if (row == ny - 1) {
((double*)values)[nnz2 + 2] = 1.0;
((double*)rhs)[idx] = 0.0;
}
else {
double h0 = 0.5 * (h[i] + h[i + nx]);
double v0, v00, v01;
double gdeta0 = gdetadx(gra, eta[i + nx], eta[i], dx);
double frc0, frcu, dfrcu;
double ddu0;
int iup = i + 1;
int idown = i - 1;
if (col == 0) {
v00 = nddu[i] < 0 ? 0.0 : 2.0 * u[i] - u[i + 1];
v01 = nddu[i + nx] < 0 ? 0.0 : 2.0 * u[i + nx] - u[i + nx + 1];
v0 = 0.25 * (u[i] + u[i + nx] + v00 + v01);
idown = iup;
((double*)values)[nnz4 + 2] = -2.0 * eddy / dx / dx;
}
else if (col == nx - 1) {
v00 = nddu[i] < 0 ? 0.0 : 2.0 * u[i - 1] - u[i - 2];
v01 = nddu[i + nx] < 0 ? 0.0 : 2.0 * u[i - 1 + nx] - u[i - 2 * 1 + nx];
v0 = 0.25 * (v00 + v01 + u[i - 1] + u[i - 1 + nx]);
iup = idown;
((double*)values)[nnz0 + 2] = -2.0 * eddy / dx / dx;
}
else {
v0 = 0.25 * (u[i] + u[i + nx] + u[i - 1] + u[i - 1 + nx]);
((double*)values)[nnz0 + 2] = -eddy / dx / dx;
((double*)values)[nnz4 + 2] = -eddy / dx / dx;
}
frc0 = friction(gra, mann, h0, v[i], v0);
frcu = friction(gra, mann, h0, v[i] + du, v0);
dfrcu = (frcu - frc0) / du;
if (nddv[i] == 2 || nddv[i + nx] == 2) // open boundary
{
ddu0 = eddy * (2.0 * v[i] - v[idown] - v[iup]) / dx / dx;
((double*)values)[nnz2 + 2] = 2.0 * eddy / dx / dx + dfrcu;
}
else {
((double*)values)[nnz2 + 2] = 4.0 * eddy / dx / dx + dfrcu;
if (row == 0) {
ddu0 = eddy * (4.0 * v[i] - v[i + nx] - v[idown] - v[iup]) / dx / dx;
((double*)values)[nnz3 + 2] = -2.0 * eddy / dx / dx;
}
else if (row == ny - 2) {
ddu0 = eddy * (4.0 * v[i] - v[i - nx] - v[idown] - v[iup]) / dx / dx;
((double*)values)[nnz1 + 2] = -2.0 * eddy / dx / dx;
}
else {
ddu0 = eddy * (4.0 * v[i] - v[i - nx] - v[i + nx] - v[idown] - v[iup]) / dx / dx;
((double*)values)[nnz1 + 2] = -eddy / dx / dx;
((double*)values)[nnz3 + 2] = -eddy / dx / dx;
}
}
((double*)values)[nnz2] = -gra / dx;
((double*)values)[nnz3] = gra / dx;
((double*)rhs)[idx] = -gdeta0 - frc0 - ddu0;
}// Y momentum equation end
}
}
int main(int argc, char** argv)
{
//parameter parsing
int pidx = 0;
int pidy = 0;
//MPI (with CUDA GPUs)
int rank = 0;
int lrank = 0;
int nranks = 0;
int n;
int nx, ny;
int gpu_count = 0;
MPI_Comm amgx_mpi_comm = MPI_COMM_WORLD;
//versions
int major, minor;
char* ver, * date, * time;
//input matrix and rhs/solution
int* partition_sizes = NULL;
int* partition_vector = NULL;
int partition_vector_size = 0;
//library handles
AMGX_Mode mode;
AMGX_config_handle cfg;
AMGX_resources_handle rsrc;
AMGX_matrix_handle A;
AMGX_vector_handle b, x;
AMGX_solver_handle solver;
AMGX_SOLVE_STATUS status;
MPI_Init(&argc, &argv);
MPI_Comm_size(amgx_mpi_comm, &nranks);
MPI_Comm_rank(amgx_mpi_comm, &rank);
CUDA_SAFE_CALL(cudaGetDeviceCount(&gpu_count));
lrank = rank % gpu_count;
CUDA_SAFE_CALL(cudaSetDevice(lrank));
printf("Process %d selecting device %d\n", rank, lrank);
nx = 20;
ny = 20;
n = nx * ny;
/* init */
AMGX_SAFE_CALL(AMGX_initialize());
AMGX_SAFE_CALL(AMGX_initialize_plugins());
/* system */
AMGX_SAFE_CALL(AMGX_register_print_callback(&print_callback));
AMGX_SAFE_CALL(AMGX_install_signal_handler());
mode = AMGX_mode_dDDI;
int sizeof_m_val = ((AMGX_GET_MODE_VAL(AMGX_MatPrecision, mode) == AMGX_matDouble)) ? sizeof(double) : sizeof(float);
int sizeof_v_val = ((AMGX_GET_MODE_VAL(AMGX_VecPrecision, mode) == AMGX_vecDouble)) ? sizeof(double) : sizeof(float);
AMGX_SAFE_CALL(AMGX_config_create_from_file(&cfg, "FGMRES_AGGREGATION_JACOBI.json"));
AMGX_SAFE_CALL(AMGX_config_add_parameters(&cfg, "tolerance=1e-16"));
AMGX_SAFE_CALL(AMGX_config_add_parameters(&cfg, "exception_handling=1"));
AMGX_resources_create(&rsrc, cfg, &amgx_mpi_comm, 1, &lrank);
AMGX_matrix_create(&A, rsrc, mode);
AMGX_vector_create(&x, rsrc, mode);
AMGX_vector_create(&b, rsrc, mode);
AMGX_solver_create(&solver, rsrc, mode, cfg);
int nrings; //=1; //=2;
AMGX_config_get_default_number_of_rings(cfg, &nrings);
//printf("nrings=%d\n",nrings);
int nglobal = 0;
/* generate the matrix
In more detail, this routine will create 2D (5 point) discretization of the
Poisson operator. The discretization is performed on a the 2D domain consisting
of nx and ny points in x- and y-dimension respectively. Each rank processes it's
own part of discretization points. Finally, the rhs and solution will be set to
a vector of ones and zeros, respectively. */
int* row_ptrs = (int*)malloc((n + 1) * sizeof(int));
int* pivot_offsets = (int*)malloc((n) * sizeof(int));
int64_t* col_idxs = (int64_t*)malloc(5 * n * sizeof(int64_t));
int block_dimx = 3;
int block_dimy = 3;
void* values = malloc( block_dimx * block_dimy * 5 * n * sizeof(double)); // maximum nnz
int nnz = 0;
int count = 0;
///////////////////////////////////////////////////////
///////////////////////////////////////////////////////
double dx = 5.0;
double gra = 9.81;
double mann = 0.012;
double eddy = 0.5;
double Trange = 1.0;
double Tperiod = 43200.0;
int* ndd;
int* nddu;
int* nddv;
double* u;
double* v;
double* eta;
double* h;
double* detadt;
void* rhs;
void* h_x;
// Allocate memory on host
ndd = (int*)malloc(n * sizeof(int));
nddu = (int*)malloc(n * sizeof(int));
nddv = (int*)malloc(n * sizeof(int));
u = (double*)malloc(n * sizeof(double));
v = (double*)malloc(n * sizeof(double));
eta = (double*)malloc(n * sizeof(double));
h = (double*)malloc(n * sizeof(double));
detadt = (double*)malloc(n * sizeof(double));
rhs = malloc(block_dimx * n * sizeof(double));
h_x = malloc(block_dimx * n * sizeof(double));
// mebset h_x to be 0
memset(h_x, 0, (block_dimx * n) * sizeof(double));
// memset values to be 0
memset(values, 0, (block_dimx * block_dimy * 5 * n) * sizeof(double));
// Allocate memory on device
double* d_values;
int* d_row_ptrs;
int* d_col_idxs;
double* d_rhs;
double* d_x;
// Initialize host memory
for (int i = 0; i < n; i++) {
int row = i / nx; // row index in the 2D work array
int col = i % nx; // col index in the 1D work array
if (col == 0) {
ndd[i] = 2;
}
else {
ndd[i] = 0;
}
if (col == 0) {
nddu[i] = 2;
}
else if (col == nx - 1) {
nddu[i] = -1;
}
else {
nddu[i] = 0;
}
if (row == 0) {
nddv[i] = -1;
}
else if (row == ny - 1) {
nddv[i] = -1;
}
else {
nddv[i] = 0;
}
u[i] = 0.0;
v[i] = 0.0;
eta[i] = 0.0;
h[i] = 2.0;
detadt[i] = M_PI * Trange / Tperiod;
}
// initialize sparse matrix (determine row_ptrs and col_idxs)
nnz = 0;
for (int i = 0; i < n; i++) {
int row = i / nx;
int col = i % nx;
row_ptrs[i] = nnz;
if (col == 0 && row == 0){
pivot_offsets[i] = 0;
col_idxs[nnz++] = i;
col_idxs[nnz++] = i + 1;
col_idxs[nnz++] = i + nx;
}
else if (col == 0 && row == ny - 1) {
pivot_offsets[i] = 1;
col_idxs[nnz++] = i - nx;
col_idxs[nnz++] = i;
col_idxs[nnz++] = i + 1;
}
else if (col == nx - 1 && row == 0) {
pivot_offsets[i] = 1;
col_idxs[nnz++] = i - 1;
col_idxs[nnz++] = i;
col_idxs[nnz++] = i + nx;
}
else if (col == nx - 1 && row == ny - 1) {
pivot_offsets[i] = 2;
col_idxs[nnz++] = i - nx;
col_idxs[nnz++] = i - 1;
col_idxs[nnz++] = i;
}
else if (col == 0) {
pivot_offsets[i] = 1;
col_idxs[nnz++] = i - nx;
col_idxs[nnz++] = i;
col_idxs[nnz++] = i + 1;
col_idxs[nnz++] = i + nx;
}
else if (col == nx - 1) {
pivot_offsets[i] = 2;
col_idxs[nnz++] = i - nx;
col_idxs[nnz++] = i - 1;
col_idxs[nnz++] = i;
col_idxs[nnz++] = i + nx;
}
else if (row == 0) {
pivot_offsets[i] = 1;
col_idxs[nnz++] = i - 1;
col_idxs[nnz++] = i;
col_idxs[nnz++] = i + 1;
col_idxs[nnz++] = i + nx;
}
else if (row == ny - 1) {
pivot_offsets[i] = 2;
col_idxs[nnz++] = i - nx;
col_idxs[nnz++] = i - 1;
col_idxs[nnz++] = i;
col_idxs[nnz++] = i + 1;
}
else {
pivot_offsets[i] = 2;
col_idxs[nnz++] = i - nx;
col_idxs[nnz++] = i - 1;
col_idxs[nnz++] = i;
col_idxs[nnz++] = i + 1;
col_idxs[nnz++] = i + nx;
}
}
row_ptrs[n] = nnz;
FILE* fp;
for (int it = 0; it < 1; it++) {
SWEValKernel(n, nx, ny, block_dimx, dx, gra, mann, eddy, ndd, nddu, nddv, eta, u, v, h, detadt, (double*)values, rhs, row_ptrs, pivot_offsets);
///////////////////////////////////////////////////////
///////////////////////////////////////////////////////
fp = fopen("values.bin", "wb");
fwrite(values, sizeof(double), block_dimx* block_dimy * nnz, fp);
fclose(fp);
// print values to a text file
fp = fopen("values.txt", "w");
for (int i = 0; i < block_dimx * block_dimy * nnz; i++) {
fprintf(fp, "%f\n", ((double*)values)[i]);
}
fclose(fp);
fp = fopen("rhs.bin", "wb");
fwrite(rhs, sizeof(double), 3 * n, fp);
fclose(fp);
// print rhs to a text file
fp = fopen("rhs.txt", "w");
for (int i = 0; i < 3 * n; i++) {
fprintf(fp, "%f\n", ((double*)rhs)[i]);
}
fclose(fp);
fp = fopen("row_ptrs.bin", "wb");
fwrite(row_ptrs, sizeof(int), (n + 1), fp);
fclose(fp);
// print row_ptrs to a text file
fp = fopen("row_ptrs.txt", "w");
for (int i = 0; i < n + 1; i++) {
fprintf(fp, "%d\n", row_ptrs[i]);
}
fclose(fp);
fp = fopen("col_idxs.bin", "wb");
fwrite(col_idxs, sizeof(int64_t), 5 * n, fp);
fclose(fp);
// print col_idxs to a text file
fp = fopen("col_idxs.txt", "w");
for (int i = 0; i < 5 * n; i++) {
fprintf(fp, "%d\n", col_idxs[i]);
}
fclose(fp);
AMGX_matrix_upload_all_global(A,
n, n, nnz, block_dimx, block_dimx,
row_ptrs, col_idxs, values, NULL,
nrings, nrings, NULL);
/* set the connectivity information (for the vector) */
AMGX_vector_bind(x, A);
AMGX_vector_bind(b, A);
/* upload the vector (and the connectivity information) */
AMGX_vector_upload(x, n, 3, h_x);
AMGX_vector_upload(b, n, 3, rhs);
/* solver setup */
//MPI barrier for stability (should be removed in practice to maximize performance)
MPI_Barrier(amgx_mpi_comm);
AMGX_solver_setup(solver, A);
/* solver solve */
//MPI barrier for stability (should be removed in practice to maximize performance)
MPI_Barrier(amgx_mpi_comm);
AMGX_solver_solve(solver, b, x);
/* example of how to change parameters between non-linear iterations */
//AMGX_config_add_parameters(&cfg, "config_version=2, default:tolerance=1e-12");
//AMGX_solver_solve(solver, b, x);
/* example of how to replace coefficients between non-linear iterations */
//AMGX_matrix_replace_coefficients(A, n, nnz, values, diag);
//AMGX_solver_setup(solver, A);
//AMGX_solver_solve(solver, b, x);
AMGX_solver_get_status(solver, &status);
/* example of how to get (the local part of) the solution */
//int sizeof_v_val;
//sizeof_v_val = ((NVAMG_GET_MODE_VAL(NVAMG_VecPrecision, mode) == NVAMG_vecDouble))? sizeof(double): sizeof(float);
//
void* SLN = malloc((3 * n) * sizeof(double));
AMGX_vector_download(x, SLN);
//// update eta, u and v
for (int i = 0; i < n; i++) {
eta[i] = eta[i] + ((double*)SLN)[i * 3];
}
//for (int i = 0; i < n + ny; i++) {
// u[i] = u[i] + ((double*)SLN)[n + i];
//}
//for (int i = 0; i < n + nx; i++) {
// v[i] = v[i] + ((double*)SLN)[2 * n + ny + i];
//}
}
//fp = fopen("SLN2.bin", "wb");
//fwrite(h_x, sizeof(double), (3 * n + nx + ny), fp);
//fclose(fp);
//// export u
//fp = fopen("u.bin", "wb");
//fwrite(u, sizeof(double), (n + ny), fp);
//fclose(fp);
//// export v
//fp = fopen("v.bin", "wb");
//fwrite(v, sizeof(double), (n + nx), fp);
//fclose(fp);
// export eta
fp = fopen("eta.bin", "wb");
fwrite(eta, sizeof(double), n, fp);
fclose(fp);
///////////////////////////////////////////////////////
////////////////////////////////////////////////////
//
}
The text was updated successfully, but these errors were encountered:
I generate a block matrix with 3x3 blocks. and I used FGMRES_AGGREGATION_JACOBI.json configureation to solve the system but it returns:
Environment information:
OS: Windows 11
CUDA runtime: CUDA 11.
AMGX version: v2.3.0
NVIDIA driver: 12.2
NVIDIA GPU: 3060ti
AMGX solver configuration
{
"config_version": 2,
"solver": {
"preconditioner": {
"error_scaling": 0,
"print_grid_stats": 1,
"algorithm": "AGGREGATION",
"solver": "AMG",
"smoother": "BLOCK_JACOBI",
"presweeps": 0,
"selector": "SIZE_2",
"coarse_solver": "NOSOLVER",
"max_iters": 1,
"min_coarse_rows": 32,
"relaxation_factor": 0.75,
"scope": "amg",
"max_levels": 100,
"postsweeps": 3,
"cycle": "V"
},
"use_scalar_norm": 1,
"solver": "FGMRES",
"print_solve_stats": 1,
"obtain_timings": 1,
"max_iters": 1000,
"monitor_residual": 1,
"gmres_n_restart": 32,
"convergence": "RELATIVE_INI",
"scope": "main",
"tolerance": 0.0001,
"norm": "L2"
}
}
Matrix Data
The form of matrix is in 3x3 block CSR form
col_idxs.txt
rhs.txt
row_ptrs.txt
values.txt
the shape of the sparse matrix is like this:
the black boxes are blocks and the red points are non-zero values.
the values of the matrix points are like this:
The challenge in solving this linear equation lies in the small diagonal values of the matrix. To address this, I attempted to use a block matrix approach.
Reproduction steps
Below is the code snippet. You can directly copy the entire code into a .c file and run it. In the file, you only need to adjust the value of nx and ny
The text was updated successfully, but these errors were encountered: