diff --git a/src/C-interface/csr/bml_allocate_csr_typed.c b/src/C-interface/csr/bml_allocate_csr_typed.c index 4a325dde..fb6c8c79 100644 --- a/src/C-interface/csr/bml_allocate_csr_typed.c +++ b/src/C-interface/csr/bml_allocate_csr_typed.c @@ -273,6 +273,11 @@ bml_matrix_csr_t *TYPED_FUNC( * already allocated then the matrix will be deallocated in the * process. * + * * NOTE: + * The resulting nonzero structure is not necessarily uniform since we + * are sampling between [0, N], N << RAND_MAX. The diagonal entry is + * stored first. + * * \ingroup allocate_group * * \param matrix_precision The precision of the matrix. The default @@ -291,20 +296,47 @@ bml_matrix_csr_t *TYPED_FUNC( bml_matrix_csr_t *A = TYPED_FUNC(bml_zero_matrix_csr) (N, M, distrib_mode); -#pragma omp parallel for + int *col_marker = bml_allocate_memory(sizeof(int) * N ); + int *col_marker_pos = bml_allocate_memory(sizeof(int) * M ); + + const REAL_T INV_RAND_MAX = 1.0 / (REAL_T) RAND_MAX; + +/* initialize col_marker */ + for (int j = 0; j < N; j++) + { + col_marker[j] = -1; + } + +//#pragma omp parallel for for (int i = 0; i < N; i++) { - int jind = 0; csr_sparse_row_t *row = A->data_[i]; int *col_indexes = row->cols_; REAL_T *row_vals = row->vals_; + int col = i; + int nnz_row = 0; for (int j = 0; j < M; j++) { - col_indexes[jind] = j; - row_vals[jind] = rand() / (REAL_T) RAND_MAX; - jind++; + if(col_marker[col] == -1) + { + row_vals[nnz_row] = rand() * INV_RAND_MAX; + col_indexes[nnz_row] = col; + /* save position of col_marker */ + col_marker_pos[nnz_row] = col; + /* mark column index position */ + col_marker[col] = 1; + nnz_row++; + } + /* generate random column index 0 >= col < N */ + col = rand() % N; + } + /* update nnz of row */ + row->NNZ_ = nnz_row; + /* reset col_marker */ + for (int j = 0; j < nnz_row; j++) + { + col_marker[col_marker_pos[j]] = -1; } - row->NNZ_ = jind; } /** initialize hash table */ if (distrib_mode == sequential) @@ -317,6 +349,10 @@ bml_matrix_csr_t *TYPED_FUNC( A->table_ = NULL; } + /* free memory */ + bml_free_memory(col_marker); + bml_free_memory(col_marker_pos); + return A; } diff --git a/src/C-interface/ellblock/bml_allocate_ellblock_typed.c b/src/C-interface/ellblock/bml_allocate_ellblock_typed.c index ba57a2d8..43fc3718 100644 --- a/src/C-interface/ellblock/bml_allocate_ellblock_typed.c +++ b/src/C-interface/ellblock/bml_allocate_ellblock_typed.c @@ -330,35 +330,65 @@ bml_matrix_ellblock_t *TYPED_FUNC( //now fill with random values int NB = A->NB; int MB = A->MB; - REAL_T **A_ptr_value = (REAL_T **) A->ptr_value; int *A_indexb = A->indexb; int *A_nnzb = A->nnzb; + const REAL_T INV_RAND_MAX = 1.0 / (REAL_T) RAND_MAX; + + int *bcol_marker = bml_allocate_memory(sizeof(int) * NB ); + int *bcol_marker_pos = bml_allocate_memory(sizeof(int) * MB ); +/* initialize bcol_marker */ + for (int j = 0; j < NB; j++) + { + bcol_marker[j] = -1; + } + for (int ib = 0; ib < NB; ib++) { - for (int jp = 0; jp < MB; jp++) + int bcol = ib; + int bnnz_row = 0; + for (int jb = 0; jb < MB; jb++) { - int ind = ROWMAJOR(ib, jp, NB, MB); - A_indexb[ind] = jp; - int jb = A_indexb[ind]; - - //allocate storage - int nelements = bsize[ib] * bsize[jb]; - A_ptr_value[ind] = - TYPED_FUNC(bml_allocate_block_ellblock) (A, ib, nelements); - - REAL_T *A_value = A_ptr_value[ind]; - assert(A_value != NULL); - for (int ii = 0; ii < bsize[ib]; ii++) - for (int jj = 0; jj < bsize[jb]; jj++) - { - A_value[ROWMAJOR(ii, jj, bsize[ib], bsize[jb])] = - rand() / (REAL_T) RAND_MAX; + if(bcol_marker[bcol] == -1) + { + int ind = ROWMAJOR(ib, bnnz_row, NB, MB); + A_indexb[ind] = bcol; + //allocate storage + int nelements = bsize[ib] * bsize[bcol]; + A_ptr_value[ind] = + TYPED_FUNC(bml_allocate_block_ellblock) (A, ib, nelements); + + REAL_T *A_value = A_ptr_value[ind]; + assert(A_value != NULL); + for (int ii = 0; ii < bsize[ib]; ii++) + { + for (int jj = 0; jj < bsize[bcol]; jj++) + { + A_value[ROWMAJOR(ii, jj, bsize[ib], bsize[bcol])] = + rand() / (REAL_T) RAND_MAX; + } } + /* save position of col_marker */ + bcol_marker_pos[bnnz_row] = bcol; + /* mark column index position */ + bcol_marker[bcol] = 1; + bnnz_row++; + } + /* generate random column index 0 >= bcol < NB */ + bcol = rand() % NB; } - A_nnzb[ib] = MB; + A_nnzb[ib] = bnnz_row; + /* reset col_marker */ + for (int j = 0; j < bnnz_row; j++) + { + bcol_marker[bcol_marker_pos[j]] = -1; + } } + /* free memory */ + bml_free_memory(bcol_marker); + bml_free_memory(bcol_marker_pos); + return A; } diff --git a/src/C-interface/ellblock/bml_multiply_ellblock_typed.c b/src/C-interface/ellblock/bml_multiply_ellblock_typed.c index 094b620e..aefab749 100644 --- a/src/C-interface/ellblock/bml_multiply_ellblock_typed.c +++ b/src/C-interface/ellblock/bml_multiply_ellblock_typed.c @@ -234,6 +234,9 @@ void TYPED_FUNC( * * \f$ X^{2} \leftarrow X \, X \f$ * + * Note: the matrix X2 is overwritten with the result. + * Since X2 and X may have different non-zero patterns, we need to clear X2 before overwriting. + * * \ingroup multiply_group * * \param X Matrix X @@ -252,12 +255,14 @@ void *TYPED_FUNC( int *X_nnzb = X->nnzb; int *bsize = X->bsize; + REAL_T traceX = 0.0; + REAL_T **X_ptr_value = (REAL_T **) X->ptr_value; + + /* clear X2 and set pointers to data */ + TYPED_FUNC(bml_clear_ellblock) (X2); int *X2_indexb = X2->indexb; int *X2_nnzb = X2->nnzb; - - REAL_T traceX = 0.0; REAL_T traceX2 = 0.0; - REAL_T **X_ptr_value = (REAL_T **) X->ptr_value; REAL_T **X2_ptr_value = (REAL_T **) X2->ptr_value; double *trace = bml_allocate_memory(sizeof(double) * 2); @@ -411,10 +416,9 @@ void *TYPED_FUNC( int nelements = bsize[ib] * bsize[jp]; int ind = ROWMAJOR(ib, ll, NB, MB); assert(ind < NB * MB); - if (X2_ptr_value[ind] == NULL) - X2_ptr_value[ind] = - TYPED_FUNC(bml_allocate_block_ellblock) (X2, ib, - nelements); + /* Allocate memory to hold block */ + X2_ptr_value[ind] = + TYPED_FUNC(bml_allocate_block_ellblock) (X2, ib, nelements); REAL_T *X2_value = X2_ptr_value[ind]; assert(X2_value != NULL); memcpy(X2_value, xtmp, nelements * sizeof(REAL_T)); diff --git a/src/C-interface/ellpack/bml_allocate_ellpack_typed.c b/src/C-interface/ellpack/bml_allocate_ellpack_typed.c index 78882299..594fb086 100644 --- a/src/C-interface/ellpack/bml_allocate_ellpack_typed.c +++ b/src/C-interface/ellpack/bml_allocate_ellpack_typed.c @@ -316,6 +316,12 @@ bml_matrix_ellpack_t *TYPED_FUNC( * Note that the matrix \f$ a \f$ will be newly allocated. If it is * already allocated then the matrix will be deallocated in the * process. + * + * NOTE: + * The resulting nonzero structure is not necessarily uniform since we + * are sampling between [0, N], N << RAND_MAX. The diagonal entry is + * stored first. + * * * \ingroup allocate_group * @@ -338,21 +344,50 @@ bml_matrix_ellpack_t *TYPED_FUNC( bml_matrix_ellpack_t *A = TYPED_FUNC(bml_zero_matrix_ellpack) (N, M, distrib_mode); + int *col_marker = bml_allocate_memory(sizeof(int) * N ); + int *col_marker_pos = bml_allocate_memory(sizeof(int) * M ); + REAL_T *A_value = A->value; int *A_index = A->index; int *A_nnz = A->nnz; const REAL_T INV_RAND_MAX = 1.0 / (REAL_T) RAND_MAX; + +/* initialize col_marker */ + for (int j = 0; j < N; j++) + { + col_marker[j] = -1; + } for (int i = 0; i < N; i++) { - int jind = 0; + int col = i; + int nnz_row = 0; for (int j = 0; j < M; j++) { - A_value[ROWMAJOR(i, jind, N, M)] = rand() * INV_RAND_MAX; - A_index[ROWMAJOR(i, jind, N, M)] = j; - jind++; + if(col_marker[col] == -1) + { + A_value[ROWMAJOR(i, nnz_row, N, M)] = rand() * INV_RAND_MAX; + A_index[ROWMAJOR(i, nnz_row, N, M)] = col; + /* save position of col_marker */ + col_marker_pos[nnz_row] = col; + /* mark column index position */ + col_marker[col] = 1; + nnz_row++; + } + /* generate random column index 0 >= col < N */ + col = rand() % N; + } + /* update nnz of row */ + A_nnz[i] = nnz_row; + /* reset col_marker */ + for (int j = 0; j < nnz_row; j++) + { + col_marker[col_marker_pos[j]] = -1; } - A_nnz[i] = jind; } + /* free memory */ + bml_free_memory(col_marker); + bml_free_memory(col_marker_pos); + #if defined(USE_OMP_OFFLOAD) #pragma omp target update to(A_value[:N*M], A_index[:N*M], A_nnz[:N]) #endif