diff --git a/src/C-interface/dense/bml_multiply_dense_typed.c b/src/C-interface/dense/bml_multiply_dense_typed.c index 7d7e8ff0..ac28490b 100644 --- a/src/C-interface/dense/bml_multiply_dense_typed.c +++ b/src/C-interface/dense/bml_multiply_dense_typed.c @@ -68,6 +68,7 @@ void TYPED_FUNC( MAGMA(gemm) (MagmaNoTrans, MagmaNoTrans, A->N, A->N, A->N, alpha_, B->matrix, B->ld, A->matrix, A->ld, beta_, C->matrix, C->ld, bml_queue()); + magma_queue_sync(bml_queue()); #elif defined(MKL_GPU) int sizea = A->N * A->N; int dnum = 0; diff --git a/src/C-interface/dense/bml_transpose_dense_typed.c b/src/C-interface/dense/bml_transpose_dense_typed.c index d3f99de4..42fa6496 100644 --- a/src/C-interface/dense/bml_transpose_dense_typed.c +++ b/src/C-interface/dense/bml_transpose_dense_typed.c @@ -50,6 +50,7 @@ bml_matrix_dense_t *TYPED_FUNC( #ifdef BML_USE_MAGMA MAGMABLAS(transpose) (A->N, A->N, A->matrix, A->ld, B->matrix, B->ld, bml_queue()); + magma_queue_sync(bml_queue()); #else #ifdef MKL_GPU #pragma omp target update from(A_matrix[0:N*N])