Skip to content

Commit

Permalink
TL/MLX5: clean and fix after rebase
Browse files Browse the repository at this point in the history
lintrunner

cleaning
  • Loading branch information
samnordmann committed Dec 18, 2024
1 parent b8fe903 commit 43dd1d7
Show file tree
Hide file tree
Showing 8 changed files with 194 additions and 161 deletions.
6 changes: 3 additions & 3 deletions src/components/tl/mlx5/alltoall/alltoall.h
Original file line number Diff line number Diff line change
Expand Up @@ -58,9 +58,9 @@ typedef struct ucc_tl_mlx5_alltoall_node {
struct mlx5dv_mkey *team_recv_mkey;
void *umr_entries_buf;
struct ibv_mr *umr_entries_mr;
int fanin_index;
int fanin_dist;
int fanin_max_dist;
int fanin_index;
int fanin_dist;
int fanin_max_dist;
} ucc_tl_mlx5_alltoall_node_t;

typedef struct alltoall_net_ctrl {
Expand Down
255 changes: 144 additions & 111 deletions src/components/tl/mlx5/alltoall/alltoall_coll.c

Large diffs are not rendered by default.

12 changes: 7 additions & 5 deletions src/components/tl/mlx5/alltoall/alltoall_mkeys.c
Original file line number Diff line number Diff line change
Expand Up @@ -299,7 +299,7 @@ ucc_status_t ucc_tl_mlx5_populate_send_recv_mkeys(ucc_tl_mlx5_team_t * team,
if (ucc_tl_mlx5_get_my_ctrl(a2a, seq_index)->mkey_cache_flag &
UCC_MLX5_NEED_SEND_MKEY_UPDATE) {
repeat_count = nbc ? a2a->net.sbgp->group_size
: UCC_TL_TEAM_SIZE(team) / req->alltoall.block_width;
: UCC_TL_TEAM_SIZE(team) / req->alltoall.block_width;
for (i = 0; i < n_mkeys; i++) {
status = populate_strided_mkey(a2a, send_mem_access_flags,
node->ops[seq_index].send_mkeys[i],
Expand All @@ -314,8 +314,9 @@ ucc_status_t ucc_tl_mlx5_populate_send_recv_mkeys(ucc_tl_mlx5_team_t * team,
}
if (ucc_tl_mlx5_get_my_ctrl(a2a, seq_index)->mkey_cache_flag &
UCC_MLX5_NEED_RECV_MKEY_UPDATE) {
repeat_count = nbc ? a2a->net.sbgp->group_size
: UCC_TL_TEAM_SIZE(team) / req->alltoall.block_height;
repeat_count =
nbc ? a2a->net.sbgp->group_size
: UCC_TL_TEAM_SIZE(team) / req->alltoall.block_height;
for (i = 0; i < n_mkeys; i++) {
status = populate_strided_mkey(a2a, recv_mem_access_flags,
node->ops[seq_index].recv_mkeys[i],
Expand All @@ -336,7 +337,7 @@ static void update_mkey_entry(ucc_tl_mlx5_alltoall_t *a2a,
{
ucc_tl_mlx5_alltoall_node_t *node = &a2a->node;
int block_height = req->alltoall.block_height;
int block_width = req->alltoall.block_width;
int block_width = req->alltoall.block_width;
size_t msg_size = req->alltoall.msg_size;
int nbc = req->alltoall.num_of_blocks_columns;
struct ibv_mr *buff = direction_send
Expand All @@ -349,7 +350,8 @@ static void update_mkey_entry(ucc_tl_mlx5_alltoall_t *a2a,
mkey_entry = (umr_t *)(direction_send ? MY_SEND_UMR_DATA(req, a2a, 0)
: MY_RECV_UMR_DATA(req, a2a, 0));
mkey_entry->addr = (uintptr_t)buff->addr;
mkey_entry->bytes_count = (direction_send? block_width : block_height) * msg_size;
mkey_entry->bytes_count =
(direction_send ? block_width : block_height) * msg_size;
mkey_entry->bytes_skip = 0;
mkey_entry->lkey = direction_send ? buff->lkey : buff->rkey;
} else {
Expand Down
27 changes: 17 additions & 10 deletions src/components/tl/mlx5/tl_mlx5.c
Original file line number Diff line number Diff line change
Expand Up @@ -28,12 +28,15 @@ static ucc_config_field_t ucc_tl_mlx5_lib_config_table[] = {
ucc_offsetof(ucc_tl_mlx5_lib_config_t, dm_buf_num),
UCC_CONFIG_TYPE_ULUNITS},

{"FORCE_REGULAR", "y", "Force the regular case where the block dimensions "
"divide ppn. Requires BLOCK_SIZE=0",
ucc_offsetof(ucc_tl_mlx5_lib_config_t, force_regular), UCC_CONFIG_TYPE_BOOL},
{"FORCE_REGULAR", "y",
"Force the regular case where the block dimensions "
"divide ppn. Requires BLOCK_SIZE=0",
ucc_offsetof(ucc_tl_mlx5_lib_config_t, force_regular),
UCC_CONFIG_TYPE_BOOL},

{"FORCE_LONGER", "y", "Force the blocks to have more height than width",
ucc_offsetof(ucc_tl_mlx5_lib_config_t, force_longer), UCC_CONFIG_TYPE_BOOL},
ucc_offsetof(ucc_tl_mlx5_lib_config_t, force_longer),
UCC_CONFIG_TYPE_BOOL},

{"FORCE_WIDER", "n", "Force the blocks to have more width than height",
ucc_offsetof(ucc_tl_mlx5_lib_config_t, force_wider), UCC_CONFIG_TYPE_BOOL},
Expand Down Expand Up @@ -118,18 +121,21 @@ static ucc_config_field_t ucc_tl_mlx5_lib_config_table[] = {
ucc_offsetof(ucc_tl_mlx5_lib_config_t, fanin_kn_radix),
UCC_CONFIG_TYPE_UINT},

{"SEND_BATCH_SIZE", "1", "number of blocks that are transposed "
"on the NIC before being sent as a batch to a remote peer",
{"SEND_BATCH_SIZE", "1",
"number of blocks that are transposed "
"on the NIC before being sent as a batch to a remote peer",
ucc_offsetof(ucc_tl_mlx5_lib_config_t, block_batch_size),
UCC_CONFIG_TYPE_UINT},

{"NBR_SERIALIZED_BATCHES", "1", "number of block batches "
"(within the set of blocks to be sent to a given remote peer) "
{"NBR_SERIALIZED_BATCHES", "1",
"number of block batches "
"(within the set of blocks to be sent to a given remote peer) "
"serialized on the same device memory chunk",
ucc_offsetof(ucc_tl_mlx5_lib_config_t, nbr_serialized_batches),
UCC_CONFIG_TYPE_UINT},

{"NBR_BATCHES_PER_PASSAGE", "32", "",
{"NBR_BATCHES_PER_PASSAGE", "32",
"number of batches of blocks sent to one remote node before enqueing",
ucc_offsetof(ucc_tl_mlx5_lib_config_t, nbr_batches_per_passage),
UCC_CONFIG_TYPE_UINT},

Expand All @@ -155,7 +161,8 @@ static ucc_config_field_t ucc_tl_mlx5_context_config_table[] = {
ucc_offsetof(ucc_tl_mlx5_context_config_t, mcast_ctx_conf.ib_dev_name),
UCC_CONFIG_TYPE_STRING},
{"FANIN_NPOLLS", "1000",
"Number of shared memory polling before returning UCC_INPROGRESS during internode FANIN",
"Number of shared memory polling before returning UCC_INPROGRESS during "
"internode FANIN",
ucc_offsetof(ucc_tl_mlx5_context_config_t, npolls), UCC_CONFIG_TYPE_UINT},

{NULL}};
Expand Down
6 changes: 3 additions & 3 deletions src/components/tl/mlx5/tl_mlx5.h
Original file line number Diff line number Diff line change
Expand Up @@ -65,8 +65,8 @@ typedef struct ucc_tl_mlx5_lib_config {
int nbr_batches_per_passage;
int block_batch_size;
int force_regular;
int force_longer;
int force_wider;
int force_longer;
int force_wider;
} ucc_tl_mlx5_lib_config_t;

typedef struct ucc_tl_mlx5_context_config {
Expand Down Expand Up @@ -103,7 +103,7 @@ typedef struct ucc_tl_mlx5_task ucc_tl_mlx5_task_t;
typedef struct ucc_tl_mlx5_schedule ucc_tl_mlx5_schedule_t;
typedef struct ucc_tl_mlx5_dm_chunk_t {
uintptr_t addr; // 0 based offset from the beginning of
// memic_mr (obtained with ibv_reg_dm_mr)
// memic_mr (obtained with ibv_reg_dm_mr)
ucc_tl_mlx5_schedule_t *task;
int posted_sends;
int posted_all;
Expand Down
39 changes: 18 additions & 21 deletions src/components/tl/mlx5/tl_mlx5_dm.c
Original file line number Diff line number Diff line change
Expand Up @@ -77,21 +77,16 @@ static void ucc_tl_mlx5_dm_chunk_init(ucc_mpool_t *mp, //NOLINT
ucc_tl_mlx5_dm_chunk_t *c = (ucc_tl_mlx5_dm_chunk_t *)obj;
ucc_tl_mlx5_team_t *team =
ucc_container_of(mp, ucc_tl_mlx5_team_t, dm_pool);
c->addr = (uintptr_t)PTR_OFFSET(
(UCC_TL_MLX5_TEAM_LIB(team)->cfg.dm_host)?
team->dm_ptr : NULL,
team->dm_offset);
team->dm_offset = team->dm_offset +
UCC_TL_MLX5_TEAM_LIB(team)->cfg.dm_buf_size
* UCC_TL_MLX5_TEAM_LIB(team)->cfg.block_batch_size;
c->posted_sends = 0;
c->posted_all=0;
c->completed_sends = 0;
}

c->offset = (ptrdiff_t)team->dm_offset;
team->dm_offset = PTR_OFFSET(team->dm_offset,
UCC_TL_MLX5_TEAM_LIB(team)->cfg.dm_buf_size);
c->addr = (uintptr_t)PTR_OFFSET(
(UCC_TL_MLX5_TEAM_LIB(team)->cfg.dm_host) ? team->dm_ptr : NULL,
team->dm_offset);
c->posted_sends = 0;
c->posted_all = 0;
c->completed_sends = 0;
team->dm_offset =
team->dm_offset + UCC_TL_MLX5_TEAM_LIB(team)->cfg.dm_buf_size *
UCC_TL_MLX5_TEAM_LIB(team)->cfg.block_batch_size;
}

static ucc_mpool_ops_t ucc_tl_mlx5_dm_ops = {
Expand Down Expand Up @@ -230,17 +225,19 @@ ucc_status_t ucc_tl_mlx5_dm_init(ucc_tl_mlx5_team_t *team)
}

status = ucc_tl_mlx5_dm_alloc_reg(
ctx->shared_ctx, ctx->shared_pd, cfg->dm_host, cfg->dm_buf_size * cfg->block_batch_size,
&cfg->dm_buf_num, &team->dm_ptr, &team->dm_mr, UCC_TL_TEAM_LIB(team));
ctx->shared_ctx, ctx->shared_pd, cfg->dm_host,
cfg->dm_buf_size * cfg->block_batch_size, &cfg->dm_buf_num,
&team->dm_ptr, &team->dm_mr, UCC_TL_TEAM_LIB(team));
if (status != UCC_OK) {
goto err_dm_alloc;
}
team->dm_offset = 0;
// TODO: fix case dm_host=true
status = ucc_mpool_init(&team->dm_pool, 0, sizeof(ucc_tl_mlx5_dm_chunk_t),
0, UCC_CACHE_LINE_SIZE, 1,
cfg->dm_buf_num, &ucc_tl_mlx5_dm_ops,
ctx->super.super.ucc_context->thread_mode, "mlx5 dm pool");
// TODO: fix/check the case dm_host=true
ucc_assert(!cfg->dm_host);
status = ucc_mpool_init(
&team->dm_pool, 0, sizeof(ucc_tl_mlx5_dm_chunk_t), 0,
UCC_CACHE_LINE_SIZE, 1, cfg->dm_buf_num, &ucc_tl_mlx5_dm_ops,
ctx->super.super.ucc_context->thread_mode, "mlx5 dm pool");
if (status != UCC_OK) {
tl_debug(UCC_TL_TEAM_LIB(team), "failed to init dm pool");
goto err_mpool_init;
Expand Down
3 changes: 2 additions & 1 deletion src/components/tl/mlx5/tl_mlx5_wqe.c
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,8 @@ static inline uint8_t get_umr_mr_flags(uint32_t acc)

typedef struct transpose_seg {
__be32 element_size; /* 8 bit value */
__be16 num_cols; /* 7 bit value */ //TODO: from PRM we should have the rows first and then the colls... is this a bug ?
// Per the PRM, the rows should come first and then the columns; this is probably a naming error.
__be16 num_cols; /* 7 bit value */
__be16 num_rows; /* 7 bit value */
__be64 padding;
} transpose_seg_t;
Expand Down
7 changes: 0 additions & 7 deletions test/mpi/buffer.cc
Original file line number Diff line number Diff line change
Expand Up @@ -182,13 +182,6 @@ ucc_status_t compare_buffers(void *_rst, void *expected, size_t count,
} else {
status = memcmp(rst, expected, count*ucc_dt_size(dt)) ?
UCC_ERR_NO_MESSAGE : UCC_OK;
// uint8_t* a = (uint8_t*)rst;
// uint8_t* b = (uint8_t*)expected;
// for (int i=0; i<count*ucc_dt_size(dt); i++ ){
// if (a[i] != b[i]) {
// printf("!?!?!?!?!? FAILUUUUUURE at i=%d, a=%hhn, b=%hhn, range=%ld\n", i, a, b, count*ucc_dt_size(dt));
// }
// }
}

if (UCC_MEMORY_TYPE_HOST != mt && UCC_MEMORY_TYPE_CUDA_MANAGED != mt) {
Expand Down

0 comments on commit 43dd1d7

Please sign in to comment.