-
Notifications
You must be signed in to change notification settings - Fork 103
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
8 changed files
with
187 additions
and
1 deletion.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,28 @@ | ||
/** | ||
* Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. | ||
* | ||
* See file LICENSE for terms. | ||
*/ | ||
|
||
#include "bcast.h" | ||
#include "components/mc/ucc_mc.h" | ||
|
||
ucc_base_coll_alg_info_t | ||
ucc_tl_cuda_bcast_algs[UCC_TL_CUDA_BCAST_ALG_LAST + 1] = { | ||
[UCC_TL_CUDA_BCAST_ALG_LINEAR] = {.id = UCC_TL_CUDA_BCAST_ALG_LINEAR, | ||
.name = "linear", | ||
.desc = "linear bcast algorithm"}, | ||
[UCC_TL_CUDA_BCAST_ALG_LAST] = {.id = 0, .name = NULL, .desc = NULL}}; | ||
|
||
ucc_status_t ucc_tl_cuda_bcast_init(ucc_base_coll_args_t *coll_args, | ||
ucc_base_team_t *tl_team, | ||
ucc_coll_task_t **task_p) | ||
{ | ||
ucc_tl_cuda_team_t *team = ucc_derived_of(tl_team, ucc_tl_cuda_team_t); | ||
|
||
if (ucc_tl_cuda_team_topo_is_fully_conntected(team->topo)) { | ||
return ucc_tl_cuda_bcast_linear_init(coll_args, tl_team, task_p); | ||
} else { | ||
return UCC_ERR_NOT_SUPPORTED; | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,43 @@ | ||
/** | ||
* Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. | ||
* | ||
* See file LICENSE for terms. | ||
*/ | ||
|
||
#ifndef BCAST_H_ | ||
#define BCAST_H_ | ||
|
||
#include "tl_cuda.h" | ||
#include "tl_cuda_coll.h" | ||
|
||
/**
 * Identifiers of the bcast algorithms; each value is also the index of the
 * matching entry in ucc_tl_cuda_bcast_algs.
 */
enum
{
    UCC_TL_CUDA_BCAST_ALG_LINEAR = 0, /**< linear bcast algorithm */
    UCC_TL_CUDA_BCAST_ALG_LAST        /**< number of algorithms / sentinel */
};
|
||
extern ucc_base_coll_alg_info_t | ||
ucc_tl_cuda_bcast_algs[UCC_TL_CUDA_BCAST_ALG_LAST + 1]; | ||
|
||
/* Default algorithm selection string for bcast.
 * NOTE(review): "@0" presumably refers to algorithm id 0
 * (UCC_TL_CUDA_BCAST_ALG_LINEAR) - confirm against the selection parser. */
#define UCC_TL_CUDA_BCAST_DEFAULT_ALG_SELECT_STR "bcast:cuda:@0"
|
||
ucc_status_t ucc_tl_cuda_bcast_init(ucc_base_coll_args_t *coll_args, | ||
ucc_base_team_t *tl_team, | ||
ucc_coll_task_t **task_p); | ||
|
||
ucc_status_t ucc_tl_cuda_bcast_linear_init(ucc_base_coll_args_t *coll_args, | ||
ucc_base_team_t *tl_team, | ||
ucc_coll_task_t **task_p); | ||
|
||
static inline int ucc_tl_cuda_bcast_alg_from_str(const char *str) | ||
{ | ||
int i; | ||
for (i = 0; i < UCC_TL_CUDA_BCAST_ALG_LAST; i++) { | ||
if (0 == strcasecmp(str, ucc_tl_cuda_bcast_algs[i].name)) { | ||
break; | ||
} | ||
} | ||
return i; | ||
} | ||
|
||
#endif |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,86 @@ | ||
/** | ||
* Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. | ||
* | ||
* See file LICENSE for terms. | ||
*/ | ||
|
||
#include "bcast/bcast.h" | ||
|
||
/** Stages of a linear bcast task, stored in task->bcast_linear.stage. */
enum
{
    STAGE_SYNC,    /**< Wait for free SYNC segment */
    STAGE_SETUP,   /**< Wait for memhandle setup to finish */
    STAGE_COPIES,  /**< Linear algorithm is running */
    STAGE_BARRIER, /**< Linear algorithm is done, waiting for
                    *   other ranks to finish */
};
|
||
/**
 * Finalize a linear bcast task: emit a trace message and hand the task back
 * via ucc_tl_cuda_task_put().
 *
 * @param coll_task Base task; must be embedded in a ucc_tl_cuda_task_t.
 *
 * @return Always UCC_OK.
 */
ucc_status_t ucc_tl_cuda_bcast_linear_finalize(ucc_coll_task_t *coll_task)
{
    ucc_tl_cuda_task_t *cuda_task =
        ucc_derived_of(coll_task, ucc_tl_cuda_task_t);

    tl_trace(UCC_TASK_LIB(cuda_task), "finalizing task %p", cuda_task);
    ucc_tl_cuda_task_put(cuda_task);

    return UCC_OK;
}
|
||
/**
 * Progress callback of the linear bcast task.
 *
 * Currently a stub: it only marks the task as UCC_INPROGRESS and performs
 * no stage handling, so the task never reaches a terminal status through
 * this function.
 *
 * TODO: implement the STAGE_SYNC -> STAGE_SETUP -> STAGE_COPIES ->
 *       STAGE_BARRIER state machine.
 *
 * @param coll_task Base task; must be embedded in a ucc_tl_cuda_task_t.
 */
void ucc_tl_cuda_bcast_linear_progress(ucc_coll_task_t *coll_task)
{
    ucc_tl_cuda_task_t *task = ucc_derived_of(coll_task, ucc_tl_cuda_task_t);

    task->super.status = UCC_INPROGRESS;
}
|
||
/**
 * Post callback of the linear bcast task.
 *
 * Resets the task to STAGE_SYNC, records the source buffer from the
 * collective arguments and enqueues the task on the core context's
 * progress queue.
 *
 * @param coll_task Base task; must be embedded in a ucc_tl_cuda_task_t.
 *
 * @return Status returned by ucc_progress_queue_enqueue().
 */
ucc_status_t ucc_tl_cuda_bcast_linear_start(ucc_coll_task_t *coll_task)
{
    ucc_tl_cuda_task_t *task = ucc_derived_of(coll_task, ucc_tl_cuda_task_t);
    ucc_tl_cuda_team_t *team = TASK_TEAM(task);
    ucc_coll_args_t    *args = &TASK_ARGS(task);

    task->bcast_linear.stage = STAGE_SYNC;
    /* NOTE(review): the source buffer is stored in the allgatherv_linear
     * state rather than in bcast_linear - looks like a leftover from the
     * allgatherv implementation; confirm and move to a bcast_linear field. */
    task->allgatherv_linear.sbuf = args->src.info.buffer;

    return ucc_progress_queue_enqueue(UCC_TL_CORE_CTX(team)->pq, &task->super);
}
|
||
ucc_status_t ucc_tl_cuda_bcast_linear_init(ucc_base_coll_args_t *coll_args, | ||
ucc_base_team_t * tl_team, | ||
ucc_coll_task_t ** task_p) | ||
{ | ||
ucc_tl_cuda_team_t *team = ucc_derived_of(tl_team, ucc_tl_cuda_team_t); | ||
ucc_tl_cuda_task_t *task; | ||
ucc_status_t status; | ||
|
||
if (ucc_unlikely(!ucc_tl_cuda_team_topo_is_fully_conntected(team->topo) || | ||
UCC_TL_TEAM_SIZE(team) - 1 > UCC_EE_EXECUTOR_MULTI_OP_NUM_BUFS)) { | ||
return UCC_ERR_NOT_SUPPORTED; | ||
} | ||
|
||
status = ucc_tl_cuda_task_init(coll_args, team, &task); | ||
if (ucc_unlikely(status != UCC_OK)) { | ||
return status; | ||
} | ||
|
||
// task->allgatherv_linear.get_count = ucc_tl_cuda_allgather_get_count; | ||
// task->allgatherv_linear.get_offset = ucc_tl_cuda_allgather_get_offset; | ||
// task->allgatherv_linear.dt = coll_args->args.dst.info.datatype; | ||
// task->allgatherv_linear.sbuf = coll_args->args.src.info.buffer; | ||
// task->allgatherv_linear.rbuf = coll_args->args.dst.info.buffer; | ||
|
||
task->super.flags |= UCC_COLL_TASK_FLAG_EXECUTOR; | ||
task->super.post = ucc_tl_cuda_allgatherv_linear_start; | ||
task->super.progress = ucc_tl_cuda_allgatherv_linear_progress; | ||
task->super.finalize = ucc_tl_cuda_allgatherv_linear_finalize; | ||
task->bar = TASK_BAR(task); | ||
|
||
*task_p = &task->super; | ||
return UCC_OK; | ||
} | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters