From b3c2e80ee68523da768ac880a365655ddc4e27e7 Mon Sep 17 00:00:00 2001 From: Yael Yacobovich Date: Wed, 30 Oct 2024 16:37:11 +0200 Subject: [PATCH] change bounds in ring --- src/components/ec/cuda/ec_cuda_executor.c | 1 + .../tl/ucp/allgather/allgather_ring.c | 29 +++++++++++-------- 2 files changed, 18 insertions(+), 12 deletions(-) diff --git a/src/components/ec/cuda/ec_cuda_executor.c b/src/components/ec/cuda/ec_cuda_executor.c index 50a8f6f24c..5d51cf924c 100644 --- a/src/components/ec/cuda/ec_cuda_executor.c +++ b/src/components/ec/cuda/ec_cuda_executor.c @@ -110,6 +110,7 @@ ucc_status_t ucc_cuda_executor_task_finalize(ucc_ee_executor_task_t *task) ucc_status_t ucc_cuda_executor_start(ucc_ee_executor_t *executor, void *ee_context) { + printf("======== in executer start ========="); ucc_ec_cuda_executor_t *eee = ucc_derived_of(executor, ucc_ec_cuda_executor_t); diff --git a/src/components/tl/ucp/allgather/allgather_ring.c b/src/components/tl/ucp/allgather/allgather_ring.c index 9a95d374ee..ad68a73828 100644 --- a/src/components/tl/ucp/allgather/allgather_ring.c +++ b/src/components/tl/ucp/allgather/allgather_ring.c @@ -43,29 +43,31 @@ void ucc_tl_ucp_allgather_ring_progress(ucc_coll_task_t *coll_task) ucc_rank_t sendto, recvfrom, sblock, rblock; int step; void *buf; - + if (UCC_INPROGRESS == ucc_tl_ucp_test(task)) { return; } - printf("after prog\n"); - - /*int k = 1; - int a = 1; - while(k){ - a = 1; - }*/ + printf("in progress top, rank = %d task->tagged.send_posted = %d task->tagged.recv_posted = %d \n",(int)trank, (int)task->tagged.send_posted, + (int)task->tagged.recv_posted); + sendto = ucc_ep_map_eval(task->subset.map, (trank + 1) % tsize); recvfrom = ucc_ep_map_eval(task->subset.map, (trank - 1 + tsize) % tsize); - printf("in prog,tsize = %d, trank = %d, sndto = %d, recv = %d\n", (int)tsize, (int)trank, (int)sendto, (int)recvfrom); + + + uint32_t USE_CUDA = UCC_TL_UCP_TEAM_LIB(team)->cfg.allgather_use_cuda; + int iter = USE_CUDA ? tsize - 1 : tsize; + + while (task->tagged.send_posted < iter) { + + step = USE_CUDA ? task->tagged.send_posted : task->tagged.send_posted - 1; - while (task->tagged.send_posted < tsize - 1) { - step = task->tagged.send_posted; sblock = task->allgather_ring.get_send_block(&task->subset, trank, tsize, step); rblock = task->allgather_ring.get_recv_block(&task->subset, trank, tsize, step); buf = PTR_OFFSET(rbuf, sblock * data_size); + UCPCHECK_GOTO( ucc_tl_ucp_send_nb(buf, data_size, rmem, sendto, team, task), task, out); @@ -73,8 +75,8 @@ void ucc_tl_ucp_allgather_ring_progress(ucc_coll_task_t *coll_task) UCPCHECK_GOTO( ucc_tl_ucp_recv_nb(buf, data_size, rmem, recvfrom, team, task), task, out); + if (UCC_INPROGRESS == ucc_tl_ucp_test(task)) { - printf("in while loop\ntask->tagged.send_posted = %d\n", (int)task->tagged.send_posted); return; } } @@ -110,13 +112,16 @@ ucc_status_t ucc_tl_ucp_allgather_ring_start(ucc_coll_task_t *coll_task) if(USE_CUDA){ status = ucc_mc_memcpy(PTR_OFFSET(rbuf, data_size * block), sbuf, data_size, rmem, smem); + //printf("in use cuda: task->tagged.send_posted = %d \n", (int)task->tagged.send_posted); if (ucc_unlikely(UCC_OK != status)) { return status; } } else { /* Loopback */ UCPCHECK_GOTO(ucc_tl_ucp_send_nb(sbuf, data_size, smem, trank, team, task),task, out); + //printf("start: trank = %d send task->tagged.send_posted = %d\n",(int)trank, (int)task->tagged.send_posted); UCPCHECK_GOTO(ucc_tl_ucp_recv_nb(PTR_OFFSET(rbuf, data_size * block), data_size, rmem, trank, team, task),task, out); + //printf("start: trank = %d recv task->tagged.recv_posted = %d\n",(int)trank, (int)task->tagged.recv_posted); } } return ucc_progress_queue_enqueue(UCC_TL_CORE_CTX(team)->pq, &task->super);