diff --git a/runtime/src/tasks/atmi/tasks-atmi.c b/runtime/src/tasks/atmi/tasks-atmi.c index daeb6be8fa..9194494b18 100644 --- a/runtime/src/tasks/atmi/tasks-atmi.c +++ b/runtime/src/tasks/atmi/tasks-atmi.c @@ -67,6 +67,10 @@ #include #include +// FIXME: Good idea to recycle the task groups, so this will limit +// the depth of nested coforalls and cobegins. Fix to use an ATMI +// environment variable. +#define max_num_task_groups 8 #define max_num_cpu_kernels 4096 int cpu_kernels_initialized[max_num_cpu_kernels] = {0}; atmi_kernel_t cpu_kernels[max_num_cpu_kernels]; @@ -759,6 +763,11 @@ void chpl_task_init(void) atmi_kernel_create_empty(&dummy_kernel, 0, NULL); atmi_kernel_add_cpu_impl(dummy_kernel, (atmi_generic_fp)dummy_wrapper, CPU_FUNCTION_IMPL); + // this increment is needed to let the main task not be treated as an ATMI task + // because we directly launch the main task with the main thread and don't treat + // it as an ATMI task. This increment fools the rest of the runtime into thinking that + // the main task is an ATMI task, but it is in fact not an ATMI task. + int next_id = atomic_fetch_add_explicit_uint_least64_t(&atmi_tg_id, 1, memory_order_relaxed); atmi_task_handle_t dummy_handle = atmi_task_create(dummy_kernel); int32_t commMaxThreads; int32_t hwpar; @@ -968,7 +977,6 @@ int chpl_task_createCommTask(chpl_fn_p fn, void *chpl_taskGroupGet() { void *ret = (void *)get_atmi_task_group(); - printf("Adding to task group: %p\n", ret); return ret; } @@ -976,7 +984,9 @@ void *chpl_taskGroupInit(int lineno, int32_t filename) { atmi_task_group_t *tg = (atmi_task_group_t *)chpl_malloc(sizeof(atmi_task_group_t)); // TODO: add to a list of task groups and free all of them at the very end. int next_id = atomic_fetch_add_explicit_uint_least64_t(&atmi_tg_id, 1, memory_order_relaxed); - printf("Next task group ID: %d\n", next_id); + // loop around the task groups. + // FIXME: how will this affect the main task that is not an ATMI task? Incr by 1 after this? 
+ next_id %= max_num_task_groups; tg->id = next_id; tg->ordered = ATMI_FALSE; return tg; @@ -1002,7 +1012,6 @@ void chpl_taskGroupFinalize(void *tg) { } else { // if I am not within a task (main task), simply sync - printf("Waiting for task group: %p\n", tg); atmi_task_group_sync((atmi_task_group_t *)tg); } //chpl_free(tg); @@ -1048,8 +1057,8 @@ void chpl_task_addToTaskList(chpl_fn_int_t fid, ATMI_LPARM_CPU(lparm, cpu_id); lparm->kernel_id = CPU_FUNCTION_IMPL; //lparm->synchronous = ATMI_TRUE; + lparm->groupable = ATMI_TRUE; if(task_group) { - lparm->groupable = ATMI_TRUE; lparm->group = (atmi_task_group_t *)task_group; } @@ -1080,7 +1089,6 @@ static inline void taskCallBody(chpl_fn_int_t fid, chpl_fn_name fname, chpl_fn_p c_sublocid_t subloc, chpl_bool serial_state, int lineno, int32_t filename) { - //printf("Adding %d fn to task call body\n", fid); chpl_task_bundle_t *bundle = (chpl_task_bundle_t*) arg; bundle->serial_state = serial_state; @@ -1252,7 +1260,8 @@ uint32_t chpl_task_getMaxPar(void) { // will decide itself how much parallelism to create across and // within sublocales, if there are any. // - return (uint32_t) 8;//qthread_num_workers(); + return qthread_num_workers(); + //return (uint32_t) g_machine->devices_by_type[ATMI_DEVTYPE_CPU][0].core_count; } c_sublocid_t chpl_task_getNumSublocales(void) @@ -1294,7 +1303,8 @@ int32_t chpl_task_getNumBlockedTasks(void) uint32_t chpl_task_getNumThreads(void) { - return (uint32_t) 8;//qthread_num_workers(); + return qthread_num_workers(); + //return (uint32_t) g_machine->devices_by_type[ATMI_DEVTYPE_CPU][0].core_count; } // Ew. Talk about excessive bookkeeping.