Skip to content

Commit

Permalink
Extended Chapel tasking interface to use more of ATMI
Browse files Browse the repository at this point in the history
  • Loading branch information
ashwinma committed Mar 4, 2017
1 parent e984ec5 commit e8d5062
Show file tree
Hide file tree
Showing 5 changed files with 166 additions and 96 deletions.
4 changes: 2 additions & 2 deletions make/compiler/Makefile.hsa
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,8 @@ CLOC=/opt/rocm/cloc/bin/cloc.sh
LIBS+=-latmi_runtime -lm

# TODO: move these into the third-party directory?
GEN_LFLAGS+=-L/opt/rocm/lib -L/opt/rocm/hsa/lib -L/opt/rocm/libatmi/lib
HSA_INCLUDES=-I/opt/rocm/libatmi/include
GEN_LFLAGS+=-L/opt/rocm/lib -L/opt/rocm/hsa/lib -L/opt/rocm/atmi/lib
HSA_INCLUDES=-I/opt/rocm/atmi/include
else
# HSA locations
CLOC=$(THIRD_PARTY_DIR)/hsa/cloc/bin/cloc.sh
Expand Down
5 changes: 4 additions & 1 deletion runtime/include/chpl-atmi.h
Original file line number Diff line number Diff line change
Expand Up @@ -13,12 +13,15 @@

atmi_kernel_t reduction_kernel;
atmi_kernel_t *gpu_kernels;
atmi_kernel_t main_kernel;
int g_num_cpu_kernels;

atmi_machine_t *g_machine;

/*
 * Integer identifiers named after kernel/function implementations.
 * NOTE(review): presumably used as ATMI implementation IDs when the kernels
 * are registered -- confirm against the runtime source.
 * Each enumerator is listed exactly once; the values are unchanged.
 */
enum {
  GPU_KERNEL_IMPL = 10565,
  REDUCTION_GPU_IMPL = 42,
  CPU_FUNCTION_IMPL = 43
};

int chpl_hsa_initialize(void);
Expand Down
5 changes: 4 additions & 1 deletion runtime/include/gpu_base_header.h
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,10 @@ typedef uint_least8_t atomic_uint_least8_t;
typedef uint_least16_t atomic_uint_least16_t;
typedef uint_least32_t atomic_uint_least32_t;
typedef uint_least64_t atomic_uint_least64_t;
typedef chpl_bool atomic_flag_n;
//typedef uintptr_t atomic_uintptr_t;
typedef chpl_bool atomic_bool;
typedef uint64_t atomic__real64;
typedef uint32_t atomic__real32;


# define INT8_C(c) c
Expand Down
67 changes: 24 additions & 43 deletions runtime/include/tasks/atmi/tasks-atmi.h
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@
#include "chpl-atmi.h"
#include "chpl-tasks-prvdata.h"
#include "chpltypes.h"

#include "chpl-mem.h"
#include "qthread.h"
#include "qthread-chapel.h"

Expand All @@ -50,8 +50,8 @@ void chpl_task_yield(void);
// Type (and default value) used to communicate task identifiers
// between C code and Chapel code in the runtime.
//
// chpl_taskID_t is a 64-bit unsigned integer and the null task ID is ATMI's
// null task handle.  Only one definition of each may be visible: a second
// typedef with a different underlying type does not compile.
typedef uint64_t chpl_taskID_t;
#define chpl_nullTaskID ATMI_NULL_TASK_HANDLE

//
// Sync variables
Expand Down Expand Up @@ -93,7 +93,7 @@ typedef struct {
} chpl_task_bundle_t;

// Structure of task-local storage
typedef struct chpl_qthread_tls_s {
typedef struct chpl_atmi_tls_s {
chpl_task_bundle_t *bundle;
// The below fields could move to chpl_task_bundleData_t
// That would reduce the size of the task local storage,
Expand All @@ -102,39 +102,23 @@ typedef struct chpl_qthread_tls_s {
/* Reports */
int lock_filename;
int lock_lineno;
} chpl_qthread_tls_t;
} chpl_atmi_tls_t;

extern pthread_key_t tls_cache;

extern pthread_t chpl_qthread_process_pthread;
extern pthread_t chpl_qthread_comm_pthread;

extern chpl_qthread_tls_t chpl_qthread_process_tls;
extern chpl_qthread_tls_t chpl_qthread_comm_task_tls;
extern chpl_atmi_tls_t chpl_qthread_process_tls;
extern chpl_atmi_tls_t chpl_qthread_comm_task_tls;

#define CHPL_TASK_STD_MODULES_INITIALIZED chpl_task_stdModulesInitialized
void chpl_task_stdModulesInitialized(void);

// Wrap qthread_get_tasklocal() and assert that it is always available.
// Returns the task-local storage pointer for the caller, or NULL while
// chpl_qthread_done_initializing is false.
static inline chpl_qthread_tls_t* chpl_qthread_get_tasklocal(void)
{
chpl_qthread_tls_t* tls;

if (chpl_qthread_done_initializing) {
tls = (chpl_qthread_tls_t*)
qthread_get_tasklocal(sizeof(chpl_qthread_tls_t));
if (tls == NULL) {
// qthreads returned no task-local storage: fall back to the dedicated
// TLS objects when running on the comm or process pthread.
pthread_t me = pthread_self();
if (pthread_equal(me, chpl_qthread_comm_pthread))
tls = &chpl_qthread_comm_task_tls;
else if (pthread_equal(me, chpl_qthread_process_pthread))
tls = &chpl_qthread_process_tls;
}
// Any other caller without task-local storage trips this assertion.
assert(tls);
}
else
tls = NULL;
// NOTE(review): null_thread is declared here but not referenced in this
// function.
extern pthread_t null_thread;

return tls;
}
// Accessor for the ATMI task-local storage (chpl_atmi_tls_t); defined outside
// this header. Callers below handle a NULL result.
extern chpl_atmi_tls_t* chpl_atmi_get_tasklocal(void);

#ifdef CHPL_TASK_GET_PRVDATA_IMPL_DECL
#error "CHPL_TASK_GET_PRVDATA_IMPL_DECL is already defined!"
Expand All @@ -143,7 +127,7 @@ static inline chpl_qthread_tls_t* chpl_qthread_get_tasklocal(void)
#endif
static inline chpl_task_prvData_t* chpl_task_getPrvData(void)
{
chpl_qthread_tls_t * data = chpl_qthread_get_tasklocal();
chpl_atmi_tls_t * data = chpl_atmi_get_tasklocal();
if (data) {
return &data->prvdata;
}
Expand All @@ -162,7 +146,11 @@ static inline chpl_task_prvData_t* chpl_task_getPrvData(void)
// Returns the sublocale recorded in the current task's bundle
// (requestedSubloc), or c_sublocid_any when no task-local storage is
// available.
// NOTE(review): assumes data->bundle is non-NULL whenever task-local
// storage exists -- confirm against chpl_atmi_get_tasklocal().
static inline
c_sublocid_t chpl_task_getSubloc(void)
{
  chpl_atmi_tls_t * data = chpl_atmi_get_tasklocal();
  if (data) {
    return data->bundle->requestedSubloc;
  }

  return c_sublocid_any;
}

#ifdef CHPL_TASK_SETSUBLOC_IMPL_DECL
Expand All @@ -173,8 +161,6 @@ c_sublocid_t chpl_task_getSubloc(void)
static inline
void chpl_task_setSubloc(c_sublocid_t subloc)
{
qthread_shepherd_id_t curr_shep;

assert(subloc != c_sublocid_none);

// Only change sublocales if the caller asked for a particular one,
Expand All @@ -188,16 +174,10 @@ void chpl_task_setSubloc(c_sublocid_t subloc)
// before tasking init and in any case would be done from the
// main thread of execution, which doesn't have a shepherd.
// The code below wouldn't work in that situation.
if ((curr_shep = qthread_shep()) != NO_SHEPHERD) {
chpl_qthread_tls_t * data = chpl_qthread_get_tasklocal();
if (data) {
data->bundle->requestedSubloc = subloc;
}

if (subloc != c_sublocid_any &&
(qthread_shepherd_id_t) subloc != curr_shep) {
qthread_migrate_to((qthread_shepherd_id_t) subloc);
}
chpl_atmi_tls_t * data = chpl_atmi_get_tasklocal();
if (data) {
data->bundle->requestedSubloc = subloc;
printf("Setting ATMI requested subloc to %d\n", subloc);
}
}

Expand All @@ -209,10 +189,11 @@ void chpl_task_setSubloc(c_sublocid_t subloc)
// Returns the sublocale stored in the current task's bundle
// (requestedSubloc), or c_sublocid_any when no task-local storage is
// available.  The task-local storage is looked up once, through the ATMI
// accessor.
static inline
c_sublocid_t chpl_task_getRequestedSubloc(void)
{
  chpl_atmi_tls_t * data = chpl_atmi_get_tasklocal();
  if (data) {
    return data->bundle->requestedSubloc;
  }

  return c_sublocid_any;
}

Expand Down
Loading

0 comments on commit e8d5062

Please sign in to comment.