Skip to content

Commit

Permalink
clean and comments
Browse files Browse the repository at this point in the history
  • Loading branch information
Dimitri Gerin committed Jul 29, 2022
1 parent 6eec300 commit b128620
Show file tree
Hide file tree
Showing 3 changed files with 230 additions and 120 deletions.
80 changes: 35 additions & 45 deletions upmem/include/embedding.h
Original file line number Diff line number Diff line change
Expand Up @@ -13,74 +13,64 @@
#include <time.h>
#include <unistd.h>

/** @brief store the current CLOCK_MONOTONIC time in the object pointed to by _t
* @param _t pointer handed unchanged to clock_gettime() (a struct timespec *)
* @return the macro expands to the clock_gettime() call itself, so it evaluates
* to that call's return value
*/
#define TIME_NOW(_t) (clock_gettime(CLOCK_MONOTONIC, (_t)))

/**
* @brief execution time totals, one double per named phase
* @param execution_time_prepare total for the "prepare" phase
* @param execution_time_populate_copy_in total for the "populate copy in" phase
* @param execution_time_copy_in total for the "copy in" phase
* @param execution_time_copy_out total for the "copy out" phase
* @param execution_time_aggregate_result total for the "aggregate result" phase
* @param execution_time_launch total for the "launch" phase
*
* NOTE(review): phase meanings are taken from the field names only, and the
* time unit is not established by this header -- confirm against the code
* that fills these fields.
*/
typedef struct dpu_runtime_totals {
double execution_time_prepare;
double execution_time_populate_copy_in;
double execution_time_copy_in;
double execution_time_copy_out;
double execution_time_aggregate_result;
double execution_time_launch;
} dpu_runtime_totals;

/**
* @brief time value split into a nanoseconds field and a seconds field
* @param tv_nsec nanoseconds part (presumably the sub-second remainder -- TODO confirm)
* @param tv_sec seconds part
*
* NOTE(review): tv_nsec is declared before tv_sec, the reverse of the order in
* which POSIX lists the members of struct timespec; do not assume the two
* types are layout-compatible.
*/
typedef struct dpu_timespec {
long tv_nsec;
long tv_sec;
} dpu_timespec;

/**
* @brief time interval described by two dpu_timespec values
* @param start timestamp at the beginning of the interval
* @param stop timestamp at the end of the interval
*/
typedef struct dpu_runtime_interval {
dpu_timespec start;
dpu_timespec stop;
} dpu_runtime_interval;

/**
* @brief group of runtime intervals
* @param in_use presumably the number of entries of intervals currently in use -- TODO confirm
* @param length presumably the number of entries allocated for intervals -- TODO confirm
* @param intervals pointer to the dpu_runtime_interval entries of the group
*/
typedef struct dpu_runtime_group {
unsigned int in_use;
unsigned int length;
dpu_runtime_interval *intervals;
} dpu_runtime_group;

/** @brief information about embedding input
* @param indices_len stores the DPU indices length (element vector x batch size) for each DPU
* @param nr_batches input batch size
* @param nr_indexes number of indices in each input element
*/
typedef struct input_info {
uint64_t *indices_len;
uint64_t nr_batches;
uint64_t nr_indexes;
} input_info;

/** @brief aggregates the input batch buffers for the pipelined system
* @param valid validity of the current batch
* @param indices array that stores indices [EMB_INDEX][BATCH_INDEX * INDEXES]
* @param offsets array that stores indices offsets (PyTorch EmbeddingBag convention)
* @param input_info pointer to the associated input info structure
*/
typedef struct input_batch {
bool valid;
uint32_t **indices;
uint32_t **offsets;
input_info *input_info;
} input_batch;

/** @brief single DPU embedding mapping information structure
* @param nr_cols number of columns in the DPU
* @param start_col index of the first column in the DPU
* @param embedding_index index of the embedding mapped in the DPU
*/
typedef struct embedding_dpu_mapping {
uint64_t nr_cols;
uint32_t start_col;
uint32_t embedding_index;
} embedding_dpu_mapping;

/** @brief information about embedding configuration
* @param nr_embedding number of embeddings
* @param nr_rows number of rows (the previous note said "in the DPU" -- TODO confirm the scope)
* @param nr_cols number of columns (the previous note said "in the DPU" -- TODO confirm the scope)
* @param sizeT embedding data size, in bytes
*
* NOTE(review): the previous version of this comment also listed a start_col
* parameter; this struct has no such member (start_col belongs to
* embedding_dpu_mapping), so it is not documented here.
*/
typedef struct embedding_info {
uint32_t nr_embedding;
uint32_t nr_rows;
uint32_t nr_cols;
uint32_t sizeT;
} embedding_info;

/** @brief global ranks embedding mapping information structure
* @param nr_dpus total number of DPUs
* @param nr_ranks total number of ranks
* @param nr_cols_per_dpu full DPU number of column
* @param dpu_part_col non full DPU number of column
* @param rank_nr_dpus number of DPUs in each rank
* @param rank_start_dpus absolute index of first DPU in each rank
* @param rank_dpus_mapping dpu mapping matrix for each DPU of each rank
*/
typedef struct embeding_rank_mapping {
uint32_t nr_dpus;
uint32_t nr_ranks;
Expand Down Expand Up @@ -110,13 +100,13 @@ populate_mram(embedding_rank_mapping *rank_mapping, embedding_info *emb_info, in
/** @brief declaration of post_process (no definition is visible in this view)
* @param dpu_rank DPU set passed by value (presumably a single rank -- TODO confirm)
* @param rank_id presumably the index of the rank -- TODO confirm
* @param arg opaque pointer argument
* @return a dpu_error_t status code
*/
dpu_error_t
post_process(struct dpu_set_t dpu_rank, uint64_t rank_id, void *arg);

int32_t *
void
lookup(uint32_t **indices, uint32_t **offsets, struct input_info *input_info,
embedding_rank_mapping *rank_mapping_info, uint64_t nr_embedding, uint64_t nr_cols,
uint64_t nr_rows, float **result_buffer, int32_t **dpu_result_buffer);

void
free_embedding_dpu_backend();
free_dpu_backend();

void
alloc_embedding_dpu_backend();
alloc_dpu_backend();
58 changes: 33 additions & 25 deletions upmem/src/embedding.c
Original file line number Diff line number Diff line change
Expand Up @@ -8,15 +8,16 @@
#include <stdlib.h>
#include <time.h>

/** @brief DPU binary path */
#define DPU_BINARY "./build/embdpu"

/** @brief global reference to dpu_set */
struct dpu_set_t dpu_set;

/** @brief alloc dpu set with given number of dpus
* @param nr_dpus number of DPUs requested
*
* NOTE(review): the body is empty as shown here, so nothing is allocated and
* nr_dpus is unused; the statements may have been elided from this view --
* confirm against the full file.
*/
void
alloc_dpus(uint32_t nr_dpus) {
}

/**
* @brief free embedding rank mapping structure
* @param rank_mapping embedding rank mapping structure
*/
void
free_embedding_rank_mapping(embedding_rank_mapping *rank_mapping) {
for (uint32_t rank_index = 0; rank_index < rank_mapping->nr_ranks; rank_index++) {
Expand All @@ -27,6 +28,12 @@ free_embedding_rank_mapping(embedding_rank_mapping *rank_mapping) {
free(rank_mapping);
}

/**
* @brief build embedding rank mapping structure
* @param emb_info embedding info structure
* @param i_info input info structure
* @return embedding rank mapping structure
*/
embedding_rank_mapping *
embedding_dpu_map(embedding_info *emb_info, input_info *i_info) {

Expand Down Expand Up @@ -188,8 +195,10 @@ embedding_dpu_map(embedding_info *emb_info, input_info *i_info) {
return rank_mapping;
}

/** @brief transfer one embedding table params to DPU DRAM
* @param TODO
/** @brief transfer embedding tables to DPUs MRAM
* @param rank_mapping embedding rank mapping structure
* @param emb_info embedding info structure
* @param embedding_tables embedding tables buffer
*/
void
populate_mram(embedding_rank_mapping *rank_mapping, embedding_info *emb_info,
Expand Down Expand Up @@ -280,10 +289,10 @@ struct callback_input {

struct callback_input *callback_data = NULL;

/** @brief host side post processing of DPU side embedding results
* @param dpu_rank pointer to rank dpu set
* @param rank_id index of the rank
* @param args rank callback generic args
/** @brief rank callback for DPU results post processing
* @param rank dpu_set rank pointer
* @param rank_index index of the rank
* @param cb_arg thread function args
*/
dpu_error_t
gather_rank_embedding_results(struct dpu_set_t rank, uint32_t rank_index, void *cb_arg) {
Expand Down Expand Up @@ -348,19 +357,18 @@ gather_rank_embedding_results(struct dpu_set_t rank, uint32_t rank_index, void *
return DPU_OK;
}

/** @brief perform DPU lookup operation in embedding set and for input indices of
* multiple batch
/** @brief perform DPU lookup operation
* @param indices array that stores indices [EMB_INDEX][BATCH_INDEX * INDEXES]
* @param offsets array that stores indices offset (pytorch EmbeddingBag convention)
* [EMB_INDEX][BATCH_INDEX][OFFSET]
* @param indices_len gives the length of the input indices vector for each embedding
* [EMB_INDEX]
* @param nr_batches_per_embedding gives the number of batch (same for each embedding) in
* indices
* @param result_buffer embedding lookup operation DPU results
* @return TBC
* @param input_info input info structure
* @param rank_mapping embedding rank mapping structure
* @param nr_embedding number of embedding
* @param nr_cols number of embedding column
* @param nr_rows number of embedding rows
* @param result_buffer DPU formatted result buffer
* @param dpu_result_buffer dpu_result_buffer
*/
int32_t *
void
lookup(uint32_t **indices, uint32_t **offsets, input_info *input_info,
embedding_rank_mapping *rank_mapping, uint64_t nr_embedding, uint64_t nr_cols,
uint64_t nr_rows, float **result_buffer, int32_t **dpu_result_buffer) {
Expand Down Expand Up @@ -460,17 +468,17 @@ lookup(uint32_t **indices, uint32_t **offsets, input_info *input_info,
DPU_ASSERT(
dpu_callback(dpu_set, gather_rank_embedding_results, callback_data, DPU_CALLBACK_DEFAULT));
free(lengths);

return 0;
}

/** @brief allocate DPU backend */
void
alloc_embedding_dpu_backend() {
alloc_dpu_backend() {
assert(callback_data == NULL);
callback_data = malloc(sizeof(struct callback_input));
}

/** @brief free DPU backend */
void
free_embedding_dpu_backend() {
free_dpu_backend() {
free(callback_data);
}
Loading

0 comments on commit b128620

Please sign in to comment.