Skip to content

Commit

Permalink
Align HCL source to 1.18.0
Browse files Browse the repository at this point in the history
  • Loading branch information
ytava committed Oct 13, 2024
1 parent d108b1c commit 7e01f03
Show file tree
Hide file tree
Showing 386 changed files with 15,469 additions and 9,436 deletions.
16 changes: 16 additions & 0 deletions dependencies/habanalabs/include/uapi/drm/habanalabs_accel.h
Original file line number Diff line number Diff line change
Expand Up @@ -2797,6 +2797,8 @@ struct hl_debug_params_read_block {
#define HL_DEBUG_OP_SET_MODE 7
/* Opcode for fetching trace data */
#define HL_DEBUG_OP_FETCH_TRACE 8
/* Opcode for direct I/O operations */
#define HL_DEBUG_OP_DIO 9

/* Opcode for debug read memory */
#define HL_DEBUG_OP_READMEM 1024
Expand Down Expand Up @@ -3658,6 +3660,20 @@ struct hl_nic_args {
#define HL_IOCTL_DEBUG 0x05
#define HL_IOCTL_NIC 0x06

#define HL_DIO_CMD_SSD2HL 1
#define HL_DIO_CMD_HL2SSD 2

/*
 * Arguments for a direct I/O (DIO) request.
 *
 * NOTE(review): the only parameter group is named ssd2hl although
 * HL_DIO_CMD_HL2SSD is defined as well; presumably the same fields are
 * used for both directions - confirm against the driver.
 *
 * NOTE(review): there is no explicit padding after fd or after op. Where
 * __u64 is 8-byte aligned the compiler inserts 4 bytes after each of them
 * (sizeof == 40); where __u64 is only 4-byte aligned (e.g. i386) it inserts
 * none (sizeof == 32). Confirm that 32-bit callers are not expected.
 */
struct hl_dio_args {
struct {
/* presumably the device virtual address of the transfer buffer - confirm */
__u64 device_va;
/* offset in bytes; presumably into the file referred to by fd - confirm */
__u64 off_bytes;
/* transfer length in bytes */
__u64 len_bytes;
/* file descriptor; presumably of the storage-side file - confirm */
__u32 fd;
} ssd2hl;

/* requested operation; presumably one of HL_DIO_CMD_* - confirm */
__u32 op;
};

/*
* Various information operations such as:
* - H/W IP information
Expand Down
8 changes: 8 additions & 0 deletions dependencies/hl-thunk/include/uapi/hlthunk.h
Original file line number Diff line number Diff line change
Expand Up @@ -2167,6 +2167,14 @@ hlthunk_public int hlthunk_nic_user_encap_unset(
hlthunk_public int hlthunk_nic_dump_qp(int fd, uint32_t port, uint32_t qpn, uint32_t req,
char *buf, uint32_t buf_size);

/**
* This function retrieves the mask of enabled NIC ports. This function is common for all ASICs.
* @param fd file descriptor handle of habanalabs main device.
* @param mask returned masks.
* @return 0 if success. Non-zero for any error.
*/
hlthunk_public int hlthunk_nic_get_enabled_ports_mask(int fd, uint64_t *mask);

/**
* This function retrieves the NIC ports and external ports masks. This function shall be used
* only for Gaudi2 and later ASICs.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -162,6 +162,16 @@ enum scheduler_type_t {
SCHED_TYPE_SIZE = 0xF
};

/**
* Max number of MMEs
*/
#define GAUDI2_MAX_MME_COUNT 2

/**
* Max number of EDMAs
*/
#define GAUDI2_MAX_EDMA_COUNT 5

/**
* Total number of engine groups supported by firmware
*/
Expand Down Expand Up @@ -218,8 +228,9 @@ enum sched_cmpt_sync_scheme_bitmap {
*/
enum {
SYNC_SCHEME_FENCE_ID = 0,
EXT_SIGNAL_FENCE_ID = SYNC_SCHEME_FENCE_ID,
B2B_FENCE_ID = 1,
EXT_SIGNAL_FENCE_ID = 2
GC_USED_FENCE_ID = 2
};

/**<
Expand Down
187 changes: 177 additions & 10 deletions dependencies/qman_fw/engines-arc/include/gaudi2_arc_eng_packets.h
Original file line number Diff line number Diff line change
Expand Up @@ -41,10 +41,11 @@ enum eng_arc_cmd_t {
ECB_CMD_NOP = 1,
ECB_CMD_WD_FENCE_AND_EXE = 2,
ECB_CMD_SCHED_DMA = 3,
ECB_CMD_STATIC_DESC_V2 = 4,
ECB_CMD_SFG = 5,
ECB_CMD_RESET_SOSET = 6,
ECB_CMD_COUNT = 7
ECB_CMD_SCHED_DMA_V2 = 4,
ECB_CMD_STATIC_DESC_V2 = 5,
ECB_CMD_SFG = 6,
ECB_CMD_RESET_SOSET = 7,
ECB_CMD_COUNT = 8
};

/**
Expand Down Expand Up @@ -211,6 +212,10 @@ enum nic_scaleout_eng_arc_cmd_t {
*/
#define WD_CTXT_COUNT 8

/**
 * Expert mapping table dimensions. The expert_mapping_ctxt arrays in the
 * *_wd_ctxts_t structures hold
 * EXPERT_MAPPING_CTXT_COUNT * EXPERT_MAPPING_ENTRY_COUNT 16-bit entries.
 */
#define EXPERT_MAPPING_CTXT_COUNT	2
#define EXPERT_MAPPING_ENTRY_COUNT	32

/**
 * All-ones 16-bit value; presumably marks an unused expert mapping entry
 * (confirm against the firmware implementation).
 */
#define INVALID_EXPERT_MAPPING_ENTRY	0xFFFF

#define MAX_DIMENSIONS 5

#define TENSOR_DIM0 0
Expand Down Expand Up @@ -460,6 +465,23 @@ struct virt_sob_ids_t {
*/
} __attribute__ ((aligned(4), __packed__));


/**
 * \struct full_hbm_addr_ctxt_t
 * \brief  Full 64-bit HBM address context
 * \details Full HBM address used for patching. The same 8 bytes can be
 *          accessed either as one 64-bit value or as two 32-bit halves.
 */
struct full_hbm_addr_ctxt_t {
	union {
		uint64_t hbm_addr;
		/**<
		 * complete 64-bit HBM address
		 */
		struct {
			uint64_t addr_low:32;
			/**<
			 * first 32-bit field overlaying hbm_addr; this is
			 * the least significant half with the little-endian
			 * bit-field allocation used by GCC/Clang
			 */
			uint64_t addr_high:32;
			/**<
			 * second 32-bit field overlaying hbm_addr
			 */
		} __attribute__ ((aligned(4), __packed__));
	};
} __attribute__ ((aligned(4), __packed__));


/**
* \struct rot_wd_ctxt_t
* \brief Rotator specific work distribution context
Expand Down Expand Up @@ -507,6 +529,8 @@ struct rot_wd_ctxt_t {
*/
struct rot_wd_ctxts_t {
struct rot_wd_ctxt_t rot_ctxt[WD_CTXT_COUNT];
struct full_hbm_addr_ctxt_t weight_base_address_ctxt[WD_CTXT_COUNT];
uint16_t expert_mapping_ctxt[EXPERT_MAPPING_CTXT_COUNT * EXPERT_MAPPING_ENTRY_COUNT];
/**<
* array of contexts for Rotator
*/
Expand All @@ -517,6 +541,13 @@ struct rot_wd_ctxts_t {
*/
} __attribute__ ((aligned(4), __packed__));

/**
 * \enum  mme_operand_type_t
 * \brief MME operand selector
 * \details Values for the 2-bit mme_operand field of mme_wd_ctxt_t, which
 *          names the MME operand to patch.
 */
enum mme_operand_type_t {
	MME_ADDR_A		= 0,
	MME_ADDR_B		= 1,
	MME_ADDR_COUT0		= 2,
	MME_OPERAND_COUNT	= 3	/* number of operand selectors above */
};

/**
* \struct mme_wd_ctxt_t
* \brief MME specific work distribution context
Expand All @@ -535,7 +566,11 @@ struct mme_wd_ctxt_t {
* value of the switch bit to be configured when pushing the
* descriptor into ARC CQ
*/
uint32_t reserved:7;
uint32_t mme_operand:2;
/**<
* mme operand to patch from mme_operand_type_t
*/
uint32_t reserved:5;
/**<
* reserved
*/
Expand All @@ -554,6 +589,10 @@ struct mme_wd_ctxt_t {
/**<
* Virtual SOB array
*/
struct full_hbm_addr_ctxt_t weight_offset[GAUDI2_MAX_MME_COUNT];
/**<
* hbm addr offset of tensor for patching
*/
} __attribute__ ((aligned(4), __packed__));

/**
Expand All @@ -563,6 +602,8 @@ struct mme_wd_ctxt_t {
*/
struct mme_wd_ctxts_t {
struct mme_wd_ctxt_t mme_ctxt[WD_CTXT_COUNT];
struct full_hbm_addr_ctxt_t weight_base_address_ctxt[WD_CTXT_COUNT];
uint16_t expert_mapping_ctxt[EXPERT_MAPPING_CTXT_COUNT * EXPERT_MAPPING_ENTRY_COUNT];
/**<
* array of contexts for MME
*/
Expand Down Expand Up @@ -590,6 +631,12 @@ enum edma_op_type_t {
EDMA_OP_COUNT = 6
};

/**
 * \enum  edma_operand_type_t
 * \brief EDMA operand selector
 * \details Values for the 1-bit dma_operand field of edma_wd_ctxt_t, which
 *          names the EDMA operand to patch.
 */
enum edma_operand_type_t {
	EDMA_SRC		= 0,
	EDMA_DST		= 1,
	EDMA_OPERAND_COUNT	= 2	/* number of operand selectors above */
};

/**<
* Total number of EDMA engines involved in compute
*/
Expand Down Expand Up @@ -665,11 +712,15 @@ struct edma_wd_ctxt_t {
* alternate address of RD_HBW_MAX_OUTSTAND as completion address
* value of 0 is set by the GC in the WR_COMP_WDATA
*/
uint32_t dma_operand:1;
/**<
* Edma operand to patch from edma_operand_type_t
*/
uint32_t sig_inc_value:16;
/**<
* Increment value to be added to previous threshold
*/
uint32_t virtual_sob_bitmap:8;
uint32_t virtual_sob_bitmap:7;
/**<
* Virtual SOB bitmap indicating index which are valid
* in the virtual_sob array
Expand All @@ -688,6 +739,10 @@ struct edma_wd_ctxt_t {
/**<
* Virtual SOB array
*/
struct full_hbm_addr_ctxt_t weight_offset[GAUDI2_MAX_EDMA_COUNT];
/**<
* hbm addr offset of tensor for patching
*/
} __attribute__ ((aligned(4), __packed__));

/**
Expand All @@ -697,6 +752,8 @@ struct edma_wd_ctxt_t {
*/
struct edma_wd_ctxts_t {
struct edma_wd_ctxt_t edma_ctxt[WD_CTXT_COUNT];
struct full_hbm_addr_ctxt_t weight_base_address_ctxt[WD_CTXT_COUNT];
uint16_t expert_mapping_ctxt[EXPERT_MAPPING_CTXT_COUNT * EXPERT_MAPPING_ENTRY_COUNT];
/**<
* array of contexts for EDMA
*/
Expand Down Expand Up @@ -867,7 +924,11 @@ struct tpc_wd_ctxt_t {
union {
uint32_t word2;
struct {
uint16_t reserved1;
uint16_t tensor_id: 4;
/**<
* tpc operand to patch (0-15)
*/
uint16_t reserved1: 12;
/**<
* reserved
*/
Expand All @@ -881,6 +942,10 @@ struct tpc_wd_ctxt_t {
/**<
* Virtual SOB array
*/
struct full_hbm_addr_ctxt_t weight_offset;
/**<
* hbm addr offset of tensor for patching
*/
} __attribute__ ((aligned(4), __packed__));

/**
Expand All @@ -890,6 +955,8 @@ struct tpc_wd_ctxt_t {
*/
struct tpc_wd_ctxts_t {
struct tpc_wd_ctxt_t tpc_ctxt[WD_CTXT_COUNT];
struct full_hbm_addr_ctxt_t weight_base_address_ctxt[WD_CTXT_COUNT];
uint16_t expert_mapping_ctxt[EXPERT_MAPPING_CTXT_COUNT * EXPERT_MAPPING_ENTRY_COUNT];
/**<
* Array of contexts for TPC
*/
Expand Down Expand Up @@ -1000,6 +1067,17 @@ struct eng_arc_cmd_static_desc_v2_t {
*/
} __attribute__ ((aligned(4), __packed__));

/**
 * \enum    signaling_completion_type_t
 * \brief   Target of the completion signal sent by firmware
 * \details Selects where firmware sends the completion signal. The value is
 *          carried by the 1-bit signal_arc field of
 *          eng_arc_cmd_wd_fence_and_exec_t.
 */
enum signaling_completion_type_t {
	SIGNAL_TO_SYNC_SCHEME_SOB = 0x0,
	SIGNAL_TO_AUX_REG = 0x1,
	SIGNAL_COUNT = 0x2,
	/*
	 * Original, misspelled enumerator names. They are kept with identical
	 * values so that existing users of this header keep compiling.
	 */
	SINGAL_TO_AUX_REG = SIGNAL_TO_AUX_REG,
	SINGAL_COUNT = SIGNAL_COUNT
};

/**
* \struct eng_arc_cmd_wd_fence_and_exec_t
* \brief Work distribution, fence and execute
Expand All @@ -1019,18 +1097,40 @@ struct eng_arc_cmd_wd_fence_and_exec_t {
* Number of DMAs should complete before the execution can start.
* Expected value is 1.
*/
uint32_t reserved:19;
uint32_t dma2_completion:3;
/**<
* reserved
* Number of DMAs should complete before the execution can start.
* This wait is for dma waiting for dma. Can have 0 or more value.
*/
uint32_t wd_ctxt_id:3;
/**<
* a context number from 0 to max number of contexts that fw supports
*/
uint32_t reserved2:2;
uint32_t wd_ctxt2_id:3;
/**<
* a context number from 0 to max number of weight_base_address contexts
*/
uint32_t patch_address:1;
/**<
* Patch address before execution
*/
uint32_t signal_arc:1;
/**<
* which sob to signal from signaling_completion_type_t
*/
uint32_t expert_mapping_idx: 6;
/**<
* expert mapping index
*/
uint32_t conditional_activation:1;
/**<
* conditional_activation
*/
uint32_t :6;
/**<
* reserved
*/

} __attribute__ ((aligned(4), __packed__));

/**
Expand Down Expand Up @@ -1069,6 +1169,73 @@ struct eng_arc_cmd_sched_dma_t {
*/
} __attribute__ ((aligned(4), __packed__));


/**
 * \enum  dma_type_t
 * \brief DMA type
 * \details Values for the 1-bit dma_type field of
 *          eng_arc_cmd_sched_dma_v2_t.
 */
enum dma_type_t {
	DMA_EXPERT_MAPPING_TABLE	= 0x0,
	DMA_HBM_TENSOR_ADDR		= 0x1,
	DMA_COUNT			= 0x2	/* number of DMA types above */
};

/**
 * \struct  eng_arc_cmd_sched_dma_v2_t
 * \brief   Schedule DMA version 2 to update GC context
 * \details Initiate a DMA transfer to update expert mapping context.
 *          The bit-fields fill exactly one 32-bit word and are followed by
 *          a 32-bit address offset.
 */
struct eng_arc_cmd_sched_dma_v2_t {
	uint32_t cmd_type:4;
	/**<
	 * set to ECB_CMD_SCHED_DMA_V2
	 */
	uint32_t yield:1;
	/**<
	 * Yield ARC control to the other list (s/d) after execution
	 */
	uint32_t dma_completion:3;
	/**<
	 * Number of DMAs that should complete before starting this DMA
	 */
	uint32_t addr_index:3;
	/**<
	 * Recipe base address register index to be used to generate
	 * target address of 64 bits
	 */
	uint32_t size:8;
	/**<
	 * size of the buffer in bytes
	 */
	uint32_t dma_type:1;
	/**<
	 * What is to be transferred, from dma_type_t
	 * 0 - DMA_EXPERT_MAPPING_TABLE
	 * 1 - DMA_HBM_TENSOR_ADDR
	 */
	uint32_t wait_for_eng:1;
	/**<
	 * Wait for a signal from Engine
	 */
	uint32_t expert_mapping_idx: 6;
	/**<
	 * expert mapping index
	 */
	uint32_t :2;
	/**<
	 * Reserved
	 */
	uint32_t wd_ctxt_id:3;
	/**<
	 * GC Context ID that needs to be updated
	 * This is used to calculate Destination Address
	 */
	uint32_t addr_offset;
	/**<
	 * 32bit address offset into recipe base address
	 */
} __attribute__ ((aligned(4), __packed__));


/**
* \struct eng_arc_cmd_sfg_t
* \brief Signal From Graph
Expand Down
Loading

0 comments on commit 7e01f03

Please sign in to comment.