diff --git a/media_softlet/linux/common/os/i915/include/mos_bufmgr_api.h b/media_softlet/linux/common/os/i915/include/mos_bufmgr_api.h
index 71cc0109952..623e748bee5 100644
--- a/media_softlet/linux/common/os/i915/include/mos_bufmgr_api.h
+++ b/media_softlet/linux/common/os/i915/include/mos_bufmgr_api.h
@@ -204,12 +204,12 @@ struct mos_linux_bo {
 
 #define BO_ALLOC_FOR_RENDER (1<<0)
 
-#define PAT_INDEX_INVALID ((uint32_t)-1)
+#define PAT_INDEX_INVALID ((uint16_t)-1)
 
 struct mos_drm_bo_alloc_ext{
     unsigned long flags = 0;
     uint32_t tiling_mode = TILING_NONE;
     int mem_type = 0;
-    unsigned int pat_index = PAT_INDEX_INVALID;
+    unsigned int pat_index = PAT_INDEX_INVALID; // Note: should be changed to uint16_t to match PAT_INDEX_INVALID
     bool cpu_cacheable = true;
 };
diff --git a/media_softlet/linux/common/os/xe/mos_bufmgr_xe.c b/media_softlet/linux/common/os/xe/mos_bufmgr_xe.c
index 208db337890..41759174017 100644
--- a/media_softlet/linux/common/os/xe/mos_bufmgr_xe.c
+++ b/media_softlet/linux/common/os/xe/mos_bufmgr_xe.c
@@ -305,6 +305,61 @@ typedef struct mos_xe_bo_gem {
      * Boolean of whether this buffer is exported to external
      */
     bool is_exported;
+    /**
+     * @cpu_caching: The CPU caching mode to select for this object. If
+     * mmapping the object, the mode selected here will also be used.
+     *
+     * Supported values:
+     *
+     * DRM_XE_GEM_CPU_CACHING_WB: Allocate the pages with write-back
+     * caching. On iGPU this can't be used for scanout surfaces. Currently
+     * not allowed for objects placed in VRAM.
+     *
+     * DRM_XE_GEM_CPU_CACHING_WC: Allocate the pages as write-combined. This
+     * is uncached. Scanout surfaces should likely use this. All objects
+     * that can be placed in VRAM must use this.
+     */
+    uint16_t cpu_caching;
+
+    /**
+     * @pat_index: The platform-defined @pat_index to use for this mapping.
+     * The index basically maps to some predefined memory attributes,
+     * including things like caching, coherency, compression etc. The exact
+     * meaning of the pat_index is platform specific and defined in the
+     * Bspec and PRMs. When the KMD sets up the binding, the index here is
+     * encoded into the ppGTT PTE.
+     *
+     * For coherency, the @pat_index needs to be at least 1way coherent when
+     * drm_xe_gem_create.cpu_caching is DRM_XE_GEM_CPU_CACHING_WB. The KMD
+     * will extract the coherency mode from the @pat_index and reject it if
+     * there is a mismatch (see note below for pre-MTL platforms).
+     *
+     * Note: On pre-MTL platforms there is only a caching mode and no
+     * explicit coherency mode, but on such hardware there is always a
+     * shared-LLC (or it is a dGPU), so all GT memory accesses are coherent
+     * with CPU caches even with the caching mode set as uncached. It is
+     * only the display engine that is incoherent (on dGPU the object must
+     * be in VRAM, which is always mapped as WC on the CPU). However, to
+     * keep the uapi somewhat consistent with newer platforms, the KMD
+     * groups the different cache levels into the following coherency
+     * buckets on all pre-MTL platforms:
+     *
+     *  ppGTT UC -> COH_NONE
+     *  ppGTT WC -> COH_NONE
+     *  ppGTT WT -> COH_NONE
+     *  ppGTT WB -> COH_AT_LEAST_1WAY
+     *
+     * In practice UC/WC/WT should only ever be used for scanout surfaces on
+     * such platforms (or perhaps in general for dma-buf if shared with
+     * another device), since it is only the display engine that is actually
+     * incoherent. Everything else should typically use WB given that we
+     * have a shared-LLC. On MTL+ this completely changes: the HW defines
+     * the coherency mode as part of the @pat_index, and incoherent GT
+     * access is possible.
+     *
+     * Note: For userptr and externally imported dma-buf, the kernel expects
+     * either 1WAY or 2WAY for the @pat_index.
+     */
+    uint16_t pat_index;
 
     /**
      * For cmd bo, it has an exec bo list which saves all exec bo in it.
@@ -1113,7 +1168,7 @@ __mos_bo_set_offset_xe(MOS_LINUX_BO *bo)
 }
 
 static int __mos_vm_bind_xe(int fd, uint32_t vm_id, uint32_t exec_queue_id, uint32_t bo_handle,
-            uint64_t offset, uint64_t addr, uint64_t size, uint32_t op, uint32_t flags,
+            uint64_t offset, uint64_t addr, uint64_t size, uint16_t pat_index, uint32_t op, uint32_t flags,
             struct drm_xe_sync *sync, uint32_t num_syncs,
             uint32_t region, uint64_t ext)
 {
@@ -1126,6 +1181,7 @@ static int __mos_vm_bind_xe(int fd, uint32_t vm_id, uint32_t exec_queue_id, uint
     bind.exec_queue_id = exec_queue_id;
     bind.num_binds = 1;
     bind.bind.obj = bo_handle;
+    bind.bind.pat_index = pat_index;
     bind.bind.obj_offset = offset;
     bind.bind.range = size;
     bind.bind.addr = addr;
@@ -1146,7 +1202,7 @@ static int __mos_vm_bind_xe(int fd, uint32_t vm_id, uint32_t exec_queue_id, uint
 }
 
 static int mos_xe_vm_bind_sync(int fd, uint32_t vm_id, uint32_t bo, uint64_t offset,
-            uint64_t addr, uint64_t size, uint32_t op, bool is_defer)
+            uint64_t addr, uint64_t size, uint16_t pat_index, uint32_t op, bool is_defer)
 {
     if (is_defer)
     {
@@ -1180,10 +1236,10 @@ static int mos_xe_vm_bind_sync(int fd, uint32_t vm_id, uint32_t bo, uint64_t off
 }
 
 static int mos_xe_vm_bind_async(int fd, uint32_t vm_id, uint32_t bo, uint64_t offset,
-            uint64_t addr, uint64_t size, uint32_t op,
+            uint64_t addr, uint64_t size, uint16_t pat_index, uint32_t op,
             struct drm_xe_sync *sync, uint32_t num_syncs)
 {
-    return __mos_vm_bind_xe(fd, vm_id, 0, bo, offset, addr, size,
+    return __mos_vm_bind_xe(fd, vm_id, 0, bo, offset, addr, size, pat_index,
                 op, XE_VM_BIND_FLAG_ASYNC,
                 sync, num_syncs, 0, 0);
 }
@@ -1239,6 +1295,11 @@ mos_bo_alloc_xe(struct mos_bufmgr *bufmgr,
     {
         create.flags |= XE_GEM_CREATE_FLAG_DEFER_BACKING;
     }
+
+    /**
+     * Note: currently only WB and WC are supported; UC and other caching modes are not allowed.
+     */
+    create.cpu_caching = alloc->ext.cpu_cacheable ? DRM_XE_GEM_CPU_CACHING_WB : DRM_XE_GEM_CPU_CACHING_WC;
     ret = drmIoctl(bufmgr_gem->fd,
             DRM_IOCTL_XE_GEM_CREATE,
             &create);
@@ -1251,6 +1312,11 @@ mos_bo_alloc_xe(struct mos_bufmgr *bufmgr,
     bo_gem->bo.vm_id = INVALID_VM;
     bo_gem->bo.bufmgr = bufmgr;
     bo_gem->bo.align = bo_align;
+    bo_gem->cpu_caching = create.cpu_caching;
+    /**
+     * Note: fall back to a default pat_index to overwrite an invalid argument; normally this should not happen.
+     */
+    bo_gem->pat_index = alloc->ext.pat_index == PAT_INDEX_INVALID ? 0 : alloc->ext.pat_index;
 
     if (bufmgr_gem->mem_profiler_fd != -1)
     {
@@ -1274,7 +1340,15 @@ mos_bo_alloc_xe(struct mos_bufmgr *bufmgr,
 
     __mos_bo_set_offset_xe(&bo_gem->bo);
 
-    ret = mos_xe_vm_bind_sync(bufmgr_gem->fd, bufmgr_gem->vm_id, bo_gem->gem_handle, 0, bo_gem->bo.offset64, bo_gem->bo.size, XE_VM_BIND_OP_MAP, bufmgr_gem->is_defer_creation_and_binding);
+    ret = mos_xe_vm_bind_sync(bufmgr_gem->fd,
+                bufmgr_gem->vm_id,
+                bo_gem->gem_handle,
+                0, // addr, used for userptr
+                bo_gem->bo.offset64,
+                bo_gem->bo.size,
+                bo_gem->pat_index,
+                XE_VM_BIND_OP_MAP,
+                bufmgr_gem->is_defer_creation_and_binding);
     if (ret)
     {
         MOS_DRM_ASSERTMESSAGE("mos_xe_vm_bind_sync ret: %d", ret);
@@ -1414,6 +1488,7 @@ mos_bo_alloc_userptr_xe(struct mos_bufmgr *bufmgr,
     bo_gem->gem_handle = INVALID_HANDLE;
     bo_gem->bo.handle = INVALID_HANDLE;
     bo_gem->bo.size = alloc_uptr->size;
+    bo_gem->pat_index = 0; // Currently there is no cpu_caching or pat_index for a userptr bo; hard-coded temporarily.
     bo_gem->bo.bufmgr = bufmgr;
     bo_gem->bo.vm_id = INVALID_VM;
     bo_gem->mem_region = MEMZONE_SYS;
@@ -1434,7 +1509,15 @@ mos_bo_alloc_userptr_xe(struct mos_bufmgr *bufmgr,
 
     __mos_bo_set_offset_xe(&bo_gem->bo);
 
-    ret = mos_xe_vm_bind_sync(bufmgr_gem->fd, bufmgr_gem->vm_id, 0, (uint64_t)alloc_uptr->addr, bo_gem->bo.offset64, bo_gem->bo.size, XE_VM_BIND_OP_MAP_USERPTR, bufmgr_gem->is_defer_creation_and_binding);
+    ret = mos_xe_vm_bind_sync(bufmgr_gem->fd,
+                bufmgr_gem->vm_id,
+                0, // bo handle, not used for userptr
+                (uint64_t)alloc_uptr->addr,
+                bo_gem->bo.offset64,
+                bo_gem->bo.size,
+                bo_gem->pat_index,
+                XE_VM_BIND_OP_MAP_USERPTR,
+                bufmgr_gem->is_defer_creation_and_binding);
     if (ret)
     {
         MOS_DRM_ASSERTMESSAGE("mos_xe_vm_bind_userptr_sync ret: %d", ret);
@@ -1513,6 +1596,11 @@ mos_bo_create_from_prime_xe(struct mos_bufmgr *bufmgr, int prime_fd, int size)
 
     bo_gem->bo.size = size;
     bo_gem->bo.handle = handle;
+    /*
+     * Note: currently there is no cpu_caching or pat_index for an externally imported bo, so it is hard-coded for now.
+     * Later the pat_index should be derived from the customer_gmminfo with at least 1way coherency.
+     */
+    bo_gem->pat_index = 0; // Note: needs a hard-coded pat_index with at least 1way coherency.
     bo_gem->bo.bufmgr = bufmgr;
 
     bo_gem->gem_handle = handle;
@@ -1526,7 +1614,15 @@ mos_bo_create_from_prime_xe(struct mos_bufmgr *bufmgr, int prime_fd, int size)
 
     __mos_bo_set_offset_xe(&bo_gem->bo);
 
-    ret = mos_xe_vm_bind_sync(bufmgr_gem->fd, bufmgr_gem->vm_id, bo_gem->gem_handle, 0, bo_gem->bo.offset64, bo_gem->bo.size, XE_VM_BIND_OP_MAP, bufmgr_gem->is_defer_creation_and_binding);
+    ret = mos_xe_vm_bind_sync(bufmgr_gem->fd,
+                bufmgr_gem->vm_id,
+                bo_gem->gem_handle,
+                0,
+                bo_gem->bo.offset64,
+                bo_gem->bo.size,
+                bo_gem->pat_index,
+                XE_VM_BIND_OP_MAP,
+                bufmgr_gem->is_defer_creation_and_binding);
     if (ret)
     {
         MOS_DRM_ASSERTMESSAGE("mos_xe_vm_bind_sync ret: %d", ret);
@@ -3091,7 +3186,15 @@ mos_bo_free_xe(struct mos_linux_bo *bo)
 
     if(bo->vm_id != INVALID_VM)
     {
-        ret = mos_xe_vm_bind_sync(bufmgr_gem->fd, bo->vm_id, 0, 0, bo->offset64, bo->size, XE_VM_BIND_OP_UNMAP, bufmgr_gem->is_defer_creation_and_binding);
+        ret = mos_xe_vm_bind_sync(bufmgr_gem->fd,
+                    bo->vm_id,
+                    0,
+                    0,
+                    bo->offset64,
+                    bo->size,
+                    bo_gem->pat_index,
+                    XE_VM_BIND_OP_UNMAP,
+                    bufmgr_gem->is_defer_creation_and_binding);
         if (ret)
         {
             MOS_DRM_ASSERTMESSAGE("mos_gem_bo_free mos_vm_unbind ret error. bo:0x%lx, vm_id:%d\r",
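
Taken as a whole, the patch threads a per-bo PAT index from mos_drm_bo_alloc_ext through bo creation into every DRM_IOCTL_XE_VM_BIND call. A minimal caller-side sketch of the intended usage follows. It assumes the mos_bo_alloc() entry point from mos_bufmgr_api.h dispatches to mos_bo_alloc_xe(); query_wb_pat_index() is a hypothetical stand-in for the GMM-based PAT lookup that the notes in the patch defer to later work.

#include "mos_bufmgr_api.h"

/* Hypothetical helper: resolves a platform PAT index that is at least 1way
 * coherent, as the KMD requires for DRM_XE_GEM_CPU_CACHING_WB objects. */
extern uint16_t query_wb_pat_index(struct mos_bufmgr *bufmgr);

static struct mos_linux_bo *alloc_wb_bo(struct mos_bufmgr *bufmgr, unsigned long size)
{
    struct mos_drm_bo_alloc alloc;

    alloc.name = "wb_bo";
    alloc.size = size;
    /* cpu_cacheable == true selects DRM_XE_GEM_CPU_CACHING_WB in mos_bo_alloc_xe(). */
    alloc.ext.cpu_cacheable = true;
    /* Leaving pat_index as PAT_INDEX_INVALID makes mos_bo_alloc_xe() fall
     * back to index 0; an explicit, platform-correct index is preferred. */
    alloc.ext.pat_index = query_wb_pat_index(bufmgr);

    /* Creates the gem object, then vm-binds it with the chosen pat_index. */
    return mos_bo_alloc(bufmgr, &alloc);
}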