From dab67f4fdccf4be6328722e3e061beedc6a24dec Mon Sep 17 00:00:00 2001 From: Zhaoliang Ma Date: Fri, 8 Sep 2023 15:04:35 +0800 Subject: [PATCH] [VP9] [Encode] Fill padding with gmm CpuBlt on MTL Fill the padding for unaligned height input with gmmlib CpuBlt() for VP9 encoding on MTL. --- .../vp9/packet/encode_vp9_vdenc_packet.cpp | 121 +++++++++++++++--- 1 file changed, 102 insertions(+), 19 deletions(-) diff --git a/media_softlet/agnostic/common/codec/hal/enc/vp9/packet/encode_vp9_vdenc_packet.cpp b/media_softlet/agnostic/common/codec/hal/enc/vp9/packet/encode_vp9_vdenc_packet.cpp index 0d977c90784..23a2bfe77eb 100644 --- a/media_softlet/agnostic/common/codec/hal/enc/vp9/packet/encode_vp9_vdenc_packet.cpp +++ b/media_softlet/agnostic/common/codec/hal/enc/vp9/packet/encode_vp9_vdenc_packet.cpp @@ -796,35 +796,118 @@ void Vp9VdencPkt::fill_pad_with_value(PMOS_SURFACE psSurface, uint32_t real_heig uint32_t pitch = psSurface->dwPitch; uint32_t UVPlaneOffset = psSurface->UPlaneOffset.iSurfaceOffset; uint32_t YPlaneOffset = psSurface->dwOffset; + uint32_t pad_rows = aligned_height - real_height; + uint32_t y_plane_size = pitch * real_height; + uint32_t uv_plane_size = pitch * real_height / 2; MOS_LOCK_PARAMS lockFlags; MOS_ZeroMemory(&lockFlags, sizeof(MOS_LOCK_PARAMS)); lockFlags.WriteOnly = 1; - uint8_t *src_data = (uint8_t *)m_osInterface->pfnLockResource(m_osInterface, &(psSurface->OsResource), &lockFlags); - ENCODE_CHK_NULL_NO_STATUS_RETURN(src_data); + // padding for the linear format buffer. + if (psSurface->OsResource.TileType == MOS_TILE_LINEAR) + { + uint8_t *src_data = (uint8_t *)m_osInterface->pfnLockResource(m_osInterface, &(psSurface->OsResource), &lockFlags); - uint32_t pad_rows = aligned_height - real_height; + if (!src_data) + return; - uint8_t *src_data_y = src_data + YPlaneOffset; - uint32_t y_plane_size = pitch * real_height; - uint8_t *src_data_y_end = src_data_y + y_plane_size; - uint32_t y_pitch = pitch; - for (uint32_t i = 0; i < pad_rows; i++) - { - MOS_SecureMemcpy(src_data_y_end + i * y_pitch, y_pitch, src_data_y_end - y_pitch, y_pitch); - } + uint8_t *src_data_y = src_data + YPlaneOffset; + uint8_t *src_data_y_end = src_data_y + y_plane_size; + for (uint32_t i = 0; i < pad_rows; i++) + { + MOS_SecureMemcpy(src_data_y_end + i * pitch, pitch, src_data_y_end - pitch, pitch); + } - uint8_t *src_data_uv = src_data + UVPlaneOffset; - uint32_t uv_plane_size = pitch * real_height / 2; - uint8_t *src_data_uv_end = src_data_uv + uv_plane_size; - uint32_t uv_pitch = pitch / 2; - for (uint32_t i = 0; i < pad_rows; i++) + uint8_t *src_data_uv = src_data + UVPlaneOffset; + uint8_t *src_data_uv_end = src_data_uv + uv_plane_size; + for (uint32_t i = 0; i < pad_rows / 2; i++) + { + MOS_SecureMemcpy(src_data_uv_end + i * pitch, pitch, src_data_uv_end - pitch, pitch); + } + + m_osInterface->pfnUnlockResource(m_osInterface, &(psSurface->OsResource)); + } + else if (psSurface->OsResource.TileType == MOS_TILE_Y) { - MOS_SecureMemcpy(src_data_uv_end + i * uv_pitch, uv_pitch, src_data_uv_end - uv_pitch, uv_pitch); + // we don't copy out the whole tiled buffer to linear and padding on the tiled buffer directly. + lockFlags.TiledAsTiled = 1; + + uint8_t *src_data = (uint8_t *)m_osInterface->pfnLockResource(m_osInterface, &(psSurface->OsResource), &lockFlags); + if (!src_data) + return; + + uint8_t* padding_data = (uint8_t *)MOS_AllocMemory(pitch * pad_rows); + + // Copy last Y row data to linear padding data. + GMM_RES_COPY_BLT gmmResCopyBlt = {0}; + gmmResCopyBlt.Gpu.pData = src_data; + gmmResCopyBlt.Gpu.OffsetX = 0; + gmmResCopyBlt.Gpu.OffsetY = (YPlaneOffset + y_plane_size - pitch) / pitch; + gmmResCopyBlt.Sys.pData = padding_data; + gmmResCopyBlt.Sys.RowPitch = pitch; + gmmResCopyBlt.Sys.BufferSize = pitch * pad_rows; + gmmResCopyBlt.Sys.SlicePitch = pitch; + gmmResCopyBlt.Blt.Slices = 1; + gmmResCopyBlt.Blt.Upload = false; + gmmResCopyBlt.Blt.Width = pitch; + gmmResCopyBlt.Blt.Height = 1; + psSurface->OsResource.pGmmResInfo->CpuBlt(&gmmResCopyBlt); + // Fill the remain padding lines with last Y row data. + for (uint32_t i = 1; i < pad_rows; i++) + { + MOS_SecureMemcpy(padding_data + i * pitch, pitch, padding_data, pitch); + } + // Filling the padding for Y. + gmmResCopyBlt.Gpu.pData = src_data; + gmmResCopyBlt.Gpu.OffsetX = 0; + gmmResCopyBlt.Gpu.OffsetY = (YPlaneOffset + y_plane_size) / pitch; + gmmResCopyBlt.Sys.pData = padding_data; + gmmResCopyBlt.Sys.RowPitch = pitch; + gmmResCopyBlt.Sys.BufferSize = pitch * pad_rows; + gmmResCopyBlt.Sys.SlicePitch = pitch; + gmmResCopyBlt.Blt.Slices = 1; + gmmResCopyBlt.Blt.Upload = true; + gmmResCopyBlt.Blt.Width = pitch; + gmmResCopyBlt.Blt.Height = pad_rows; + psSurface->OsResource.pGmmResInfo->CpuBlt(&gmmResCopyBlt); + + // Copy last UV row data to linear padding data. + gmmResCopyBlt.Gpu.pData = src_data; + gmmResCopyBlt.Gpu.OffsetX = 0; + gmmResCopyBlt.Gpu.OffsetY = (UVPlaneOffset + uv_plane_size - pitch) / pitch; + gmmResCopyBlt.Sys.pData = padding_data; + gmmResCopyBlt.Sys.RowPitch = pitch; + gmmResCopyBlt.Sys.BufferSize = pitch * pad_rows / 2; + gmmResCopyBlt.Sys.SlicePitch = pitch; + gmmResCopyBlt.Blt.Slices = 1; + gmmResCopyBlt.Blt.Upload = false; + gmmResCopyBlt.Blt.Width = pitch; + gmmResCopyBlt.Blt.Height = 1; + psSurface->OsResource.pGmmResInfo->CpuBlt(&gmmResCopyBlt); + // Fill the remain padding lines with last UV row data. + for (uint32_t i = 1; i < pad_rows / 2; i++) + { + MOS_SecureMemcpy(padding_data + i * pitch, pitch, padding_data, pitch); + } + // Filling the padding for UV. + gmmResCopyBlt.Gpu.pData = src_data; + gmmResCopyBlt.Gpu.OffsetX = 0; + gmmResCopyBlt.Gpu.OffsetY = (UVPlaneOffset + uv_plane_size) / pitch; + gmmResCopyBlt.Sys.pData = padding_data; + gmmResCopyBlt.Sys.RowPitch = pitch; + gmmResCopyBlt.Sys.BufferSize = pitch * pad_rows / 2; + gmmResCopyBlt.Sys.SlicePitch = pitch; + gmmResCopyBlt.Blt.Slices = 1; + gmmResCopyBlt.Blt.Upload = true; + gmmResCopyBlt.Blt.Width = pitch; + gmmResCopyBlt.Blt.Height = pad_rows / 2; + psSurface->OsResource.pGmmResInfo->CpuBlt(&gmmResCopyBlt); + + MOS_FreeMemory(padding_data); + padding_data = nullptr; + m_osInterface->pfnUnlockResource(m_osInterface, &(psSurface->OsResource)); } - - m_osInterface->pfnUnlockResource(m_osInterface, &(psSurface->OsResource)); } }