From c22f4ae50002ef0a93bfe1895684f36abd92517d Mon Sep 17 00:00:00 2001 From: Fadi Arafeh Date: Wed, 30 Oct 2024 09:24:21 +0000 Subject: [PATCH] src: cpu: aarch64: lowp_matmul: Make weights constant Setting the weights as constant allows us to avoid redundant pretranspose and reduction operations in Arm Compute Library (ACL) every time execute is called (they are now run once and cached). This delivers big speedups especially for relatively small matmuls. Note that this is a temp fix that needs to be handled carefully by primitive caches in frameworks, since the ACL object is now holding more state - i.e. we want to make sure that the cache maps a layer with a specific set of weights to the oneDNN primitive storing those weights. We're currently working on the proper fix for this which involves making lowp_gemm stateless and fixed-format in ACL and oneDNN. --- src/cpu/aarch64/matmul/acl_lowp_matmul.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/cpu/aarch64/matmul/acl_lowp_matmul.cpp b/src/cpu/aarch64/matmul/acl_lowp_matmul.cpp index 076d5fd321a..8593492f4db 100644 --- a/src/cpu/aarch64/matmul/acl_lowp_matmul.cpp +++ b/src/cpu/aarch64/matmul/acl_lowp_matmul.cpp @@ -121,7 +121,7 @@ status_t acl_lowp_matmul_t::pd_t::init(engine_t *engine) { = arm_compute::TensorInfo(arm_compute::TensorShape(N(), K()), 1, arm_compute::DataType::QASYMM8_SIGNED, arm_compute::QuantizationInfo(1.0, 0, true)); - almc_.wei_tensor_info.set_are_values_constant(false); + almc_.wei_tensor_info.set_are_values_constant(true); almc_.bia_tensor_info = arm_compute::TensorInfo( arm_compute::TensorShape(), 1, arm_compute::DataType::F32);