From 4a33243415bdcfea2cd0d9125aa4ae6b4c2f7b28 Mon Sep 17 00:00:00 2001
From: Maksim Kutakov
Date: Thu, 10 Oct 2024 18:42:10 +0200
Subject: [PATCH] Add convolution pattern test

---
 .../src/common/lora_pattern.cpp               | 118 +++++++++++++++---
 1 file changed, 100 insertions(+), 18 deletions(-)

diff --git a/src/plugins/intel_cpu/tests/functional/custom/subgraph_tests/src/common/lora_pattern.cpp b/src/plugins/intel_cpu/tests/functional/custom/subgraph_tests/src/common/lora_pattern.cpp
index 31ab18406828fc..10e217045529d4 100644
--- a/src/plugins/intel_cpu/tests/functional/custom/subgraph_tests/src/common/lora_pattern.cpp
+++ b/src/plugins/intel_cpu/tests/functional/custom/subgraph_tests/src/common/lora_pattern.cpp
@@ -3,6 +3,7 @@
 //
 
 #include "common_test_utils/node_builders/eltwise.hpp"
+#include "common_test_utils/node_builders/convolution.hpp"
 #include "common_test_utils/ov_tensor_utils.hpp"
 #include "shared_test_classes/base/ov_subgraph.hpp"
 #include "utils/cpu_test_utils.hpp"
@@ -12,6 +13,31 @@ namespace ov {
 namespace test {
 
 class LoraPattern : public SubgraphBaseTest {
+protected:
+    void run_test() {
+        compile_model();
+        inferRequest = compiledModel.create_infer_request();
+        ASSERT_TRUE(inferRequest);
+        generate_inputs(targetStaticShapes.front());
+        for (const auto& input : inputs) {
+            inferRequest.set_tensor(input.first, input.second);
+        }
+
+        inferRequest.infer();
+        auto outputs = function->outputs();
+
+        auto tx_result = inferRequest.get_tensor(outputs[0]);
+        auto tz_result = inferRequest.get_tensor(outputs[1]);
+        ov::test::utils::compare(tx_result, tz_result, 1e-4, 1e-4);
+    }
+
+protected:
+    static constexpr auto t4_name = "lora/MatMul.B";
+    static constexpr auto t5_name = "lora/MatMul.alpha";
+    static constexpr auto t6_name = "lora/MatMul.A";
+};
+
+class LoraPatternMatmul : public LoraPattern {
 public:
     void SetUp() override {
         targetDevice = utils::DEVICE_CPU;
@@ -57,34 +83,90 @@ class LoraPattern : public SubgraphBaseTest {
         function = std::make_shared<ov::Model>(ov::ResultVector({result_x, result_z}),
                                                ov::SinkVector({t4_assign, t5_assign, t6_assign}),
                                                ov::ParameterVector({param_y, param_w}));
+
+        targetStaticShapes = {{{{1, 20, K}}, {{N, K}}}};
     }
 
 public:
     static constexpr size_t K = 563ul;
     static constexpr size_t N = 2048ul;
-
-    static constexpr auto t4_name = "lora/MatMul.B";
-    static constexpr auto t5_name = "lora/MatMul.alpha";
-    static constexpr auto t6_name = "lora/MatMul.A";
 };
 
-TEST_F(LoraPattern, smoke_empty_states) {
-    compile_model();
-    inferRequest = compiledModel.create_infer_request();
-    ASSERT_TRUE(inferRequest);
-    targetStaticShapes = {{{{1, 20, K}}, {{N, K}}}};
-    generate_inputs(targetStaticShapes.front());
-    for (const auto& input : inputs) {
-        inferRequest.set_tensor(input.first, input.second);
+TEST_F(LoraPatternMatmul, smoke_empty_states) {
+    run_test();
+}
+
+class LoraPatternConvolution : public LoraPattern {
+public:
+    void SetUp() override {
+        targetDevice = utils::DEVICE_CPU;
+
+        auto netType = ov::element::f32;
+
+        ov::PartialShape shape_x = {-1, num_channels, -1, -1};
+
+        auto param_y = std::make_shared<ov::op::v0::Parameter>(netType, shape_x);
+
+        // Original Convolution that is modified by LoRA adapter later
+        auto tx = ov::test::utils::make_convolution(param_y,
+                                                    netType,
+                                                    {1, 1},
+                                                    {1, 1},
+                                                    {0, 0},
+                                                    {0, 0},
+                                                    {1, 1},
+                                                    ov::op::PadType::EXPLICIT,
+                                                    num_channels);
+
+        // LoRA parameters from states
+        auto variable_t4 = std::make_shared<ov::op::util::Variable>(
+            ov::op::util::VariableInfo{ov::PartialShape({num_channels, -1}), netType, t4_name});
+        auto t4 = std::make_shared<ov::op::v6::ReadValue>(variable_t4);
+        auto t4_assign = std::make_shared<ov::op::v6::Assign>(t4, variable_t4);
+
+        auto variable_t5 = std::make_shared<ov::op::util::Variable>(
+            ov::op::util::VariableInfo{ov::PartialShape({1, -1}), netType, t5_name});
+        auto t5 = std::make_shared<ov::op::v6::ReadValue>(variable_t5);
+        auto t5_assign = std::make_shared<ov::op::v6::Assign>(t5, variable_t5);
+
+        auto variable_t6 = std::make_shared<ov::op::util::Variable>(
+            ov::op::util::VariableInfo{ov::PartialShape({-1, num_channels}), netType, t6_name});
+        auto t6 = std::make_shared<ov::op::v6::ReadValue>(variable_t6);
+        auto t6_assign = std::make_shared<ov::op::v6::Assign>(t6, variable_t6);
+
+        // LoRA pattern with additional Transposes to move channel dimensions into positions where MatMul can be applied
+        auto t4940 =
+            std::make_shared<ov::op::v0::Constant>(ov::element::i32, ov::Shape{4}, std::vector<size_t>{2, 3, 0, 1});
+
+        auto t4941 = std::make_shared<ov::op::v1::Transpose>(param_y, t4940);
+        auto t4942 = std::make_shared<ov::op::v0::MatMul>(t4941, t6, false, true);
+        auto t4943 = std::make_shared<ov::op::v1::Multiply>(t4942, t5);
+        auto t4944 = std::make_shared<ov::op::v0::MatMul>(t4943, t4, false, true);
+
+        auto t4945 =
+            std::make_shared<ov::op::v0::Constant>(ov::element::i32, ov::Shape{4}, std::vector<size_t>{2, 3, 0, 1});
+        auto t4946 = std::make_shared<ov::op::v1::Transpose>(t4944, t4945);
+
+
+        // Mix LoRA part into normally computed activations after the "main" Convolution
+        auto tz = std::make_shared<ov::op::v1::Add>(tx, t4946);
+
+        auto result_x = std::make_shared<ov::op::v0::Result>(tx);
+        auto result_z = std::make_shared<ov::op::v0::Result>(tz);
+
+        function = std::make_shared<ov::Model>(ov::ResultVector({result_x, result_z}),
+                                               ov::SinkVector({t4_assign, t5_assign, t6_assign}),
+                                               ov::ParameterVector({param_y}));
+
+        targetStaticShapes = {{{1, num_channels, 64, 64}}};
     }
 
-    inferRequest.infer();
-    auto outputs = function->outputs();
+public:
+    static constexpr size_t num_channels = 320ul;
+};
 
-    auto tx_result = inferRequest.get_tensor(outputs[0]);
-    auto tz_result = inferRequest.get_tensor(outputs[1]);
-    ov::test::utils::compare(tx_result, tz_result, 1e-4, 1e-4);
+TEST_F(LoraPatternConvolution, smoke_empty_states) {
+    run_test();
 }
-
 } // namespace test
 } // namespace ov