From 90a1e860c93b553fa9684113e02d41d622235c55 Mon Sep 17 00:00:00 2001
From: Nikita Malinin
Date: Fri, 8 Sep 2023 18:40:28 +0200
Subject: [PATCH 01/15] [PTQ][OV] Add GroupNormalization type (#2113)

### Changes

- Added new operation - GroupNormalization

### Reason for changes

- Performance degradations caused by an incorrect quantization scheme
- New operation support

### Related tickets

- 119821
- 119335

### Tests

- TBD
---
 nncf/common/hardware/configs/cpu.json         |   6 +
 nncf/common/hardware/opset.py                 |   1 +
 nncf/openvino/graph/metatypes/groups.py       |   1 +
 .../graph/metatypes/openvino_metatypes.py     |   7 +
 .../GroupNormalizationModel.dot               |  29 +++
 .../quantized/GroupNormalizationModel.dot     |  69 +++++++
 .../GroupNormalizationModel_mixed.json        | 174 ++++++++++++++++++
 .../GroupNormalizationModel_performance.json  | 174 ++++++++++++++++++
 tests/openvino/native/models.py               |  33 ++++
 9 files changed, 494 insertions(+)
 create mode 100644 tests/openvino/native/data/reference_graphs/original_nncf_graph/GroupNormalizationModel.dot
 create mode 100644 tests/openvino/native/data/reference_graphs/quantized/GroupNormalizationModel.dot
 create mode 100644 tests/openvino/native/data/reference_scales/GroupNormalizationModel_mixed.json
 create mode 100644 tests/openvino/native/data/reference_scales/GroupNormalizationModel_performance.json

diff --git a/nncf/common/hardware/configs/cpu.json b/nncf/common/hardware/configs/cpu.json
index db001c8ad07..9adf25550e7 100644
--- a/nncf/common/hardware/configs/cpu.json
+++ b/nncf/common/hardware/configs/cpu.json
@@ -258,6 +258,12 @@
                 "activations": "q8_a"
             }
         },
+        {
+            "type": "GroupNormalization",
+            "quantization": {
+                "activations": "q8_a"
+            }
+        },
         {"type": "Flatten"},
         {"type": "Squeeze"},
         {"type": "Unsqueeze"},
diff --git a/nncf/common/hardware/opset.py b/nncf/common/hardware/opset.py
index 685ada28118..32b7069013c 100644
--- a/nncf/common/hardware/opset.py
+++ b/nncf/common/hardware/opset.py
@@ -57,3 +57,4 @@ class HWConfigOpName:
     GELU = "Gelu"
     LSTMSEQUENCE = "LSTMSequence"
     GRUSEQUENCE = "GRUSequence"
+    GROUPNORMALIZATION = "GroupNormalization"
diff --git a/nncf/openvino/graph/metatypes/groups.py b/nncf/openvino/graph/metatypes/groups.py
index 84ad2acf3c1..e616fdf5dd7 100644
--- a/nncf/openvino/graph/metatypes/groups.py
+++ b/nncf/openvino/graph/metatypes/groups.py
@@ -80,6 +80,7 @@
     ov_metatypes.OVSquaredDifferenceMetatype,
     ov_metatypes.OVLSTMSequenceMetatype,
     ov_metatypes.OVGRUSequenceMetatype,
+    ov_metatypes.OVGroupNormalizationMetatype,
 ]
diff --git a/nncf/openvino/graph/metatypes/openvino_metatypes.py b/nncf/openvino/graph/metatypes/openvino_metatypes.py
index e6e42f50cd0..15d1a7648d1 100644
--- a/nncf/openvino/graph/metatypes/openvino_metatypes.py
+++ b/nncf/openvino/graph/metatypes/openvino_metatypes.py
@@ -673,6 +673,13 @@ class OVAbsMetatype(OVOpMetatype):
     op_names = ["Abs"]


+@OV_OPERATOR_METATYPES.register()
+class OVGroupNormalizationMetatype(OVOpMetatype):
+    name = "GroupNormalizationOp"
+    op_names = ["GroupNormalization"]
+    hw_config_names = [HWConfigOpName.GROUPNORMALIZATION]
+
+
 def get_operator_metatypes() -> List[Type[OperatorMetatype]]:
     """
     Returns a list of the operator metatypes.
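For reference, a minimal standalone sketch (not part of the diff) of how the newly supported operation is built with the OpenVINO Python API; it mirrors the `GroupNormalizationModel` test model added below, and the shapes here are illustrative:

```python
import numpy as np
import openvino.runtime as ov
from openvino.runtime import opset9 as opset
from openvino.runtime import opset12

# Four channels normalized in two groups; GroupNormalization takes
# per-channel scale and bias constants of shape [channels].
x = opset.parameter([1, 4, 8, 8], name="X")
scale = opset.constant(np.ones(4), dtype=np.float32)
bias = opset.constant(np.zeros(4), dtype=np.float32)
group_norm = opset12.group_normalization(x, scale, bias, num_groups=2, epsilon=1e-5)
model = ov.Model([opset.result(group_norm)], [x])
```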
diff --git a/tests/openvino/native/data/reference_graphs/original_nncf_graph/GroupNormalizationModel.dot b/tests/openvino/native/data/reference_graphs/original_nncf_graph/GroupNormalizationModel.dot new file mode 100644 index 00000000000..44ba7698a92 --- /dev/null +++ b/tests/openvino/native/data/reference_graphs/original_nncf_graph/GroupNormalizationModel.dot @@ -0,0 +1,29 @@ +strict digraph { +"0 Input_1" [id=0, type=Parameter]; +"1 Conv" [id=1, type=Convolution]; +"2 Conv_Add" [id=2, type=Add]; +"3 GroupNormalization_169" [id=3, type=GroupNormalization]; +"4 Relu" [id=4, type=Relu]; +"5 Mul" [id=5, type=Multiply]; +"6 Add" [id=6, type=Add]; +"7 Result" [id=7, type=Result]; +"8 Constant_173" [id=8, type=Constant]; +"9 Constant_171" [id=9, type=Constant]; +"10 Constant_168" [id=10, type=Constant]; +"11 Constant_167" [id=11, type=Constant]; +"12 Bias" [id=12, type=Constant]; +"13 Constant_163" [id=13, type=Constant]; +"0 Input_1" -> "1 Conv" [label="[1, 2, 3, 4, 4]", style=solid]; +"1 Conv" -> "2 Conv_Add" [label="[1, 4, 1, 2, 2]", style=solid]; +"2 Conv_Add" -> "3 GroupNormalization_169" [label="[1, 4, 3, 2, 2]", style=solid]; +"3 GroupNormalization_169" -> "4 Relu" [label="[1, 4, 3, 2, 2]", style=solid]; +"4 Relu" -> "5 Mul" [label="[1, 4, 3, 2, 2]", style=solid]; +"5 Mul" -> "6 Add" [label="[1, 4, 3, 2, 2]", style=solid]; +"6 Add" -> "7 Result" [label="[1, 4, 3, 2, 2]", style=solid]; +"8 Constant_173" -> "6 Add" [label="[1, 4, 1, 1, 1]", style=solid]; +"9 Constant_171" -> "5 Mul" [label="[1, 4, 1, 1, 1]", style=solid]; +"10 Constant_168" -> "3 GroupNormalization_169" [label="[4]", style=solid]; +"11 Constant_167" -> "3 GroupNormalization_169" [label="[4]", style=solid]; +"12 Bias" -> "2 Conv_Add" [label="[1, 1, 3, 1, 1]", style=solid]; +"13 Constant_163" -> "1 Conv" [label="[4, 2, 3, 3, 3]", style=solid]; +} diff --git a/tests/openvino/native/data/reference_graphs/quantized/GroupNormalizationModel.dot b/tests/openvino/native/data/reference_graphs/quantized/GroupNormalizationModel.dot new file mode 100644 index 00000000000..90d267ee9dd --- /dev/null +++ b/tests/openvino/native/data/reference_graphs/quantized/GroupNormalizationModel.dot @@ -0,0 +1,69 @@ +strict digraph { +"0 Input_1" [id=0, type=Parameter]; +"1 Input_1/fq_output_0" [id=1, type=FakeQuantize]; +"2 Conv" [id=2, type=Convolution]; +"3 Conv_Add" [id=3, type=Add]; +"4 Conv_Add/fq_output_0" [id=4, type=FakeQuantize]; +"5 GroupNormalization_27318" [id=5, type=GroupNormalization]; +"6 Relu" [id=6, type=Relu]; +"7 Relu/fq_output_0" [id=7, type=FakeQuantize]; +"8 Mul" [id=8, type=Multiply]; +"9 Add" [id=9, type=Add]; +"10 Result" [id=10, type=Result]; +"11 Constant_27322" [id=11, type=Constant]; +"12 Constant_27320" [id=12, type=Constant]; +"13 Constant_29930" [id=13, type=Constant]; +"14 Constant_29929" [id=14, type=Constant]; +"15 Constant_29928" [id=15, type=Constant]; +"16 Constant_29927" [id=16, type=Constant]; +"17 Constant_27317" [id=17, type=Constant]; +"18 Constant_27316" [id=18, type=Constant]; +"19 Constant_29925" [id=19, type=Constant]; +"20 Constant_29924" [id=20, type=Constant]; +"21 Constant_29923" [id=21, type=Constant]; +"22 Constant_29922" [id=22, type=Constant]; +"23 Bias" [id=23, type=Constant]; +"24 Conv/fq_weights_1" [id=24, type=FakeQuantize]; +"25 Constant_29920" [id=25, type=Constant]; +"26 Constant_29919" [id=26, type=Constant]; +"27 Constant_29918" [id=27, type=Constant]; +"28 Constant_29917" [id=28, type=Constant]; +"29 Constant_27312" [id=29, type=Constant]; +"30 Constant_29915" [id=30, type=Constant]; 
+"31 Constant_29914" [id=31, type=Constant]; +"32 Constant_29913" [id=32, type=Constant]; +"33 Constant_29912" [id=33, type=Constant]; +"0 Input_1" -> "1 Input_1/fq_output_0" [label="[1, 2, 3, 4, 4]", style=solid]; +"1 Input_1/fq_output_0" -> "2 Conv" [label="[1, 2, 3, 4, 4]", style=solid]; +"2 Conv" -> "3 Conv_Add" [label="[1, 4, 1, 2, 2]", style=solid]; +"3 Conv_Add" -> "4 Conv_Add/fq_output_0" [label="[1, 4, 3, 2, 2]", style=solid]; +"4 Conv_Add/fq_output_0" -> "5 GroupNormalization_27318" [label="[1, 4, 3, 2, 2]", style=solid]; +"5 GroupNormalization_27318" -> "6 Relu" [label="[1, 4, 3, 2, 2]", style=solid]; +"6 Relu" -> "7 Relu/fq_output_0" [label="[1, 4, 3, 2, 2]", style=solid]; +"7 Relu/fq_output_0" -> "8 Mul" [label="[1, 4, 3, 2, 2]", style=solid]; +"8 Mul" -> "9 Add" [label="[1, 4, 3, 2, 2]", style=solid]; +"9 Add" -> "10 Result" [label="[1, 4, 3, 2, 2]", style=solid]; +"11 Constant_27322" -> "9 Add" [label="[1, 4, 1, 1, 1]", style=solid]; +"12 Constant_27320" -> "8 Mul" [label="[1, 4, 1, 1, 1]", style=solid]; +"13 Constant_29930" -> "7 Relu/fq_output_0" [label="[]", style=solid]; +"14 Constant_29929" -> "7 Relu/fq_output_0" [label="[]", style=solid]; +"15 Constant_29928" -> "7 Relu/fq_output_0" [label="[]", style=solid]; +"16 Constant_29927" -> "7 Relu/fq_output_0" [label="[]", style=solid]; +"17 Constant_27317" -> "5 GroupNormalization_27318" [label="[4]", style=solid]; +"18 Constant_27316" -> "5 GroupNormalization_27318" [label="[4]", style=solid]; +"19 Constant_29925" -> "4 Conv_Add/fq_output_0" [label="[]", style=solid]; +"20 Constant_29924" -> "4 Conv_Add/fq_output_0" [label="[]", style=solid]; +"21 Constant_29923" -> "4 Conv_Add/fq_output_0" [label="[]", style=solid]; +"22 Constant_29922" -> "4 Conv_Add/fq_output_0" [label="[]", style=solid]; +"23 Bias" -> "3 Conv_Add" [label="[1, 1, 3, 1, 1]", style=solid]; +"24 Conv/fq_weights_1" -> "2 Conv" [label="[4, 2, 3, 3, 3]", style=solid]; +"25 Constant_29920" -> "24 Conv/fq_weights_1" [label="[4, 1, 1, 1, 1]", style=solid]; +"26 Constant_29919" -> "24 Conv/fq_weights_1" [label="[4, 1, 1, 1, 1]", style=solid]; +"27 Constant_29918" -> "24 Conv/fq_weights_1" [label="[4, 1, 1, 1, 1]", style=solid]; +"28 Constant_29917" -> "24 Conv/fq_weights_1" [label="[4, 1, 1, 1, 1]", style=solid]; +"29 Constant_27312" -> "24 Conv/fq_weights_1" [label="[4, 2, 3, 3, 3]", style=solid]; +"30 Constant_29915" -> "1 Input_1/fq_output_0" [label="[]", style=solid]; +"31 Constant_29914" -> "1 Input_1/fq_output_0" [label="[]", style=solid]; +"32 Constant_29913" -> "1 Input_1/fq_output_0" [label="[]", style=solid]; +"33 Constant_29912" -> "1 Input_1/fq_output_0" [label="[]", style=solid]; +} diff --git a/tests/openvino/native/data/reference_scales/GroupNormalizationModel_mixed.json b/tests/openvino/native/data/reference_scales/GroupNormalizationModel_mixed.json new file mode 100644 index 00000000000..0240e6cd60f --- /dev/null +++ b/tests/openvino/native/data/reference_scales/GroupNormalizationModel_mixed.json @@ -0,0 +1,174 @@ +{ + "Relu/fq_output_0": { + "input_low": 0.0, + "input_high": 1.8180807828903198, + "output_low": 0.0, + "output_high": 1.8180807828903198 + }, + "Conv_Add/fq_output_0": { + "input_low": 0.0, + "input_high": 18.557750701904297, + "output_low": 0.0, + "output_high": 18.557750701904297 + }, + "Conv/fq_weights_1": { + "input_low": [ + [ + [ + [ + [ + -1.994419813156128 + ] + ] + ] + ], + [ + [ + [ + [ + -1.9901930093765259 + ] + ] + ] + ], + [ + [ + [ + [ + -1.932124137878418 + ] + ] + ] + ], + [ + [ + [ + [ + -1.9898346662521362 + ] + ] 
+ ] + ] + ], + "input_high": [ + [ + [ + [ + [ + 1.994419813156128 + ] + ] + ] + ], + [ + [ + [ + [ + 1.9901930093765259 + ] + ] + ] + ], + [ + [ + [ + [ + 1.932124137878418 + ] + ] + ] + ], + [ + [ + [ + [ + 1.9898346662521362 + ] + ] + ] + ] + ], + "output_low": [ + [ + [ + [ + [ + -1.994419813156128 + ] + ] + ] + ], + [ + [ + [ + [ + -1.9901930093765259 + ] + ] + ] + ], + [ + [ + [ + [ + -1.932124137878418 + ] + ] + ] + ], + [ + [ + [ + [ + -1.9898346662521362 + ] + ] + ] + ] + ], + "output_high": [ + [ + [ + [ + [ + 1.994419813156128 + ] + ] + ] + ], + [ + [ + [ + [ + 1.9901930093765259 + ] + ] + ] + ], + [ + [ + [ + [ + 1.932124137878418 + ] + ] + ] + ], + [ + [ + [ + [ + 1.9898346662521362 + ] + ] + ] + ] + ] + }, + "Input_1/fq_output_0": { + "input_low": 0.0, + "input_high": 0.997209906578064, + "output_low": 0.0, + "output_high": 0.997209906578064 + } +} \ No newline at end of file diff --git a/tests/openvino/native/data/reference_scales/GroupNormalizationModel_performance.json b/tests/openvino/native/data/reference_scales/GroupNormalizationModel_performance.json new file mode 100644 index 00000000000..0240e6cd60f --- /dev/null +++ b/tests/openvino/native/data/reference_scales/GroupNormalizationModel_performance.json @@ -0,0 +1,174 @@ +{ + "Relu/fq_output_0": { + "input_low": 0.0, + "input_high": 1.8180807828903198, + "output_low": 0.0, + "output_high": 1.8180807828903198 + }, + "Conv_Add/fq_output_0": { + "input_low": 0.0, + "input_high": 18.557750701904297, + "output_low": 0.0, + "output_high": 18.557750701904297 + }, + "Conv/fq_weights_1": { + "input_low": [ + [ + [ + [ + [ + -1.994419813156128 + ] + ] + ] + ], + [ + [ + [ + [ + -1.9901930093765259 + ] + ] + ] + ], + [ + [ + [ + [ + -1.932124137878418 + ] + ] + ] + ], + [ + [ + [ + [ + -1.9898346662521362 + ] + ] + ] + ] + ], + "input_high": [ + [ + [ + [ + [ + 1.994419813156128 + ] + ] + ] + ], + [ + [ + [ + [ + 1.9901930093765259 + ] + ] + ] + ], + [ + [ + [ + [ + 1.932124137878418 + ] + ] + ] + ], + [ + [ + [ + [ + 1.9898346662521362 + ] + ] + ] + ] + ], + "output_low": [ + [ + [ + [ + [ + -1.994419813156128 + ] + ] + ] + ], + [ + [ + [ + [ + -1.9901930093765259 + ] + ] + ] + ], + [ + [ + [ + [ + -1.932124137878418 + ] + ] + ] + ], + [ + [ + [ + [ + -1.9898346662521362 + ] + ] + ] + ] + ], + "output_high": [ + [ + [ + [ + [ + 1.994419813156128 + ] + ] + ] + ], + [ + [ + [ + [ + 1.9901930093765259 + ] + ] + ] + ], + [ + [ + [ + [ + 1.932124137878418 + ] + ] + ] + ], + [ + [ + [ + [ + 1.9898346662521362 + ] + ] + ] + ] + ] + }, + "Input_1/fq_output_0": { + "input_low": 0.0, + "input_high": 0.997209906578064, + "output_low": 0.0, + "output_high": 0.997209906578064 + } +} \ No newline at end of file diff --git a/tests/openvino/native/models.py b/tests/openvino/native/models.py index c5af53819db..07fbe7cff19 100644 --- a/tests/openvino/native/models.py +++ b/tests/openvino/native/models.py @@ -15,6 +15,7 @@ import numpy as np import openvino.runtime as ov from openvino.runtime import opset9 as opset +from openvino.runtime import opset12 from nncf.common.utils.registry import Registry @@ -637,3 +638,35 @@ def _create_ov_model(self): result = opset.result(matmul_2, name="Result") model = ov.Model([result], [input_1]) return model + + +@SYNTHETIC_MODELS.register() +class GroupNormalizationModel(OVReferenceModel): + def _create_ov_model(self): + groups_num = 2 + channels = 4 + input_1 = opset.parameter([1, groups_num, 3, 4, 4], name="Input_1") + + kernel = self._rng.random((channels, groups_num, 3, 3, 3)).astype(np.float32) + strides 
= [1, 1, 1] + pads = [0, 0, 0] + dilations = [1, 1, 1] + conv = opset.convolution(input_1, kernel, strides, pads, pads, dilations, name="Conv") + bias = opset.constant(np.zeros((1, 1, 3, 1, 1)), dtype=np.float32, name="Bias") + conv_add = opset.add(conv, bias, name="Conv_Add") + + scale = self._rng.random(channels).astype(np.float32) + bias = self._rng.random(channels).astype(np.float32) + group_norm = opset12.group_normalization(conv_add, scale, bias, num_groups=channels, epsilon=1e-5) + + relu = opset.relu(group_norm, name="Relu") + + mean = self._rng.random((1, channels, 1, 1, 1)).astype(np.float32) + scale = self._rng.random((1, channels, 1, 1, 1)).astype(np.float32) + multiply = opset.multiply(relu, 1 / scale, name="Mul") + add = opset.add(multiply, (-1) * mean, name="Add") + + result = opset.result(add, name="Result") + result.get_output_tensor(0).set_names(set(["Result"])) + model = ov.Model([result], [input_1]) + return model From a12c52a3b2e996da4a38bdf21b756c51316558fe Mon Sep 17 00:00:00 2001 From: Nikita Savelyev Date: Mon, 11 Sep 2023 09:09:29 +0200 Subject: [PATCH 02/15] [TF] Disable MaskRCNN and RetinaNet graph tests (#2119) ### Changes Disable MaskRCNN and RetinaNet graph tests until ticket 119664 is resolved. The following tests are now excluded: test_compressed_graph.py::TestModelsGraph::test_quantize_network[w_sym_t_a_sym_t-retinanet] test_compressed_graph.py::TestModelsGraph::test_quantize_network[w_sym_t_a_sym_t-mask_rcnn] test_compressed_graph.py::TestModelsGraph::test_quantize_network[w_sym_ch_a_asym_t-retinanet] test_compressed_graph.py::TestModelsGraph::test_quantize_network[w_sym_ch_a_asym_t-mask_rcnn] test_compressed_graph.py::TestModelsGraph::test_magnitude_sparsity_network[retinanet] test_compressed_graph.py::TestModelsGraph::test_magnitude_sparsity_network[mask_rcnn] test_compressed_graph.py::TestModelsGraph::test_rb_sparsity_network[retinanet] test_compressed_graph.py::TestModelsGraph::test_rb_sparsity_network[mask_rcnn] test_compressed_graph.py::TestModelsGraph::test_pruning_network[retinanet] test_compressed_graph.py::test_quantize_outputs[w_sym_t_a_sym_t-retinanet] test_compressed_graph.py::test_quantize_outputs[w_sym_ch_a_asym_t-retinanet] --- tests/tensorflow/test_compressed_graph.py | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/tests/tensorflow/test_compressed_graph.py b/tests/tensorflow/test_compressed_graph.py index 45dca492610..b8be4fa9b10 100644 --- a/tests/tensorflow/test_compressed_graph.py +++ b/tests/tensorflow/test_compressed_graph.py @@ -194,11 +194,15 @@ def __init__( "nasnet_mobile": pytest.mark.skip(reason="gitlab issue #18"), "mobilenet_v2_slim": pytest.mark.skip(reason="ticket #46349"), "xception": pytest.mark.skip(reason="gitlab issue #28"), + "retinanet": pytest.mark.skip(reason="ticket #119664"), + "mask_rcnn": pytest.mark.skip(reason="ticket #119664"), }, "magnitude_sparsity": { "inception_resnet_v2": pytest.mark.skip(reason="gitlab issue #17"), "nasnet_mobile": pytest.mark.skip(reason="gitlab issue #18"), "xception": pytest.mark.skip(reason="gitlab issue #28"), + "retinanet": pytest.mark.skip(reason="ticket #119664"), + "mask_rcnn": pytest.mark.skip(reason="ticket #119664"), }, "filter_pruning": { "densenet121": pytest.mark.skip(reason="ticket #50604"), @@ -208,8 +212,13 @@ def __init__( "mask_rcnn": pytest.mark.skip(reason="ticket #50605"), "resnet50v2": pytest.mark.skip(reason="Several masks on one weight"), "mobilenet_v2_slim": pytest.mark.skip(reason="ticket #46349"), + "retinanet": 
pytest.mark.skip(reason="ticket #119664"),
+        },
+        "rb_sparsity": {
+            "mobilenet_v2_slim": pytest.mark.skip(reason="ticket #46349"),
+            "retinanet": pytest.mark.skip(reason="ticket #119664"),
+            "mask_rcnn": pytest.mark.skip(reason="ticket #119664"),
+        },
-        "rb_sparsity": {"mobilenet_v2_slim": pytest.mark.skip(reason="ticket #46349")},
     }
@@ -471,7 +480,8 @@ def test_pruning_network(self, desc: ModelDesc, _pruning_case_config):
 QUANTIZE_OUTPUTS_MODELS = [
     ModelDesc("mobilenet_v2_quantize_outputs.pb", test_models.MobileNetV2, [1, 96, 96, 3]),
-    ModelDesc("retinanet_quantize_outputs.pb", test_models.RetinaNet, [1, 603, 603, 3]),
+    # Skip this model due to #119664
+    # ModelDesc("retinanet_quantize_outputs.pb", test_models.RetinaNet, [1, 603, 603, 3]),
     ModelDesc("sequential_model_quantize_outputs.pb", test_models.SequentialModel, [1, 224, 224, 3]),
     ModelDesc("shared_layers_model_quantize_outputs.pb", test_models.SharedLayersModel, [1, 30, 30, 3]),
 ]

From da7a1973fb99d933a72dfd8a240f18d2fdb68e27 Mon Sep 17 00:00:00 2001
From: Alexander Dokuchaev
Date: Mon, 11 Sep 2023 10:09:58 +0300
Subject: [PATCH 03/15] Add message about deprecation of `export_to_onnx_standard_ops` (#2118)

### Changes

Add a message about the deprecation of the `export_to_onnx_standard_ops` option in NNCFConfig

### Reason

The recommended way to export to ONNX with QuantizeLinear-DequantizeLinear node pairs is `nncf.strip(quantized_model)`.
---
 nncf/config/schemata/algo/quantization.py | 2 +-
 nncf/torch/quantization/algo.py           | 6 ++++++
 2 files changed, 7 insertions(+), 1 deletion(-)

diff --git a/nncf/config/schemata/algo/quantization.py b/nncf/config/schemata/algo/quantization.py
index b077426d083..473642feab2 100644
--- a/nncf/config/schemata/algo/quantization.py
+++ b/nncf/config/schemata/algo/quantization.py
@@ -526,7 +526,7 @@
         },
         "export_to_onnx_standard_ops": with_attributes(
             BOOLEAN,
-            description="Determines how should the additional quantization "
+            description="[Deprecated] Determines how should the additional quantization "
             "operations be exported into the ONNX format. Set "
             "this to true to export to ONNX "
             "standard QuantizeLinear-DequantizeLinear "
diff --git a/nncf/torch/quantization/algo.py b/nncf/torch/quantization/algo.py
index d5b4269bf87..df78e921bdc 100644
--- a/nncf/torch/quantization/algo.py
+++ b/nncf/torch/quantization/algo.py
@@ -30,6 +30,7 @@
 from nncf.api.compression import CompressionLoss
 from nncf.api.compression import CompressionScheduler
 from nncf.api.compression import CompressionStage
+from nncf.common.deprecation import warning_deprecated
 from nncf.common.graph import NNCFGraph
 from nncf.common.graph import NNCFNode
 from nncf.common.graph.definitions import MODEL_INPUT_OP_NAME
@@ -1361,6 +1362,11 @@ def __init__(
             "export_to_onnx_standard_ops", QUANTIZATION_EXPORT_TO_ONNX_STANDARD_OPS
         )
         if should_export_to_onnx_qdq:
+            warning_deprecated(
+                "The config option `export_to_onnx_standard_ops` is deprecated and will be removed "
+                "in a future version. Please use the `nncf.strip(quantized_model)` method before export to ONNX "
+                "to get a model with QuantizeLinear-DequantizeLinear node pairs."
+ ) export_mode = QuantizerExportMode.ONNX_QUANTIZE_DEQUANTIZE_PAIRS else: export_mode = QuantizerExportMode.FAKE_QUANTIZE From dc27ed1a8acfcfd6925bc27acd05ba2b35ba26d8 Mon Sep 17 00:00:00 2001 From: Liubov Talamanova Date: Mon, 11 Sep 2023 08:36:36 +0100 Subject: [PATCH 04/15] [OV] Disable quantization of GRUSequence with linear_before_reset=True (#2115) ### Changes NNCF should not quantize GRU ops with linear_before_reset set to true, since oneDNN does not support it yet ### Reason for changes To align with POT ### Related bug https://github.com/openvinotoolkit/nncf/issues/2105 ### Tests Added `test_ignore_nodes_by_attribues` for OV backend --- nncf/openvino/graph/nncf_graph_builder.py | 5 +- .../algorithms/min_max/algorithm.py | 5 ++ .../algorithms/min_max/backend.py | 10 +++ .../algorithms/min_max/onnx_backend.py | 4 + .../algorithms/min_max/openvino_backend.py | 10 +++ .../algorithms/min_max/torch_backend.py | 4 + ...GRUSequenceModel_linear_before_reset_F.dot | 81 +++++++++++++++++++ ...GRUSequenceModel_linear_before_reset_T.dot | 41 ++++++++++ tests/openvino/native/models.py | 48 +++++++++-- .../native/quantization/test_graphs.py | 12 +++ 10 files changed, 212 insertions(+), 8 deletions(-) create mode 100644 tests/openvino/native/data/reference_graphs/quantized/GRUSequenceModel_linear_before_reset_F.dot create mode 100644 tests/openvino/native/data/reference_graphs/quantized/GRUSequenceModel_linear_before_reset_T.dot diff --git a/nncf/openvino/graph/nncf_graph_builder.py b/nncf/openvino/graph/nncf_graph_builder.py index 8fd92378c51..b1101224127 100644 --- a/nncf/openvino/graph/nncf_graph_builder.py +++ b/nncf/openvino/graph/nncf_graph_builder.py @@ -115,6 +115,7 @@ def _add_nncf_node(node: ov.Node, graph: NNCFGraph) -> None: metatype = get_node_metatype(node) graph.add_nncf_node(node_name=node.get_friendly_name(), node_type=node_type, node_metatype=metatype) + # pylint: disable=too-many-branches @staticmethod def create_nncf_graph(model: ov.Model) -> NNCFGraph: """ @@ -174,8 +175,10 @@ def create_nncf_graph(model: ov.Model) -> NNCFGraph: node_attributes = node.get_attributes() const_transpose_name = attribute_names[const_port_id] const_attrs[const_port_id]["transpose"] = node_attributes[const_transpose_name] - act_attrs["transpose"] = node_attributes[attribute_names[act_port_id]] + elif metatype == OVGRUSequenceMetatype: + node_attributes = node.get_attributes() + act_attrs["linear_before_reset"] = node_attributes["linear_before_reset"] if const_attrs or act_attrs: nncf_node = nncf_graph.get_node_by_name(node_name) diff --git a/nncf/quantization/algorithms/min_max/algorithm.py b/nncf/quantization/algorithms/min_max/algorithm.py index ff44a7e9255..72718bbeb68 100644 --- a/nncf/quantization/algorithms/min_max/algorithm.py +++ b/nncf/quantization/algorithms/min_max/algorithm.py @@ -323,6 +323,11 @@ def _get_ignored_names( ignored_names = {name: IgnoreReason.AUTOGENERATED for name in autogenerated_ignored_names} + ignored_names_by_layer_attributes = self._backend_entity.get_ignored_names_by_layer_attributes( + inference_nncf_graph + ) + ignored_names.update({name: IgnoreReason.AUTOGENERATED for name in ignored_names_by_layer_attributes}) + # User ignored scope has higher priority ignored_names.update({name: IgnoreReason.USER_REQUESTED for name in user_ignored_names}) diff --git a/nncf/quantization/algorithms/min_max/backend.py b/nncf/quantization/algorithms/min_max/backend.py index 90776ec3e02..254a1c949cf 100644 --- a/nncf/quantization/algorithms/min_max/backend.py +++ 
b/nncf/quantization/algorithms/min_max/backend.py @@ -215,6 +215,16 @@ def get_ignored_metatypes(model_type: ModelType, device: TargetDevice) -> List[O :return: List of ignored metatypes. """ + @staticmethod + @abstractmethod + def get_ignored_names_by_layer_attributes(nncf_graph: NNCFGraph) -> List[str]: + """ + Returns names of ignored nodes based on layer_attributes. + + :param nncf_graph: NNCFGraph instance. + :return: List of ignored names. + """ + @staticmethod @abstractmethod def get_weight_nodes(nncf_graph: NNCFGraph) -> List[NNCFNode]: diff --git a/nncf/quantization/algorithms/min_max/onnx_backend.py b/nncf/quantization/algorithms/min_max/onnx_backend.py index f5649f94ba2..47cf5695832 100644 --- a/nncf/quantization/algorithms/min_max/onnx_backend.py +++ b/nncf/quantization/algorithms/min_max/onnx_backend.py @@ -195,6 +195,10 @@ def get_ignored_metatypes(model_type: ModelType, device: TargetDevice) -> List[O types.append(om.ONNXMulLayerMetatype) return types + @staticmethod + def get_ignored_names_by_layer_attributes(nncf_graph: NNCFGraph) -> List[str]: + return [] + @staticmethod def get_weight_nodes(nncf_graph: NNCFGraph) -> List[NNCFNode]: return [node for node in nncf_graph.get_all_nodes() if node.layer_attributes.has_weight()] diff --git a/nncf/quantization/algorithms/min_max/openvino_backend.py b/nncf/quantization/algorithms/min_max/openvino_backend.py index 42d89b76051..4394c1870fa 100644 --- a/nncf/quantization/algorithms/min_max/openvino_backend.py +++ b/nncf/quantization/algorithms/min_max/openvino_backend.py @@ -226,6 +226,16 @@ def get_ignored_metatypes(model_type: ModelType, device: TargetDevice) -> List[O types.append(om.OVMultiplyMetatype) return types + @staticmethod + def get_ignored_names_by_layer_attributes(nncf_graph: NNCFGraph) -> List[str]: + ignored_names = [] + target_nodes = nncf_graph.get_nodes_by_metatypes([om.OVGRUSequenceMetatype]) + for node in target_nodes: + if isinstance(node.layer_attributes, OVLayerAttributes): + if node.layer_attributes.input_attributes["linear_before_reset"]: + ignored_names.append(node.node_name) + return ignored_names + @staticmethod def get_weight_nodes(nncf_graph: NNCFGraph) -> List[NNCFNode]: return [ diff --git a/nncf/quantization/algorithms/min_max/torch_backend.py b/nncf/quantization/algorithms/min_max/torch_backend.py index 59c410274f8..620a276eaf1 100644 --- a/nncf/quantization/algorithms/min_max/torch_backend.py +++ b/nncf/quantization/algorithms/min_max/torch_backend.py @@ -327,6 +327,10 @@ def get_ignored_metatypes(model_type: ModelType, device: TargetDevice) -> List[O types.append(om.PTMulMetatype) return types + @staticmethod + def get_ignored_names_by_layer_attributes(nncf_graph: NNCFGraph) -> List[str]: + return [] + @staticmethod def get_weight_nodes(nncf_graph: NNCFGraph) -> List[NNCFNode]: return [ diff --git a/tests/openvino/native/data/reference_graphs/quantized/GRUSequenceModel_linear_before_reset_F.dot b/tests/openvino/native/data/reference_graphs/quantized/GRUSequenceModel_linear_before_reset_F.dot new file mode 100644 index 00000000000..f97ac8a2d68 --- /dev/null +++ b/tests/openvino/native/data/reference_graphs/quantized/GRUSequenceModel_linear_before_reset_F.dot @@ -0,0 +1,81 @@ +strict digraph { +"0 X" [id=0, type=Parameter]; +"1 initial_hidden_state" [id=1, type=Parameter]; +"2 X/fq_output_0" [id=2, type=FakeQuantize]; +"3 initial_hidden_state/fq_output_0" [id=3, type=FakeQuantize]; +"4 GRUSequence" [id=4, type=GRUSequence]; +"5 GRUSequence/fq_output_0" [id=5, type=FakeQuantize]; +"6 MatMul" [id=6, 
type=MatMul]; +"7 Result" [id=7, type=Result]; +"8 MatMul/fq_weights_1" [id=8, type=FakeQuantize]; +"9 Constant_2541" [id=9, type=Constant]; +"10 Constant_2540" [id=10, type=Constant]; +"11 Constant_2539" [id=11, type=Constant]; +"12 Constant_2538" [id=12, type=Constant]; +"13 Constant_8" [id=13, type=Constant]; +"14 Constant_2526" [id=14, type=Constant]; +"15 Constant_2525" [id=15, type=Constant]; +"16 Constant_2524" [id=16, type=Constant]; +"17 Constant_2523" [id=17, type=Constant]; +"18 Constant_6" [id=18, type=Constant]; +"19 GRUSequence/fq_weights_4" [id=19, type=FakeQuantize]; +"20 Constant_2536" [id=20, type=Constant]; +"21 Constant_2535" [id=21, type=Constant]; +"22 Constant_2534" [id=22, type=Constant]; +"23 Constant_2533" [id=23, type=Constant]; +"24 Constant_5" [id=24, type=Constant]; +"25 GRUSequence/fq_weights_3" [id=25, type=FakeQuantize]; +"26 Constant_2531" [id=26, type=Constant]; +"27 Constant_2530" [id=27, type=Constant]; +"28 Constant_2529" [id=28, type=Constant]; +"29 Constant_2528" [id=29, type=Constant]; +"30 Constant_4" [id=30, type=Constant]; +"31 Constant_3" [id=31, type=Constant]; +"32 Constant_2521" [id=32, type=Constant]; +"33 Constant_2520" [id=33, type=Constant]; +"34 Constant_2519" [id=34, type=Constant]; +"35 Constant_2518" [id=35, type=Constant]; +"36 Constant_2516" [id=36, type=Constant]; +"37 Constant_2515" [id=37, type=Constant]; +"38 Constant_2514" [id=38, type=Constant]; +"39 Constant_2513" [id=39, type=Constant]; +"0 X" -> "2 X/fq_output_0" [label="[3, 2, 16]", style=solid]; +"1 initial_hidden_state" -> "3 initial_hidden_state/fq_output_0" [label="[3, 1, 128]", style=solid]; +"2 X/fq_output_0" -> "4 GRUSequence" [label="[3, 2, 16]", style=solid]; +"3 initial_hidden_state/fq_output_0" -> "4 GRUSequence" [label="[3, 1, 128]", style=solid]; +"4 GRUSequence" -> "5 GRUSequence/fq_output_0" [label="[3, 1, 2, 128]", style=solid]; +"5 GRUSequence/fq_output_0" -> "6 MatMul" [label="[3, 1, 2, 128]", style=solid]; +"6 MatMul" -> "7 Result" [label="[3, 1, 2, 3]", style=solid]; +"8 MatMul/fq_weights_1" -> "6 MatMul" [label="[3, 1, 128, 3]", style=solid]; +"9 Constant_2541" -> "8 MatMul/fq_weights_1" [label="[3, 1, 1, 3]", style=solid]; +"10 Constant_2540" -> "8 MatMul/fq_weights_1" [label="[3, 1, 1, 3]", style=solid]; +"11 Constant_2539" -> "8 MatMul/fq_weights_1" [label="[3, 1, 1, 3]", style=solid]; +"12 Constant_2538" -> "8 MatMul/fq_weights_1" [label="[3, 1, 1, 3]", style=solid]; +"13 Constant_8" -> "8 MatMul/fq_weights_1" [label="[3, 1, 128, 3]", style=solid]; +"14 Constant_2526" -> "5 GRUSequence/fq_output_0" [label="[]", style=solid]; +"15 Constant_2525" -> "5 GRUSequence/fq_output_0" [label="[]", style=solid]; +"16 Constant_2524" -> "5 GRUSequence/fq_output_0" [label="[]", style=solid]; +"17 Constant_2523" -> "5 GRUSequence/fq_output_0" [label="[]", style=solid]; +"18 Constant_6" -> "4 GRUSequence" [label="[1, 384]", style=solid]; +"19 GRUSequence/fq_weights_4" -> "4 GRUSequence" [label="[1, 384, 128]", style=solid]; +"20 Constant_2536" -> "19 GRUSequence/fq_weights_4" [label="[1, 384, 1]", style=solid]; +"21 Constant_2535" -> "19 GRUSequence/fq_weights_4" [label="[1, 384, 1]", style=solid]; +"22 Constant_2534" -> "19 GRUSequence/fq_weights_4" [label="[1, 384, 1]", style=solid]; +"23 Constant_2533" -> "19 GRUSequence/fq_weights_4" [label="[1, 384, 1]", style=solid]; +"24 Constant_5" -> "19 GRUSequence/fq_weights_4" [label="[1, 384, 128]", style=solid]; +"25 GRUSequence/fq_weights_3" -> "4 GRUSequence" [label="[1, 384, 16]", style=solid]; +"26 Constant_2531" 
-> "25 GRUSequence/fq_weights_3" [label="[1, 384, 1]", style=solid]; +"27 Constant_2530" -> "25 GRUSequence/fq_weights_3" [label="[1, 384, 1]", style=solid]; +"28 Constant_2529" -> "25 GRUSequence/fq_weights_3" [label="[1, 384, 1]", style=solid]; +"29 Constant_2528" -> "25 GRUSequence/fq_weights_3" [label="[1, 384, 1]", style=solid]; +"30 Constant_4" -> "25 GRUSequence/fq_weights_3" [label="[1, 384, 16]", style=solid]; +"31 Constant_3" -> "4 GRUSequence" [label="[3]", style=dashed]; +"32 Constant_2521" -> "3 initial_hidden_state/fq_output_0" [label="[]", style=solid]; +"33 Constant_2520" -> "3 initial_hidden_state/fq_output_0" [label="[]", style=solid]; +"34 Constant_2519" -> "3 initial_hidden_state/fq_output_0" [label="[]", style=solid]; +"35 Constant_2518" -> "3 initial_hidden_state/fq_output_0" [label="[]", style=solid]; +"36 Constant_2516" -> "2 X/fq_output_0" [label="[]", style=solid]; +"37 Constant_2515" -> "2 X/fq_output_0" [label="[]", style=solid]; +"38 Constant_2514" -> "2 X/fq_output_0" [label="[]", style=solid]; +"39 Constant_2513" -> "2 X/fq_output_0" [label="[]", style=solid]; +} diff --git a/tests/openvino/native/data/reference_graphs/quantized/GRUSequenceModel_linear_before_reset_T.dot b/tests/openvino/native/data/reference_graphs/quantized/GRUSequenceModel_linear_before_reset_T.dot new file mode 100644 index 00000000000..a9819020b25 --- /dev/null +++ b/tests/openvino/native/data/reference_graphs/quantized/GRUSequenceModel_linear_before_reset_T.dot @@ -0,0 +1,41 @@ +strict digraph { +"0 X" [id=0, type=Parameter]; +"1 initial_hidden_state" [id=1, type=Parameter]; +"2 GRUSequence" [id=2, type=GRUSequence]; +"3 GRUSequence/fq_output_0" [id=3, type=FakeQuantize]; +"4 MatMul" [id=4, type=MatMul]; +"5 Result" [id=5, type=Result]; +"6 MatMul/fq_weights_1" [id=6, type=FakeQuantize]; +"7 Constant_2205" [id=7, type=Constant]; +"8 Constant_2204" [id=8, type=Constant]; +"9 Constant_2203" [id=9, type=Constant]; +"10 Constant_2202" [id=10, type=Constant]; +"11 Constant_8" [id=11, type=Constant]; +"12 Constant_2200" [id=12, type=Constant]; +"13 Constant_2199" [id=13, type=Constant]; +"14 Constant_2198" [id=14, type=Constant]; +"15 Constant_2197" [id=15, type=Constant]; +"16 Constant_6" [id=16, type=Constant]; +"17 Constant_5" [id=17, type=Constant]; +"18 Constant_4" [id=18, type=Constant]; +"19 Constant_3" [id=19, type=Constant]; +"0 X" -> "2 GRUSequence" [label="[3, 2, 16]", style=solid]; +"1 initial_hidden_state" -> "2 GRUSequence" [label="[3, 1, 128]", style=solid]; +"2 GRUSequence" -> "3 GRUSequence/fq_output_0" [label="[3, 1, 2, 128]", style=solid]; +"3 GRUSequence/fq_output_0" -> "4 MatMul" [label="[3, 1, 2, 128]", style=solid]; +"4 MatMul" -> "5 Result" [label="[3, 1, 2, 3]", style=solid]; +"6 MatMul/fq_weights_1" -> "4 MatMul" [label="[3, 1, 128, 3]", style=solid]; +"7 Constant_2205" -> "6 MatMul/fq_weights_1" [label="[3, 1, 1, 3]", style=solid]; +"8 Constant_2204" -> "6 MatMul/fq_weights_1" [label="[3, 1, 1, 3]", style=solid]; +"9 Constant_2203" -> "6 MatMul/fq_weights_1" [label="[3, 1, 1, 3]", style=solid]; +"10 Constant_2202" -> "6 MatMul/fq_weights_1" [label="[3, 1, 1, 3]", style=solid]; +"11 Constant_8" -> "6 MatMul/fq_weights_1" [label="[3, 1, 128, 3]", style=solid]; +"12 Constant_2200" -> "3 GRUSequence/fq_output_0" [label="[]", style=solid]; +"13 Constant_2199" -> "3 GRUSequence/fq_output_0" [label="[]", style=solid]; +"14 Constant_2198" -> "3 GRUSequence/fq_output_0" [label="[]", style=solid]; +"15 Constant_2197" -> "3 GRUSequence/fq_output_0" [label="[]", style=solid]; 
+"16 Constant_6" -> "2 GRUSequence" [label="[1, 512]", style=solid]; +"17 Constant_5" -> "2 GRUSequence" [label="[1, 384, 128]", style=solid]; +"18 Constant_4" -> "2 GRUSequence" [label="[1, 384, 16]", style=solid]; +"19 Constant_3" -> "2 GRUSequence" [label="[3]", style=dashed]; +} diff --git a/tests/openvino/native/models.py b/tests/openvino/native/models.py index 07fbe7cff19..8d02accb0cc 100644 --- a/tests/openvino/native/models.py +++ b/tests/openvino/native/models.py @@ -454,14 +454,14 @@ def _create_ov_model(self): @SYNTHETIC_MODELS.register() class LSTMSequenceModel(OVReferenceModel): def _create_ov_model(self): - x = ov.opset9.parameter([1, 2, 16], name="X") - initial_hidden_state = ov.opset9.parameter([1, 1, 128], name="initial_hidden_state") - initial_cell_state = ov.opset9.parameter([1, 1, 128], name="initial_cell_state") - seq_len = ov.opset9.constant(np.array([2]), dtype=np.int32) + x = opset.parameter([1, 2, 16], name="X") + initial_hidden_state = opset.parameter([1, 1, 128], name="initial_hidden_state") + initial_cell_state = opset.parameter([1, 1, 128], name="initial_cell_state") + seq_len = opset.constant(np.array([2]), dtype=np.int32) - W = ov.opset9.constant(np.zeros(([1, 512, 16])), dtype=np.float32) - R = ov.opset9.constant(np.zeros(([1, 512, 128])), dtype=np.float32) - B = ov.opset9.constant(np.zeros(([1, 512])), dtype=np.float32) + W = opset.constant(np.zeros(([1, 512, 16])), dtype=np.float32) + R = opset.constant(np.zeros(([1, 512, 128])), dtype=np.float32) + B = opset.constant(np.zeros(([1, 512])), dtype=np.float32) lstm = opset.lstm_sequence( x, initial_hidden_state, initial_cell_state, seq_len, W, R, B, 128, "FORWARD", name="LSTMSequence" @@ -475,6 +475,40 @@ def _create_ov_model(self): return model +class GRUSequenceModel(OVReferenceModel): + def _create_ov_model(self, linear_before_reset=True): + hidden_size = 128 + + x = opset.parameter([3, 2, 16], name="X") + initial_hidden_state = opset.parameter([3, 1, hidden_size], name="initial_hidden_state") + seq_len = opset.constant(np.array([1, 2, 3]), dtype=np.int32) + + scale_factor = 4 if linear_before_reset else 3 + W = opset.constant(np.zeros(([1, 3 * hidden_size, 16])), dtype=np.float32) + R = opset.constant(np.zeros(([1, 3 * hidden_size, hidden_size])), dtype=np.float32) + B = opset.constant(np.zeros(([1, scale_factor * hidden_size])), dtype=np.float32) + + gru = opset.gru_sequence( + x, + initial_hidden_state, + seq_len, + W, + R, + B, + hidden_size, + direction="FORWARD", + linear_before_reset=linear_before_reset, + name="GRUSequence", + ) + data = self._rng.random((3, 1, hidden_size, 3)).astype(np.float32) + matmul = opset.matmul(gru.output(0), data, transpose_a=False, transpose_b=False, name="MatMul") + + result = opset.result(matmul, name="Result") + result.get_output_tensor(0).set_names(set(["Result"])) + model = ov.Model(results=[result], parameters=[x, initial_hidden_state]) + return model + + class MatmulSoftmaxMatmulBlock(OVReferenceModel): def _create_ov_model(self): input_1 = opset.parameter([1, 1, 1], name="Input") diff --git a/tests/openvino/native/quantization/test_graphs.py b/tests/openvino/native/quantization/test_graphs.py index 373ee9cfd9d..b761aa68b68 100644 --- a/tests/openvino/native/quantization/test_graphs.py +++ b/tests/openvino/native/quantization/test_graphs.py @@ -29,6 +29,7 @@ from tests.openvino.native.models import DepthwiseConv3DModel from tests.openvino.native.models import DepthwiseConv4DModel from tests.openvino.native.models import DepthwiseConv5DModel +from 
tests.openvino.native.models import GRUSequenceModel
 from tests.openvino.native.models import MatmulSoftmaxMatmulBlock
 from tests.openvino.native.quantization.test_fq_params_calculation import quantize_model
 from tests.openvino.omz_helpers import convert_model
@@ -141,3 +142,14 @@ def smooth_quant_model(ov_model: ov.Model, q_params: Dict, quantize=True):
     if quantize:
         modified_model = quantize_model(modified_model, q_params)
     return modified_model
+
+
+@pytest.mark.parametrize(
+    "linear_before_reset", [True, False], ids=["linear_before_reset_True", "linear_before_reset_False"]
+)
+def test_ignore_nodes_by_attribues(linear_before_reset):
+    model = GRUSequenceModel(**{"linear_before_reset": linear_before_reset}).ov_model
+    quantized_model = quantize_model(model, {})
+    postfix = "T" if linear_before_reset else "F"
+    path_ref_graph = QUANTIZED_REF_GRAPHS_DIR / f"GRUSequenceModel_linear_before_reset_{postfix}.dot"
+    compare_nncf_graphs(quantized_model, path_ref_graph)

From 05ae9168caa7f28b5a4dd7b996c22b4ce5708db9 Mon Sep 17 00:00:00 2001
From: Nikita Savelyev
Date: Mon, 11 Sep 2023 10:41:03 +0200
Subject: [PATCH 05/15] Added Whisper notebook to the list of quantization samples (#2123)

### Changes

Added Whisper notebook to the list of quantization samples
---
 README.md | 1 +
 1 file changed, 1 insertion(+)

diff --git a/README.md b/README.md
index f45d8502260..7605785a338 100644
--- a/README.md
+++ b/README.md
@@ -280,6 +280,7 @@ A collection of ready-to-run Jupyter* notebooks are available to demonstrate how
 - [NNCF Post-Training Optimization of Segment Anything Model](https://github.com/openvinotoolkit/openvino_notebooks/tree/main/notebooks/237-segment-anything)
 - [NNCF Post-Training Optimization of CLIP Model](https://github.com/openvinotoolkit/openvino_notebooks/tree/main/notebooks/228-clip-zero-shot-image-classification)
 - [NNCF Post-Training Optimization of ImageBind Model](https://github.com/openvinotoolkit/openvino_notebooks/tree/main/notebooks/239-image-bind)
+- [NNCF Post-Training Optimization of Whisper Model](https://github.com/openvinotoolkit/openvino_notebooks/tree/main/notebooks/227-whisper-subtitles-generation)
 - [Quantize a Segmentation Model and Show Live Inference](https://github.com/openvinotoolkit/openvino_notebooks/blob/main/notebooks/110-ct-segmentation-quantize)
 - [Training to Deployment with TensorFlow and OpenVINO](https://github.com/openvinotoolkit/openvino_notebooks/blob/main/notebooks/301-tensorflow-training-openvino)
 - [Migrate quantization from POT API to NNCF API](https://github.com/openvinotoolkit/openvino_notebooks/blob/main/notebooks/111-yolov5-quantization-migration)

From 2c4d290e11f3ef904479d1ccca125a792bfa3534 Mon Sep 17 00:00:00 2001
From: Alexander Dokuchaev
Date: Mon, 11 Sep 2023 12:51:54 +0300
Subject: [PATCH 06/15] Add marks nightly and weekly for torch tests (#2092)

### Changes

- Add marks `nightly` and `weekly` for tests.
- Mark sanity tests as `nightly`
- Split `test_functions.TestParametrized` into a fast part for precommit and a long part for nightly
- Torch precommit time reduced from 60 to 40 minutes
- Set `xfail` for sanity tests with `--mode train` in case of a segmentation fault. A sporadic segmentation fault is reproduced on torch>=2.0.0 when calling the `backward` function.
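For illustration, a minimal sketch of how the new marks are meant to gate tests (the test name below is hypothetical):

```python
import pytest

# Collected by `pytest -m nightly` (the new `test-torch-nightly` target) and
# excluded from precommit, which now runs with `-m "not weekly and not nightly"`.
@pytest.mark.nightly
def test_long_running_case():
    assert True
```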
### Related tickets 119128 --- Makefile | 12 +- tests/torch/nas/test_sanity_sample.py | 1 + tests/torch/pytest.ini | 2 + tests/torch/quantization/test_functions.py | 36 +- .../torch/quantization/test_sanity_sample.py | 2 + tests/torch/test_sanity_sample.py | 858 +++++++++--------- 6 files changed, 475 insertions(+), 436 deletions(-) diff --git a/Makefile b/Makefile index 173d7d57738..5bd8cf8f4dd 100644 --- a/Makefile +++ b/Makefile @@ -10,6 +10,10 @@ ifdef DATA DATA_ARG := --data $(DATA) endif +ifdef WEEKLY_MODELS + WEEKLY_MODELS_ARG := --weekly-models $(WEEKLY_MODELS) +endif + install-pre-commit: pip install pre-commit==3.2.2 @@ -124,7 +128,13 @@ install-torch-dev: install-torch-test install-pre-commit install-pylint pip install -r examples/post_training_quantization/torch/ssd300_vgg16/requirements.txt test-torch: - pytest ${COVERAGE_ARGS} tests/common tests/torch --junitxml ${JUNITXML_PATH} $(DATA_ARG) + pytest ${COVERAGE_ARGS} tests/common tests/torch -m "not weekly and not nightly" --junitxml ${JUNITXML_PATH} $(DATA_ARG) + +test-torch-nightly: + pytest ${COVERAGE_ARGS} tests/torch -m nightly --junitxml ${JUNITXML_PATH} $(DATA_ARG) + +test-torch-weekly: + pytest ${COVERAGE_ARGS} tests/torch -m weekly --junitxml ${JUNITXML_PATH} $(DATA_ARG) ${WEEKLY_MODELS_ARG} COMMON_PYFILES := $(shell python3 tools/collect_pylint_input_files_for_backend.py common) pylint-torch: diff --git a/tests/torch/nas/test_sanity_sample.py b/tests/torch/nas/test_sanity_sample.py index ee9f5399265..c2575e14428 100644 --- a/tests/torch/nas/test_sanity_sample.py +++ b/tests/torch/nas/test_sanity_sample.py @@ -96,6 +96,7 @@ def fixture_nas_desc(request, dataset_dir): return desc.finalize(dataset_dir) +@pytest.mark.nightly def test_e2e_supernet_training(nas_desc: NASSampleTestDescriptor, tmp_path, mocker): validator = nas_desc.get_validator() args = validator.get_default_args(tmp_path) diff --git a/tests/torch/pytest.ini b/tests/torch/pytest.ini index c016408adde..b35c6a2b6b7 100644 --- a/tests/torch/pytest.ini +++ b/tests/torch/pytest.ini @@ -5,5 +5,7 @@ markers = convert train install + nightly + weekly python_files = test_* xfail_strict = true diff --git a/tests/torch/quantization/test_functions.py b/tests/torch/quantization/test_functions.py index d150fb265d5..fa17bbe69fc 100644 --- a/tests/torch/quantization/test_functions.py +++ b/tests/torch/quantization/test_functions.py @@ -171,16 +171,16 @@ def skip_if_half_on_cpu(is_fp16, use_cuda): def check_quant_moved(test_input, test_val, ref_val, quant_len, input_low, input_range, is_fp16, rtol, atol=1e-10): """ Checks values in `test_val` are inside of closest quant and - values in `test_val` and `ref_val` elementwise eather equal with given rtol/atol or + values in `test_val` and `ref_val` elementwise either equal with given rtol/atol or values differ by correspondent `quant_len` +- rtol. :param test_input: Input of a quantizer. :param test_val: Given test value. :param ref_val: Given reference value. - :param quant_len: Lenghts of quants in quantizers + :param quant_len: Length of quants in quantizers (for each channel in case per channel quantization). - :param atol: Absolute tollerance. - :param rtol: Relative tollerance. + :param atol: Absolute tolerance. + :param rtol: Relative tolerance. 
""" def to_tensor(a): @@ -214,15 +214,10 @@ def check_outputs_for_quantization_functions(test_val: torch.Tensor, ref_val: np PTTensorListComparator.check_equal(test_val, ref_val, rtol, atol) -@pytest.mark.parametrize( - "input_size", - [[1, 48, 112, 112], [1, 96, 28, 28], [1, 288, 14, 14], [16, 96, 112, 112], [16, 192, 28, 28], [16, 576, 14, 14]], - ids=idfn, -) @pytest.mark.parametrize("bits", (8, 4), ids=("8bit", "4bit")) @pytest.mark.parametrize("scale_mode", ["single_scale", "per_channel_scale"]) @pytest.mark.parametrize("is_fp16", (True, False), ids=("fp16", "fp32")) -class TestParametrized: +class BaseParametrized: class TestSymmetric: @staticmethod def generate_scale(input_size, scale_mode, is_weights, is_fp16, fixed=None): @@ -523,12 +518,12 @@ def test_quantize_asymmetric_backward(self, _seed, input_size, bits, use_cuda, i if is_fp16: # This is needed to make scale == 1 to prevent # quant movement on forward pass in FP16 precision. - # In case scale != 1., not precice scale multiplication in FP16 + # In case scale != 1., not precise scale multiplication in FP16 # could lead to big deviations, so even if an input point # lies in safe range (far from middles of quants) after a scaling # it could end up in the middle of a quant. It happens mostly - # when target quant > 150 because in real life scenarious quantization range - # usualy less than 2 ** quantization bits, + # when target quant > 150 because in real life scenarios quantization range + # usually less than 2 ** quantization bits, # so input is small and scale is big, small FP16 input multiplies big fp16 scale, # deviation is significant. fixed = {} @@ -589,6 +584,21 @@ def test_quantize_asymmetric_backward(self, _seed, input_size, bits, use_cuda, i check_outputs_for_quantization_functions(test_grads, ref_grads, rtol=1e-2 if is_fp16 else 1e-3) +@pytest.mark.parametrize("input_size", [[1, 16, 64, 64], [4, 16, 16, 16]], ids=idfn) +class TestParametrizedFast(BaseParametrized): + pass + + +@pytest.mark.nightly +@pytest.mark.parametrize( + "input_size", + [[1, 48, 112, 112], [1, 96, 28, 28], [1, 288, 14, 14], [16, 96, 112, 112], [16, 192, 28, 28], [16, 576, 14, 14]], + ids=idfn, +) +class TestParametrizedLong(BaseParametrized): + pass + + @pytest.mark.parametrize("device", ["cuda", "cpu"]) def test_mapping_to_zero(quantization_mode, device): torch.manual_seed(42) diff --git a/tests/torch/quantization/test_sanity_sample.py b/tests/torch/quantization/test_sanity_sample.py index 10909d57c59..e42832f1186 100644 --- a/tests/torch/quantization/test_sanity_sample.py +++ b/tests/torch/quantization/test_sanity_sample.py @@ -272,6 +272,7 @@ def fixture_precision_desc(request, dataset_dir): return desc.finalize(dataset_dir) +@pytest.mark.nightly def test_precision_init(precision_desc: PrecisionTestCaseDescriptor, tmp_path, mocker): validator = precision_desc.get_validator() args = validator.get_default_args(tmp_path) @@ -351,6 +352,7 @@ def fixture_export_desc(request): return desc.finalize() +@pytest.mark.nightly @pytest.mark.parametrize( ("extra_args", "is_export_called"), (({}, False), ({"-m": ["export", "train"]}, True)), diff --git a/tests/torch/test_sanity_sample.py b/tests/torch/test_sanity_sample.py index e7df2c2f1ab..42da1e08774 100644 --- a/tests/torch/test_sanity_sample.py +++ b/tests/torch/test_sanity_sample.py @@ -107,6 +107,10 @@ CONFIG_PARAMS.append((sample_type_,) + tpl) +def _get_test_case_id(p) -> str: + return "-".join([p[0], p[1].name, p[2], str(p[3])]) + + def 
update_compression_algo_dict_with_reduced_bn_adapt_params(algo_dict): if algo_dict["algorithm"] == "rb_sparsity": return @@ -139,469 +143,479 @@ def update_compression_algo_dict_with_legr_save_load_params(nncf_config, tmp_pat return nncf_config -def _get_test_case_id(p) -> str: - return "-".join([p[0], p[1].name, p[2], str(p[3])]) - +def extract_compression_stage_from_checkpoint(last_checkpoint_path: str) -> CompressionStage: + compression_state = torch.load(last_checkpoint_path)[COMPRESSION_STATE_ATTR] + ctrl_state = compression_state[BaseController.CONTROLLER_STATE] + compression_stage = next(iter(ctrl_state.values()))[BaseControllerStateNames.COMPRESSION_STAGE] + return compression_stage -@pytest.fixture(params=CONFIG_PARAMS, name="config", ids=[_get_test_case_id(p) for p in CONFIG_PARAMS]) -def fixture_config(request, dataset_dir): - sample_type, config_path, dataset_name, batch_size = request.param - dataset_path = DATASET_PATHS[sample_type][dataset_name](dataset_dir) - with config_path.open() as f: - jconfig = json.load(f) +def depends_on_pretrained_train(request, test_case_id: str, current_multiprocessing_distributed: bool): + full_test_case_id = test_case_id + ("-distributed" if current_multiprocessing_distributed else "-dataparallel") + primary_test_case_name = f"TestSanitySample::test_pretrained_model_train[{full_test_case_id}]" + depends(request, [primary_test_case_name]) - if "checkpoint_save_dir" in jconfig.keys(): - del jconfig["checkpoint_save_dir"] - # Use a reduced number of BN adaptation samples for speed - if "compression" in jconfig: - if isinstance(jconfig["compression"], list): - algos_list = jconfig["compression"] - for algo_dict in algos_list: - update_compression_algo_dict_with_reduced_bn_adapt_params(algo_dict) - else: - algo_dict = jconfig["compression"] - update_compression_algo_dict_with_reduced_bn_adapt_params(algo_dict) - jconfig["dataset"] = dataset_name - - return { - "sample_type": sample_type, - "sample_config": jconfig, - "model_name": jconfig["model"], - "dataset_path": dataset_path, - "batch_size": batch_size, - "test_case_id": _get_test_case_id(request.param), - } - - -@pytest.fixture(scope="module", name="case_common_dirs") -def fixture_case_common_dirs(tmp_path_factory): - return { - "checkpoint_save_dir": str(tmp_path_factory.mktemp("models")), - "save_coeffs_path": str(tmp_path_factory.mktemp("ranking_coeffs")), - } - - -@pytest.mark.parametrize(" multiprocessing_distributed", (True, False), ids=["distributed", "dataparallel"]) -def test_pretrained_model_eval(config, tmp_path, multiprocessing_distributed, case_common_dirs): - if version.parse(torchvision.__version__) < version.parse("0.13") and "voc" in str(config["dataset_path"]): - pytest.skip( - f"Test calls sample that uses `datasets.VOCDetection.parse_voc_xml` function from latest " - f"torchvision.\nThe signature of the function is not compatible with the corresponding signature " - f"from the current torchvision version : {torchvision.__version__}" - ) - config_factory = ConfigFactory(config["sample_config"], tmp_path / "config.json") - config_factory.config = update_compression_algo_dict_with_legr_save_load_params( - config_factory.config, case_common_dirs["save_coeffs_path"] - ) - args = { - "--mode": "test", - "--data": config["dataset_path"], - "--config": config_factory.serialize(), - "--log-dir": tmp_path, - "--batch-size": config["batch_size"] * NUM_DEVICES, - "--workers": 0, # Workaround for the PyTorch MultiProcessingDataLoader issue - "--dist-url": "tcp://127.0.0.1:8987", - } - - 
if not torch.cuda.is_available(): - args["--cpu-only"] = True - elif multiprocessing_distributed: - args["--multiprocessing-distributed"] = True - - runner = Command(create_command_line(args, config["sample_type"]), env=ROOT_PYTHONPATH_ENV) - runner.run() - - -@pytest.mark.dependency() -@pytest.mark.parametrize("multiprocessing_distributed", [True, False], ids=["distributed", "dataparallel"]) -def test_pretrained_model_train(config, tmp_path, multiprocessing_distributed, case_common_dirs): - checkpoint_save_dir = os.path.join( - case_common_dirs["checkpoint_save_dir"], "distributed" if multiprocessing_distributed else "data_parallel" - ) - config_factory = ConfigFactory(config["sample_config"], tmp_path / "config.json") - config_factory.config = update_compression_algo_dict_with_legr_save_load_params( - config_factory.config, case_common_dirs["save_coeffs_path"] +def get_resuming_checkpoint_path(config_factory, multiprocessing_distributed, checkpoint_save_dir): + return os.path.join( + checkpoint_save_dir, + "distributed" if multiprocessing_distributed else "data_parallel", + get_run_name(config_factory.config) + "_last.pth", ) - args = { - "--mode": "train", - "--data": config["dataset_path"], - "--config": config_factory.serialize(), - "--log-dir": tmp_path, - "--batch-size": config["batch_size"] * NUM_DEVICES, - "--workers": 0, # Workaround for the PyTorch MultiProcessingDataLoader issue - "--epochs": 2, - "--checkpoint-save-dir": checkpoint_save_dir, - "--dist-url": "tcp://127.0.0.1:8989", - } - - if not torch.cuda.is_available(): - args["--cpu-only"] = True - elif multiprocessing_distributed: - args["--multiprocessing-distributed"] = True - elif config["sample_config"]["model"] == "inception_v3": - pytest.skip( - "InceptionV3 may not be trained in DataParallel " - "because it outputs namedtuple, which DP seems to be unable " - "to support even still." 
- ) - runner = Command(create_command_line(args, config["sample_type"]), env=ROOT_PYTHONPATH_ENV) - runner.run() - last_checkpoint_path = os.path.join(checkpoint_save_dir, get_run_name(config_factory.config) + "_last.pth") - assert os.path.exists(last_checkpoint_path) - if "compression" in config["sample_config"]: - allowed_compression_stages = (CompressionStage.FULLY_COMPRESSED, CompressionStage.PARTIALLY_COMPRESSED) - else: - allowed_compression_stages = (CompressionStage.UNCOMPRESSED,) - compression_stage = extract_compression_stage_from_checkpoint(last_checkpoint_path) - assert compression_stage in allowed_compression_stages +@contextmanager +def set_num_threads_locally(n=1): + old_n = torch.get_num_threads() + try: + torch.set_num_threads(n) + yield + finally: + torch.set_num_threads(old_n) -def depends_on_pretrained_train(request, test_case_id: str, current_multiprocessing_distributed: bool): - full_test_case_id = test_case_id + ("-distributed" if current_multiprocessing_distributed else "-dataparallel") - primary_test_case_name = f"test_pretrained_model_train[{full_test_case_id}]" - depends(request, [primary_test_case_name]) +def _run_with_xfail_119128(runner: Command): + returncode = runner.run(assert_returncode_zero=False) + if returncode == 139: + pytest.xfail("Bug 119128: sporadic segment fault on backward") -@pytest.mark.dependency() -@pytest.mark.parametrize("multiprocessing_distributed", [True, False], ids=["distributed", "dataparallel"]) -def test_trained_model_eval(request, config, tmp_path, multiprocessing_distributed, case_common_dirs): - if version.parse(torchvision.__version__) < version.parse("0.13") and "voc" in str(config["dataset_path"]): - pytest.skip( - f"Test calls sample that uses `datasets.VOCDetection.parse_voc_xml` function from latest " - f"torchvision.\nThe signature of the function is not compatible with the corresponding signature " - f"from the current torchvision version : {torchvision.__version__}" - ) - depends_on_pretrained_train(request, config["test_case_id"], multiprocessing_distributed) - config_factory = ConfigFactory(config["sample_config"], tmp_path / "config.json") - config_factory.config = update_compression_algo_dict_with_legr_save_load_params( - config_factory.config, case_common_dirs["save_coeffs_path"] - ) +@pytest.mark.nightly +class TestSanitySample: + @staticmethod + @pytest.fixture(params=CONFIG_PARAMS, name="config", ids=[_get_test_case_id(p) for p in CONFIG_PARAMS]) + def fixture_config(request, dataset_dir): + sample_type, config_path, dataset_name, batch_size = request.param + dataset_path = DATASET_PATHS[sample_type][dataset_name](dataset_dir) - ckpt_path = os.path.join( - case_common_dirs["checkpoint_save_dir"], - "distributed" if multiprocessing_distributed else "data_parallel", - get_run_name(config_factory.config) + "_last.pth", - ) - args = { - "--mode": "test", - "--data": config["dataset_path"], - "--config": config_factory.serialize(), - "--log-dir": tmp_path, - "--batch-size": config["batch_size"] * NUM_DEVICES, - "--workers": 0, # Workaround for the PyTorch MultiProcessingDataLoader issue - "--weights": ckpt_path, - "--dist-url": "tcp://127.0.0.1:8987", - } - - if not torch.cuda.is_available(): - args["--cpu-only"] = True - elif multiprocessing_distributed: - args["--multiprocessing-distributed"] = True - - runner = Command(create_command_line(args, config["sample_type"]), env=ROOT_PYTHONPATH_ENV) - runner.run() + with config_path.open() as f: + jconfig = json.load(f) + if "checkpoint_save_dir" in jconfig.keys(): + del 
jconfig["checkpoint_save_dir"]

-def get_resuming_checkpoint_path(config_factory, multiprocessing_distributed, checkpoint_save_dir):
-    return os.path.join(
-        checkpoint_save_dir,
-        "distributed" if multiprocessing_distributed else "data_parallel",
-        get_run_name(config_factory.config) + "_last.pth",
-    )
+            # Use a reduced number of BN adaptation samples for speed
+            if "compression" in jconfig:
+                if isinstance(jconfig["compression"], list):
+                    algos_list = jconfig["compression"]
+                    for algo_dict in algos_list:
+                        update_compression_algo_dict_with_reduced_bn_adapt_params(algo_dict)
+                else:
+                    algo_dict = jconfig["compression"]
+                    update_compression_algo_dict_with_reduced_bn_adapt_params(algo_dict)

+        jconfig["dataset"] = dataset_name
+
+        return {
+            "sample_type": sample_type,
+            "sample_config": jconfig,
+            "model_name": jconfig["model"],
+            "dataset_path": dataset_path,
+            "batch_size": batch_size,
+            "test_case_id": _get_test_case_id(request.param),
+        }

+    @staticmethod
+    @pytest.fixture(scope="module", name="case_common_dirs")
+    def fixture_case_common_dirs(tmp_path_factory):
+        return {
+            "checkpoint_save_dir": str(tmp_path_factory.mktemp("models")),
+            "save_coeffs_path": str(tmp_path_factory.mktemp("ranking_coeffs")),
+        }

-@pytest.mark.dependency()
-@pytest.mark.parametrize("multiprocessing_distributed", [True, False], ids=["distributed", "dataparallel"])
-def test_resume(request, config, tmp_path, multiprocessing_distributed, case_common_dirs):
-    depends_on_pretrained_train(request, config["test_case_id"], multiprocessing_distributed)
-    checkpoint_save_dir = os.path.join(str(tmp_path), "models")
-    config_factory = ConfigFactory(config["sample_config"], tmp_path / "config.json")
-    config_factory.config = update_compression_algo_dict_with_legr_save_load_params(
-        config_factory.config, case_common_dirs["save_coeffs_path"], False
-    )
+    @staticmethod
+    @pytest.mark.parametrize("multiprocessing_distributed", (True, False), ids=["distributed", "dataparallel"])
+    def test_pretrained_model_eval(config, tmp_path, multiprocessing_distributed, case_common_dirs):
+        if version.parse(torchvision.__version__) < version.parse("0.13") and "voc" in str(config["dataset_path"]):
+            pytest.skip(
+                f"Test calls sample that uses `datasets.VOCDetection.parse_voc_xml` function from latest "
+                f"torchvision.\nThe signature of the function is not compatible with the corresponding signature "
+                f"from the current torchvision version : {torchvision.__version__}"
+            )
+        config_factory = ConfigFactory(config["sample_config"], tmp_path / "config.json")
+        config_factory.config = update_compression_algo_dict_with_legr_save_load_params(
+            config_factory.config, case_common_dirs["save_coeffs_path"]
+        )
+        args = {
+            "--mode": "test",
+            "--data": config["dataset_path"],
+            "--config": config_factory.serialize(),
+            "--log-dir": tmp_path,
+            "--batch-size": config["batch_size"] * NUM_DEVICES,
+            "--workers": 0,  # Workaround for the PyTorch MultiProcessingDataLoader issue
+            "--dist-url": "tcp://127.0.0.1:8987",
+        }

-    ckpt_path = get_resuming_checkpoint_path(
-        config_factory, multiprocessing_distributed, case_common_dirs["checkpoint_save_dir"]
-    )
-    if "max_iter" in config_factory.config:
-        config_factory.config["max_iter"] += 2
-    args = {
-        "--mode": "train",
-        "--data": config["dataset_path"],
-        "--config": config_factory.serialize(),
-        "--log-dir": tmp_path,
-        "--batch-size": config["batch_size"] * NUM_DEVICES,
-        "--workers": 0,  # Workaround for the PyTorch MultiProcessingDataLoader issue
-        "--epochs": 3,
-        "--checkpoint-save-dir":
checkpoint_save_dir, - "--resume": ckpt_path, - "--dist-url": "tcp://127.0.0.1:8986", - } - - if not torch.cuda.is_available(): - args["--cpu-only"] = True - elif multiprocessing_distributed: - args["--multiprocessing-distributed"] = True - - runner = Command(create_command_line(args, config["sample_type"]), env=ROOT_PYTHONPATH_ENV) - runner.run() - last_checkpoint_path = os.path.join(checkpoint_save_dir, get_run_name(config_factory.config) + "_last.pth") - assert os.path.exists(last_checkpoint_path) - if "compression" in config["sample_config"]: - allowed_compression_stages = (CompressionStage.FULLY_COMPRESSED, CompressionStage.PARTIALLY_COMPRESSED) - else: - allowed_compression_stages = (CompressionStage.UNCOMPRESSED,) - compression_stage = extract_compression_stage_from_checkpoint(last_checkpoint_path) - assert compression_stage in allowed_compression_stages + if not torch.cuda.is_available(): + args["--cpu-only"] = True + elif multiprocessing_distributed: + args["--multiprocessing-distributed"] = True + runner = Command(create_command_line(args, config["sample_type"]), env=ROOT_PYTHONPATH_ENV) + runner.run() -def extract_compression_stage_from_checkpoint(last_checkpoint_path: str) -> CompressionStage: - compression_state = torch.load(last_checkpoint_path)[COMPRESSION_STATE_ATTR] - ctrl_state = compression_state[BaseController.CONTROLLER_STATE] - compression_stage = next(iter(ctrl_state.values()))[BaseControllerStateNames.COMPRESSION_STAGE] - return compression_stage + @staticmethod + @pytest.mark.dependency() + @pytest.mark.parametrize("multiprocessing_distributed", [True, False], ids=["distributed", "dataparallel"]) + def test_pretrained_model_train(config, tmp_path, multiprocessing_distributed, case_common_dirs): + checkpoint_save_dir = os.path.join( + case_common_dirs["checkpoint_save_dir"], "distributed" if multiprocessing_distributed else "data_parallel" + ) + config_factory = ConfigFactory(config["sample_config"], tmp_path / "config.json") + config_factory.config = update_compression_algo_dict_with_legr_save_load_params( + config_factory.config, case_common_dirs["save_coeffs_path"] + ) + args = { + "--mode": "train", + "--data": config["dataset_path"], + "--config": config_factory.serialize(), + "--log-dir": tmp_path, + "--batch-size": config["batch_size"] * NUM_DEVICES, + "--workers": 0, # Workaround for the PyTorch MultiProcessingDataLoader issue + "--epochs": 2, + "--checkpoint-save-dir": checkpoint_save_dir, + "--dist-url": "tcp://127.0.0.1:8989", + } -@pytest.mark.dependency() -@pytest.mark.parametrize("multiprocessing_distributed", [True, False], ids=["distributed", "dataparallel"]) -def test_export_with_resume(request, config, tmp_path, multiprocessing_distributed, case_common_dirs): - depends_on_pretrained_train(request, config["test_case_id"], multiprocessing_distributed) - config_factory = ConfigFactory(config["sample_config"], tmp_path / "config.json") - config_factory.config = update_compression_algo_dict_with_legr_save_load_params( - config_factory.config, case_common_dirs["save_coeffs_path"], False - ) + if not torch.cuda.is_available(): + args["--cpu-only"] = True + elif multiprocessing_distributed: + args["--multiprocessing-distributed"] = True + elif config["sample_config"]["model"] == "inception_v3": + pytest.skip( + "InceptionV3 may not be trained in DataParallel " + "because it outputs namedtuple, which DP seems to be unable " + "to support even still." 
+ ) + + runner = Command(create_command_line(args, config["sample_type"]), env=ROOT_PYTHONPATH_ENV) + _run_with_xfail_119128(runner) + last_checkpoint_path = os.path.join(checkpoint_save_dir, get_run_name(config_factory.config) + "_last.pth") + assert os.path.exists(last_checkpoint_path) + if "compression" in config["sample_config"]: + allowed_compression_stages = (CompressionStage.FULLY_COMPRESSED, CompressionStage.PARTIALLY_COMPRESSED) + else: + allowed_compression_stages = (CompressionStage.UNCOMPRESSED,) + compression_stage = extract_compression_stage_from_checkpoint(last_checkpoint_path) + assert compression_stage in allowed_compression_stages + + @staticmethod + @pytest.mark.dependency() + @pytest.mark.parametrize("multiprocessing_distributed", [True, False], ids=["distributed", "dataparallel"]) + def test_trained_model_eval(request, config, tmp_path, multiprocessing_distributed, case_common_dirs): + if version.parse(torchvision.__version__) < version.parse("0.13") and "voc" in str(config["dataset_path"]): + pytest.skip( + f"Test calls sample that uses `datasets.VOCDetection.parse_voc_xml` function from latest " + f"torchvision.\nThe signature of the function is not compatible with the corresponding signature " + f"from the current torchvision version : {torchvision.__version__}" + ) + depends_on_pretrained_train(request, config["test_case_id"], multiprocessing_distributed) + config_factory = ConfigFactory(config["sample_config"], tmp_path / "config.json") + config_factory.config = update_compression_algo_dict_with_legr_save_load_params( + config_factory.config, case_common_dirs["save_coeffs_path"] + ) - ckpt_path = get_resuming_checkpoint_path( - config_factory, multiprocessing_distributed, case_common_dirs["checkpoint_save_dir"] - ) + ckpt_path = os.path.join( + case_common_dirs["checkpoint_save_dir"], + "distributed" if multiprocessing_distributed else "data_parallel", + get_run_name(config_factory.config) + "_last.pth", + ) + args = { + "--mode": "test", + "--data": config["dataset_path"], + "--config": config_factory.serialize(), + "--log-dir": tmp_path, + "--batch-size": config["batch_size"] * NUM_DEVICES, + "--workers": 0, # Workaround for the PyTorch MultiProcessingDataLoader issue + "--weights": ckpt_path, + "--dist-url": "tcp://127.0.0.1:8987", + } - onnx_path = os.path.join(str(tmp_path), "model.onnx") - args = {"--mode": "export", "--config": config_factory.serialize(), "--resume": ckpt_path, "--to-onnx": onnx_path} + if not torch.cuda.is_available(): + args["--cpu-only"] = True + elif multiprocessing_distributed: + args["--multiprocessing-distributed"] = True + + runner = Command(create_command_line(args, config["sample_type"]), env=ROOT_PYTHONPATH_ENV) + runner.run() + + @staticmethod + @pytest.mark.dependency() + @pytest.mark.parametrize("multiprocessing_distributed", [True, False], ids=["distributed", "dataparallel"]) + def test_resume(request, config, tmp_path, multiprocessing_distributed, case_common_dirs): + depends_on_pretrained_train(request, config["test_case_id"], multiprocessing_distributed) + checkpoint_save_dir = os.path.join(str(tmp_path), "models") + config_factory = ConfigFactory(config["sample_config"], tmp_path / "config.json") + config_factory.config = update_compression_algo_dict_with_legr_save_load_params( + config_factory.config, case_common_dirs["save_coeffs_path"], False + ) - if not torch.cuda.is_available(): - args["--cpu-only"] = True + ckpt_path = get_resuming_checkpoint_path( + config_factory, multiprocessing_distributed, 
case_common_dirs["checkpoint_save_dir"] + ) + if "max_iter" in config_factory.config: + config_factory.config["max_iter"] += 2 + args = { + "--mode": "train", + "--data": config["dataset_path"], + "--config": config_factory.serialize(), + "--log-dir": tmp_path, + "--batch-size": config["batch_size"] * NUM_DEVICES, + "--workers": 0, # Workaround for the PyTorch MultiProcessingDataLoader issue + "--epochs": 3, + "--checkpoint-save-dir": checkpoint_save_dir, + "--resume": ckpt_path, + "--dist-url": "tcp://127.0.0.1:8986", + } - runner = Command(create_command_line(args, config["sample_type"]), env=ROOT_PYTHONPATH_ENV) - runner.run() - assert os.path.exists(onnx_path) + if not torch.cuda.is_available(): + args["--cpu-only"] = True + elif multiprocessing_distributed: + args["--multiprocessing-distributed"] = True + + runner = Command(create_command_line(args, config["sample_type"]), env=ROOT_PYTHONPATH_ENV) + _run_with_xfail_119128(runner) + last_checkpoint_path = os.path.join(checkpoint_save_dir, get_run_name(config_factory.config) + "_last.pth") + assert os.path.exists(last_checkpoint_path) + if "compression" in config["sample_config"]: + allowed_compression_stages = (CompressionStage.FULLY_COMPRESSED, CompressionStage.PARTIALLY_COMPRESSED) + else: + allowed_compression_stages = (CompressionStage.UNCOMPRESSED,) + compression_stage = extract_compression_stage_from_checkpoint(last_checkpoint_path) + assert compression_stage in allowed_compression_stages + + @staticmethod + @pytest.mark.dependency() + @pytest.mark.parametrize("multiprocessing_distributed", [True, False], ids=["distributed", "dataparallel"]) + def test_export_with_resume(request, config, tmp_path, multiprocessing_distributed, case_common_dirs): + depends_on_pretrained_train(request, config["test_case_id"], multiprocessing_distributed) + config_factory = ConfigFactory(config["sample_config"], tmp_path / "config.json") + config_factory.config = update_compression_algo_dict_with_legr_save_load_params( + config_factory.config, case_common_dirs["save_coeffs_path"], False + ) + ckpt_path = get_resuming_checkpoint_path( + config_factory, multiprocessing_distributed, case_common_dirs["checkpoint_save_dir"] + ) -def test_export_with_pretrained(tmp_path): - config = SampleConfig() - config.update( - { - "model": "resnet18", - "dataset": "imagenet", - "input_info": {"sample_size": [2, 3, 299, 299]}, - "num_classes": 1000, - "compression": {"algorithm": "magnitude_sparsity"}, + onnx_path = os.path.join(str(tmp_path), "model.onnx") + args = { + "--mode": "export", + "--config": config_factory.serialize(), + "--resume": ckpt_path, + "--to-onnx": onnx_path, } - ) - config_factory = ConfigFactory(config, tmp_path / "config.json") - onnx_path = os.path.join(str(tmp_path), "model.onnx") - args = {"--mode": "export", "--config": config_factory.serialize(), "--pretrained": "", "--to-onnx": onnx_path} + if not torch.cuda.is_available(): + args["--cpu-only"] = True + + runner = Command(create_command_line(args, config["sample_type"]), env=ROOT_PYTHONPATH_ENV) + runner.run() + assert os.path.exists(onnx_path) + + @staticmethod + def test_export_with_pretrained(tmp_path): + config = SampleConfig() + config.update( + { + "model": "resnet18", + "dataset": "imagenet", + "input_info": {"sample_size": [2, 3, 299, 299]}, + "num_classes": 1000, + "compression": {"algorithm": "magnitude_sparsity"}, + } + ) + config_factory = ConfigFactory(config, tmp_path / "config.json") - if not torch.cuda.is_available(): - args["--cpu-only"] = True + onnx_path = 
os.path.join(str(tmp_path), "model.onnx") + args = {"--mode": "export", "--config": config_factory.serialize(), "--pretrained": "", "--to-onnx": onnx_path} - runner = Command(create_command_line(args, "classification"), env=ROOT_PYTHONPATH_ENV) - runner.run() - assert os.path.exists(onnx_path) + if not torch.cuda.is_available(): + args["--cpu-only"] = True + runner = Command(create_command_line(args, "classification"), env=ROOT_PYTHONPATH_ENV) + runner.run() + assert os.path.exists(onnx_path) -@pytest.mark.parametrize( - ("algo", "ref_weight_decay"), - (("rb_sparsity", 0), ("const_sparsity", 1e-4), ("magnitude_sparsity", 1e-4), ("quantization", 1e-4)), -) -def test_get_default_weight_decay(algo, ref_weight_decay): - config = NNCFConfig() - config.update({"compression": {"algorithm": algo}}) - assert ref_weight_decay == get_default_weight_decay(config) + @staticmethod + @pytest.mark.parametrize( + ("algo", "ref_weight_decay"), + (("rb_sparsity", 0), ("const_sparsity", 1e-4), ("magnitude_sparsity", 1e-4), ("quantization", 1e-4)), + ) + def test_get_default_weight_decay(algo, ref_weight_decay): + config = NNCFConfig() + config.update({"compression": {"algorithm": algo}}) + assert ref_weight_decay == get_default_weight_decay(config) + + @staticmethod + def test_cpu_only_mode_produces_cpu_only_model(config, tmp_path, mocker): + config_factory = ConfigFactory(config["sample_config"], tmp_path / "config.json") + args = { + "--data": config["dataset_path"], + "--config": config_factory.serialize(), + "--log-dir": tmp_path, + "--batch-size": config["batch_size"] * NUM_DEVICES, + "--workers": 0, # Workaround for the PyTorch MultiProcessingDataLoader issue + "--epochs": 1, + "--cpu-only": True, + } + # to prevent starting a not closed mlflow session due to memory leak of config and SafeMLFLow happens with a + # mocked train function + mocker.patch("examples.torch.common.utils.SafeMLFLow") + arg_list = arg_list_from_arg_dict(args) + if config["sample_type"] == "classification": + import examples.torch.classification.main as sample -@contextmanager -def set_num_threads_locally(n=1): - old_n = torch.get_num_threads() - try: - torch.set_num_threads(n) - yield - finally: - torch.set_num_threads(old_n) + if is_staged_quantization(config["sample_config"]): + mocker.patch("examples.torch.classification.staged_quantization_worker.train_epoch_staged") + mocker.patch("examples.torch.classification.staged_quantization_worker.validate") + import examples.torch.classification.staged_quantization_worker as staged_worker + staged_worker.validate.return_value = (0, 0, 0) + else: + mocker.patch("examples.torch.classification.main.train_epoch") + mocker.patch("examples.torch.classification.main.validate") + sample.validate.return_value = (0, 0, 0) + elif config["sample_type"] == "semantic_segmentation": + import examples.torch.semantic_segmentation.main as sample + import examples.torch.semantic_segmentation.train + + mocker.spy(examples.torch.semantic_segmentation.train.Train, "__init__") + elif config["sample_type"] == "object_detection": + import examples.torch.object_detection.main as sample + + mocker.spy(sample, "train") + + # Set number of threads = 1 to avoid hang for UNet (ticket 100106). + # Potentially it might happen when OpenMP is used before fork. 
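+        # (OpenMP runtimes are generally not fork-safe: worker threads created in the
+        # parent process are not recreated in the forked child, so a lock held at fork
+        # time can deadlock the child; capping the thread count at 1 sidesteps this.)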
+ # The relevant thread: https://github.com/pytorch/pytorch/issues/91547 + with set_num_threads_locally(1) if config["sample_type"] == "semantic_segmentation" else nullcontext(): + sample.main(arg_list) + + # pylint: disable=no-member + if config["sample_type"] == "classification": + if is_staged_quantization(config["sample_config"]): + import examples.torch.classification.staged_quantization_worker as staged_worker + + model_to_be_trained = staged_worker.train_epoch_staged.call_args[0][2] # model + else: + model_to_be_trained = sample.train_epoch.call_args[0][1] # model + elif config["sample_type"] == "semantic_segmentation": + model_to_be_trained = examples.torch.semantic_segmentation.train.Train.__init__.call_args[0][1] # model + elif config["sample_type"] == "object_detection": + model_to_be_trained = sample.train.call_args[0][0] # net + + for p in model_to_be_trained.parameters(): + assert not p.is_cuda + + @staticmethod + @pytest.mark.parametrize("target_device", [x.value for x in HWConfigType]) + def test_sample_propagates_target_device_cl_param_to_nncf_config(mocker, tmp_path, target_device): + config_dict = { + "input_info": { + "sample_size": [1, 1, 32, 32], + }, + "compression": {"algorithm": "quantization"}, + } + config_factory = ConfigFactory(config_dict, tmp_path / "config.json") + args = { + "--data": str(tmp_path), + "--config": config_factory.serialize(), + "--log-dir": tmp_path, + "--batch-size": 1, + "--target-device": target_device, + } + if not torch.cuda.is_available(): + args["--cpu-only"] = True -def test_cpu_only_mode_produces_cpu_only_model(config, tmp_path, mocker): - config_factory = ConfigFactory(config["sample_config"], tmp_path / "config.json") - args = { - "--data": config["dataset_path"], - "--config": config_factory.serialize(), - "--log-dir": tmp_path, - "--batch-size": config["batch_size"] * NUM_DEVICES, - "--workers": 0, # Workaround for the PyTorch MultiProcessingDataLoader issue - "--epochs": 1, - "--cpu-only": True, - } - - # to prevent starting a not closed mlflow session due to memory leak of config and SafeMLFLow happens with a - # mocked train function - mocker.patch("examples.torch.common.utils.SafeMLFLow") - arg_list = arg_list_from_arg_dict(args) - if config["sample_type"] == "classification": + arg_list = arg_list_from_arg_dict(args) import examples.torch.classification.main as sample - if is_staged_quantization(config["sample_config"]): - mocker.patch("examples.torch.classification.staged_quantization_worker.train_epoch_staged") - mocker.patch("examples.torch.classification.staged_quantization_worker.validate") - import examples.torch.classification.staged_quantization_worker as staged_worker - - staged_worker.validate.return_value = (0, 0, 0) - else: - mocker.patch("examples.torch.classification.main.train_epoch") - mocker.patch("examples.torch.classification.main.validate") - sample.validate.return_value = (0, 0, 0) - elif config["sample_type"] == "semantic_segmentation": - import examples.torch.semantic_segmentation.main as sample - import examples.torch.semantic_segmentation.train - - mocker.spy(examples.torch.semantic_segmentation.train.Train, "__init__") - elif config["sample_type"] == "object_detection": - import examples.torch.object_detection.main as sample - - mocker.spy(sample, "train") - - # Set number of threads = 1 to avoid hang for UNet (ticket 100106). - # Potentially it might happen when OpenMP is used before fork. 
- # The relevant thread: https://github.com/pytorch/pytorch/issues/91547 - with set_num_threads_locally(1) if config["sample_type"] == "semantic_segmentation" else nullcontext(): + start_worker_mock = mocker.patch("examples.torch.classification.main.start_worker") sample.main(arg_list) - # pylint: disable=no-member - if config["sample_type"] == "classification": - if is_staged_quantization(config["sample_config"]): - import examples.torch.classification.staged_quantization_worker as staged_worker + config = start_worker_mock.call_args[0][1].nncf_config + assert config["target_device"] == target_device - model_to_be_trained = staged_worker.train_epoch_staged.call_args[0][2] # model - else: - model_to_be_trained = sample.train_epoch.call_args[0][1] # model - elif config["sample_type"] == "semantic_segmentation": - model_to_be_trained = examples.torch.semantic_segmentation.train.Train.__init__.call_args[0][1] # model - elif config["sample_type"] == "object_detection": - model_to_be_trained = sample.train.call_args[0][0] # net - - for p in model_to_be_trained.parameters(): - assert not p.is_cuda - - -@pytest.mark.parametrize("target_device", [x.value for x in HWConfigType]) -def test_sample_propagates_target_device_cl_param_to_nncf_config(mocker, tmp_path, target_device): - config_dict = { - "input_info": { - "sample_size": [1, 1, 32, 32], - }, - "compression": {"algorithm": "quantization"}, - } - config_factory = ConfigFactory(config_dict, tmp_path / "config.json") - args = { - "--data": str(tmp_path), - "--config": config_factory.serialize(), - "--log-dir": tmp_path, - "--batch-size": 1, - "--target-device": target_device, - } - if not torch.cuda.is_available(): - args["--cpu-only"] = True - - arg_list = arg_list_from_arg_dict(args) - import examples.torch.classification.main as sample - - start_worker_mock = mocker.patch("examples.torch.classification.main.start_worker") - sample.main(arg_list) - - config = start_worker_mock.call_args[0][1].nncf_config - assert config["target_device"] == target_device - - -@pytest.fixture( - name="accuracy_aware_config", - params=[ - TEST_ROOT / "torch" / "data" / "configs" / "resnet18_pruning_accuracy_aware.json", - TEST_ROOT / "torch" / "data" / "configs" / "resnet18_int8_accuracy_aware.json", - ], -) -def fixture_accuracy_aware_config(request): - config_path = request.param - with config_path.open() as f: - jconfig = json.load(f) - - dataset_name = "mock_32x32" - dataset_path = os.path.join("/tmp", "mock_32x32") - sample_type = "classification" - - jconfig["dataset"] = dataset_name - - return { - "sample_type": sample_type, - "sample_config": jconfig, - "model_name": jconfig["model"], - "dataset_path": dataset_path, - "batch_size": 12, - } - - -@pytest.mark.dependency() -@pytest.mark.parametrize("multiprocessing_distributed", [True, False], ids=["distributed", "dataparallel"]) -def test_accuracy_aware_training_pipeline(accuracy_aware_config, tmp_path, multiprocessing_distributed): - config_factory = ConfigFactory(accuracy_aware_config["sample_config"], tmp_path / "config.json") - log_dir = tmp_path / "accuracy_aware" - log_dir = log_dir / "distributed" if multiprocessing_distributed else log_dir / "dataparallel" - - args = { - "--mode": "train", - "--data": accuracy_aware_config["dataset_path"], - "--config": config_factory.serialize(), - "--log-dir": log_dir, - "--batch-size": accuracy_aware_config["batch_size"] * NUM_DEVICES, - "--workers": 0, # Workaround for the PyTorch MultiProcessingDataLoader issue - "--epochs": 2, - "--dist-url": 
"tcp://127.0.0.1:8989", - } - - if not torch.cuda.is_available(): - args["--cpu-only"] = True - elif multiprocessing_distributed: - args["--multiprocessing-distributed"] = True - - runner = Command(create_command_line(args, accuracy_aware_config["sample_type"]), env=ROOT_PYTHONPATH_ENV) - runner.run() - - checkpoint_save_dir = log_dir / get_run_name(config_factory.config) - aa_checkpoint_path = get_accuracy_aware_checkpoint_dir_path(checkpoint_save_dir) - last_checkpoint_path = aa_checkpoint_path / "acc_aware_checkpoint_last.pth" - - assert last_checkpoint_path.exists() - if "compression" in accuracy_aware_config["sample_config"]: - allowed_compression_stages = (CompressionStage.FULLY_COMPRESSED, CompressionStage.PARTIALLY_COMPRESSED) - else: - allowed_compression_stages = (CompressionStage.UNCOMPRESSED,) - compression_stage = extract_compression_stage_from_checkpoint(str(last_checkpoint_path)) - assert compression_stage in allowed_compression_stages + @staticmethod + @pytest.fixture( + name="accuracy_aware_config", + params=[ + TEST_ROOT / "torch" / "data" / "configs" / "resnet18_pruning_accuracy_aware.json", + TEST_ROOT / "torch" / "data" / "configs" / "resnet18_int8_accuracy_aware.json", + ], + ) + def fixture_accuracy_aware_config(request): + config_path = request.param + with config_path.open() as f: + jconfig = json.load(f) + + dataset_name = "mock_32x32" + dataset_path = os.path.join("/tmp", "mock_32x32") + sample_type = "classification" + + jconfig["dataset"] = dataset_name + + return { + "sample_type": sample_type, + "sample_config": jconfig, + "model_name": jconfig["model"], + "dataset_path": dataset_path, + "batch_size": 12, + } + @staticmethod + @pytest.mark.dependency() + @pytest.mark.parametrize("multiprocessing_distributed", [True, False], ids=["distributed", "dataparallel"]) + def test_accuracy_aware_training_pipeline(accuracy_aware_config, tmp_path, multiprocessing_distributed): + config_factory = ConfigFactory(accuracy_aware_config["sample_config"], tmp_path / "config.json") + log_dir = tmp_path / "accuracy_aware" + log_dir = log_dir / "distributed" if multiprocessing_distributed else log_dir / "dataparallel" + + args = { + "--mode": "train", + "--data": accuracy_aware_config["dataset_path"], + "--config": config_factory.serialize(), + "--log-dir": log_dir, + "--batch-size": accuracy_aware_config["batch_size"] * NUM_DEVICES, + "--workers": 0, # Workaround for the PyTorch MultiProcessingDataLoader issue + "--epochs": 2, + "--dist-url": "tcp://127.0.0.1:8989", + } -@pytest.mark.parametrize("sample_type", SAMPLE_TYPES) -def test_eval_only_config_fails_to_train(tmp_path, sample_type): - config_factory = ConfigFactory( - {"model": "mock", "input_infos": {"sample_size": [1, 1, 1, 1]}, "eval_only": True}, tmp_path / "config.json" - ) - args = { - "--mode": "train", - "--config": config_factory.serialize(), - } - - runner = Command(create_command_line(args, sample_type), env=ROOT_PYTHONPATH_ENV) - return_code = runner.run(assert_returncode_zero=False) - assert return_code != 0 - assert remove_line_breaks(EVAL_ONLY_ERROR_TEXT) in remove_line_breaks("".join(runner.output)) + if not torch.cuda.is_available(): + args["--cpu-only"] = True + elif multiprocessing_distributed: + args["--multiprocessing-distributed"] = True + + runner = Command(create_command_line(args, accuracy_aware_config["sample_type"]), env=ROOT_PYTHONPATH_ENV) + _run_with_xfail_119128(runner) + + checkpoint_save_dir = log_dir / get_run_name(config_factory.config) + aa_checkpoint_path = 
get_accuracy_aware_checkpoint_dir_path(checkpoint_save_dir) + last_checkpoint_path = aa_checkpoint_path / "acc_aware_checkpoint_last.pth" + + assert last_checkpoint_path.exists() + if "compression" in accuracy_aware_config["sample_config"]: + allowed_compression_stages = (CompressionStage.FULLY_COMPRESSED, CompressionStage.PARTIALLY_COMPRESSED) + else: + allowed_compression_stages = (CompressionStage.UNCOMPRESSED,) + compression_stage = extract_compression_stage_from_checkpoint(str(last_checkpoint_path)) + assert compression_stage in allowed_compression_stages + + @staticmethod + @pytest.mark.parametrize("sample_type", SAMPLE_TYPES) + def test_eval_only_config_fails_to_train(tmp_path, sample_type): + config_factory = ConfigFactory( + {"model": "mock", "input_infos": {"sample_size": [1, 1, 1, 1]}, "eval_only": True}, tmp_path / "config.json" + ) + args = { + "--mode": "train", + "--config": config_factory.serialize(), + } + + runner = Command(create_command_line(args, sample_type), env=ROOT_PYTHONPATH_ENV) + return_code = runner.run(assert_returncode_zero=False) + assert return_code != 0 + assert remove_line_breaks(EVAL_ONLY_ERROR_TEXT) in remove_line_breaks("".join(runner.output)) From d54d47db37d66dcee022585b69f68aa266ddf078 Mon Sep 17 00:00:00 2001 From: Alexander Suslov Date: Mon, 11 Sep 2023 16:35:27 +0400 Subject: [PATCH 07/15] Update README.md (#2125) ### Changes Added the link to Quantization with accuracy control using NNCF notebooks. ### Reason for changes Customer adoption ### Related tickets N/A ### Tests N/A --- README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/README.md b/README.md index 7605785a338..d6d0f20afbd 100644 --- a/README.md +++ b/README.md @@ -288,6 +288,7 @@ A collection of ready-to-run Jupyter* notebooks are available to demonstrate how - [Optimizing PyTorch models with NNCF of OpenVINO by 8-bit quantization](https://github.com/openvinotoolkit/openvino_notebooks/tree/main/notebooks/302-pytorch-quantization-aware-training) - [Optimizing TensorFlow models with NNCF of OpenVINO by 8-bit quantization](https://github.com/openvinotoolkit/openvino_notebooks/tree/main/notebooks/305-tensorflow-quantization-aware-training) - [Accelerate Inference of Sparse Transformer Models with OpenVINO and 4th Gen Intel Xeon Scalable Processors](https://github.com/openvinotoolkit/openvino_notebooks/blob/main/notebooks/116-sparsity-optimization) +- [Quantization with accuracy control using NNCF](https://github.com/openvinotoolkit/openvino_notebooks/tree/main/notebooks/122-quantizing-model-with-accuracy-control) ### Post-Training Quantization Samples From 9d8ed96a8bb0ff7a9d3fd0a8d7aa48c0eb3c50e7 Mon Sep 17 00:00:00 2001 From: Nikita Malinin Date: Mon, 11 Sep 2023 15:19:05 +0200 Subject: [PATCH 08/15] Update ReleaseNotes with 2.6.0 (#2111) --- ReleaseNotes.md | 60 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 60 insertions(+) diff --git a/ReleaseNotes.md b/ReleaseNotes.md index dd8d13bcc61..4d9f3d83b6a 100644 --- a/ReleaseNotes.md +++ b/ReleaseNotes.md @@ -1,5 +1,65 @@ # Release Notes +## New in Release 2.6.0 + +Post-training Quantization: + +- Features: + - Added `CPU_SPR` device type support. + - Added quantizers scales unification. + - Added quantization scheme for ReduceSum operation. + - Added new types (ReduceL2, ReduceSum, Maximum) to the ignored scope for `ModelType.Transformer`. + - (OpenVINO) Added SmoothQuant algorithm. + - (OpenVINO) Added ChannelAlignment algorithm. + - (OpenVINO) Added HyperparameterTuner algorithm. 
+  - (PyTorch) Added FastBiasCorrection algorithm support.
+  - (OpenVINO, ONNX) Added embedding weights quantization.
+  - (OpenVINO, PyTorch) Added new `compress_weights` method that provides data-free [INT8 weights compression](docs/compression_algorithms/CompressWeights.md).
+- Fixes:
+  - Fixed detection of decomposed post-processing in models.
+  - Multiple fixes (new patterns, bugfixes, etc.) to solve [#1936](https://github.com/openvinotoolkit/nncf/issues/1936) issue.
+  - Fixed model reshaping during quantization to keep the original model shape.
+  - (OpenVINO) Added support for sequential model quantization.
+  - (OpenVINO) Fixed in-place statistics cast to support empty dimensions.
+  - (OpenVINO, ONNX) Fixed quantization of the MatMul operation with weights rank > 2.
+  - (OpenVINO, ONNX) Fixed BiasCorrection algorithm to enable [CLIP model quantization](https://github.com/openvinotoolkit/openvino_notebooks/tree/main/notebooks/228-clip-zero-shot-image-classification).
+- Improvements:
+  - Optimized `quantize(…)` pipeline (up to 4.3x speed up in total).
+  - Optimized `quantize_with_accuracy_control(…)` pipeline (up to 8x speed up for [122-quantizing-model-with-accuracy-control](https://github.com/openvinotoolkit/openvino_notebooks/tree/main/notebooks/122-quantizing-model-with-accuracy-control) notebook).
+  - Optimized general statistics collection (up to 1.2x speed up for ONNX backend).
+  - Ignored patterns separated from Fused patterns scheme (with multiple patterns addition).
+- Tutorials:
+  - [Post-Training Optimization of Segment Anything Model](https://github.com/openvinotoolkit/openvino_notebooks/tree/main/notebooks/237-segment-anything).
+  - [Post-Training Optimization of CLIP Model](https://github.com/openvinotoolkit/openvino_notebooks/tree/main/notebooks/228-clip-zero-shot-image-classification).
+  - [Post-Training Optimization of ImageBind Model](https://github.com/openvinotoolkit/openvino_notebooks/tree/main/notebooks/239-image-bind).
+  - [Post-Training Optimization of Whisper Model](https://github.com/openvinotoolkit/openvino_notebooks/tree/main/notebooks/227-whisper-subtitles-generation).
+  - [Post-Training Optimization with accuracy control](https://github.com/openvinotoolkit/openvino_notebooks/tree/main/notebooks/122-quantizing-model-with-accuracy-control).
+
+Compression-aware training:
+
+- Features:
+  - Added shape pruning processor for BootstrapNAS algorithm.
+  - Added KD loss for BootstrapNAS algorithm.
+  - Added `validate_scopes` parameter for NNCF configuration.
+  - (PyTorch) Added PyTorch 2.0 support.
+  - (PyTorch) Added `.strip()` option to API.
+  - (PyTorch) Enabled bfloat data type for quantization kernels.
+  - (PyTorch) Quantized models can now be `torch.jit.trace`d without calling `.strip()`.
+  - (PyTorch) Added support for overridden `forward` instance attribute on model objects passed into `create_compressed_model`.
+  - (Tensorflow) Added Tensorflow 2.12 support.
+- Fixes:
+  - (PyTorch) Fixed a padding adjustment issue in the elastic kernel to work with different active kernel sizes.
+  - (PyTorch) Fixed torch graph tracing for the case when tensors belonging to parallel edges are interleaved in the order of the tensor arguments.
+  - (PyTorch) Fixed the recurrent node matching (LSTM, GRU cells) condition with a strict rule to avoid adding unnecessary nodes to the ignored scope.
+  - (PyTorch) Fixed the `torch.jit.script` wrapper so that user-side exception handling during `torch.jit.script` invocation does not cause NNCF to be permanently disabled.
+  - (PyTorch, Tensorflow) Adjusted the quantizer propagation algorithm to check whether quantizer propagation will result in output quantization.
+  - (PyTorch) Added a redefined `__class__` method for ProxyModule that avoids errors when calling `super()` in the forward method.
+- Deprecations/Removals:
+  - (PyTorch) Removed deprecated `NNCFNetwork.__getattr__`, `NNCFNetwork.get_nncf_wrapped_model` methods.
+- Requirements:
+  - Updated PyTorch version (2.0.1).
+  - Updated Tensorflow version (2.12.0).
+
 ## New in Release 2.5.0

 Post-training Quantization:

From 25968cdd43f084fae2767875769ee4fd31888717 Mon Sep 17 00:00:00 2001
From: andreyanufr
Date: Mon, 11 Sep 2023 16:19:18 +0200
Subject: [PATCH 09/15] Fixed problem with shared weights in compression.
 (#2110)

### Changes

Fixed a problem with shared weights in compression.

### Reason for changes

Some LLMs use shared (tied) weights, which the compression pass previously handled incorrectly.

### Related tickets

### Tests

---
 .../torch/quantization/weights_compression.py | 20 ++++++++++---
 tests/torch/ptq/test_weights_compression.py   | 29 ++++++++++++++++++-
 2 files changed, 44 insertions(+), 5 deletions(-)

diff --git a/nncf/torch/quantization/weights_compression.py b/nncf/torch/quantization/weights_compression.py
index 2d191333beb..9fc725fb235 100644
--- a/nncf/torch/quantization/weights_compression.py
+++ b/nncf/torch/quantization/weights_compression.py
@@ -9,7 +9,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.

-from typing import List, Optional
+from typing import Dict, List, Optional

 import torch
 from torch import nn
@@ -39,7 +39,7 @@ def forward(self, layer, op_arg):


 def _insert_pre_compression_operations(
-    module: nn.Module, allowed_types: List, level_high: int = 255
+    module: nn.Module, allowed_types: List, level_high: int = 255, compression_hist: Dict = None
 ) -> Optional[nn.Module]:
     """
     Inserts weights compression with dequantization for layers in `allowed_types`.

     :param module: The module to insert the weights compression.
     :param allowed_types: list of allowed types for weights compression.
     :param level_high: highest possible value of compressed weights (lower is 0 in assymetric quantization).
+    :param compression_hist: mapping between layer weight and corresponding WeightsDecompressor for finding
+        shared weights.
     :return: The non-trainable module with inserted operations.
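+
+    Note: weights that already have a (u)int8 dtype are treated as shared with an
+    already-processed layer; the decompressor recorded in compression_hist is reused
+    for them instead of compressing the same tensor twice.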
""" + if compression_hist is None: + compression_hist = {} for _, layer in module.named_children(): if not type(layer) in allowed_types: - _insert_pre_compression_operations(layer, allowed_types, level_high) + _insert_pre_compression_operations(layer, allowed_types, level_high, compression_hist) continue + + if layer.weight.dtype in [torch.uint8, torch.int8]: + if layer.weight in compression_hist: + layer.register_pre_forward_operation(compression_hist[layer.weight]) + continue + target_dim = layer.target_weight_dim_for_compression stat_dim = (target_dim + 1) % 2 input_low = torch.min(layer.weight, dim=stat_dim).values.detach() @@ -61,7 +71,7 @@ def _insert_pre_compression_operations( scale = scale.unsqueeze(stat_dim) zero_point = zero_point.unsqueeze(stat_dim) - layer.register_pre_forward_operation(WeightsDecompressor(zero_point, scale)) + key = layer.register_pre_forward_operation(WeightsDecompressor(zero_point, scale)) compressed_weight = layer.weight.data / scale + zero_point compressed_weight = torch.clamp(torch.round(compressed_weight), 0, level_high) @@ -69,6 +79,8 @@ def _insert_pre_compression_operations( layer.weight.requires_grad = False layer.weight.data = compressed_weight.type(dtype=torch.uint8) + compression_hist[layer.weight] = layer.get_pre_op(key) + def insert_pre_compression_operations(module: nn.Module, bits: int = 8) -> Optional[nn.Module]: """ diff --git a/tests/torch/ptq/test_weights_compression.py b/tests/torch/ptq/test_weights_compression.py index 72427191abe..e71394e9284 100644 --- a/tests/torch/ptq/test_weights_compression.py +++ b/tests/torch/ptq/test_weights_compression.py @@ -15,12 +15,15 @@ class ShortTransformer(torch.nn.Module): - def __init__(self, in_features, num_embeddings): + def __init__(self, in_features, num_embeddings, share_weights=False): super().__init__() self.wte = torch.nn.Embedding(num_embeddings, in_features) self.linear = torch.nn.Linear(in_features, in_features) self.lm_head = torch.nn.Linear(in_features, num_embeddings) + if share_weights: + self.lm_head.weight = self.wte.weight + def forward(self, input_ids): x = self.wte(input_ids) x = self.linear(x) @@ -43,3 +46,27 @@ def test_compress_weights(): n_compressed_weights += 1 assert n_compressed_weights == n_target_modules + + +def test_compress_shared_weights(): + model = ShortTransformer(5, 10, share_weights=True) + + compressed_model = compress_weights(model) + + n_compressed_weights = 0 + n_target_modules = 0 + + for _, module in compressed_model.named_children(): + if isinstance(module, (torch.nn.Linear, torch.nn.Embedding)): + n_target_modules += 1 + if module.weight.dtype in [torch.uint8, torch.int8]: + n_compressed_weights += 1 + + assert n_compressed_weights == n_target_modules + + assert len(compressed_model.wte.pre_ops) > 0 + + assert len(compressed_model.wte.pre_ops) == len(compressed_model.lm_head.pre_ops) + + for key, val in compressed_model.wte.pre_ops.items(): + assert compressed_model.lm_head.get_pre_op(key) is val From 8400793257af7779060f9042572471f46d862096 Mon Sep 17 00:00:00 2001 From: Andrey Churkin Date: Tue, 12 Sep 2023 09:27:26 +0100 Subject: [PATCH 10/15] Add support for the dump_intermediate_model parameter (#2086) ### Changes - Add support for the `dump_intermediate_model` parameter to save fully quantized model in the AAQ pipeline ### Reason for changes - Alignment with POT ### Related tickets N/A ### Tests N/A --- nncf/openvino/quantization/quantize_model.py | 4 +++ nncf/quantization/advanced_parameters.py | 4 +++ tests/openvino/tools/calibrate.py | 30 
++++++++++++++++---- 3 files changed, 32 insertions(+), 6 deletions(-) diff --git a/nncf/openvino/quantization/quantize_model.py b/nncf/openvino/quantization/quantize_model.py index 4368f1a7578..cefa084cd7d 100644 --- a/nncf/openvino/quantization/quantize_model.py +++ b/nncf/openvino/quantization/quantize_model.py @@ -183,6 +183,10 @@ def native_quantize_with_accuracy_control_impl( copied_parameters, ) + if advanced_accuracy_restorer_parameters.intermediate_model_dir: + quantized_model_path = f"{advanced_accuracy_restorer_parameters.intermediate_model_dir}/intermediate_model.xml" + ov.serialize(quantized_model, quantized_model_path) + evaluator = Evaluator(validation_fn) evaluator.enable_iteration_count() initial_metric_results = evaluator.collect_metric_results(model, validation_dataset, model_name="initial") diff --git a/nncf/quantization/advanced_parameters.py b/nncf/quantization/advanced_parameters.py index a3ec119ddeb..a8ed96c8ff8 100644 --- a/nncf/quantization/advanced_parameters.py +++ b/nncf/quantization/advanced_parameters.py @@ -193,12 +193,16 @@ class AdvancedAccuracyRestorerParameters: :param num_ranking_processes: The number of parallel processes that are used to rank quantization operations. :type num_ranking_processes: Optional[int] + :param intermediate_model_dir: Path to the folder where the model, which was fully + quantized with initial parameters, should be saved. + :type intermediate_model_dir: Optional[str] """ max_num_iterations: int = sys.maxsize tune_hyperparams: bool = False ranking_subset_size: Optional[int] = None num_ranking_processes: Optional[int] = None + intermediate_model_dir: Optional[str] = None def changes_asdict(params: Any) -> Dict[str, Any]: diff --git a/tests/openvino/tools/calibrate.py b/tests/openvino/tools/calibrate.py index 82f65490877..1662d767958 100644 --- a/tests/openvino/tools/calibrate.py +++ b/tests/openvino/tools/calibrate.py @@ -10,6 +10,7 @@ # limitations under the License. 
# pylint:disable=too-many-lines +import functools import json import multiprocessing import os @@ -516,6 +517,19 @@ def map_tune_hyperparams(tune_hyperparams): return {"advanced_accuracy_restorer_parameters": advanced_parameters} +def map_dump_intermediate_model(dump_intermediate_model, output_dir): + intermediate_model_dir = None + + if dump_intermediate_model: + intermediate_model_dir = os.path.join(output_dir, "intermediate_model") + os.makedirs(intermediate_model_dir, exist_ok=True) + + ctx = get_algorithm_parameters_context() + advanced_parameters = ctx.params.get("advanced_accuracy_restorer_parameters", AdvancedAccuracyRestorerParameters()) + advanced_parameters.intermediate_model_dir = intermediate_model_dir + return {"advanced_accuracy_restorer_parameters": advanced_parameters} + + def create_parameters_for_algorithm( pot_parameters, supported_parameters, default_parameters, ignored_parameters, param_name_map ): @@ -587,9 +601,12 @@ def map_quantization_parameters(pot_parameters): return result -def map_quantize_with_accuracy_control_parameters(pot_parameters): +def map_quantize_with_accuracy_control_parameters(pot_parameters, output_dir): supported_parameters, default_parameters, ignored_parameters = get_pot_quantization_parameters_mapping() + ignored_parameters.remove("dump_intermediate_model") + map_dump_intermediate_model_fn = functools.partial(map_dump_intermediate_model, output_dir=output_dir) + supported_parameters.update( { "maximal_drop": lambda x: {"max_drop": x}, @@ -597,6 +614,7 @@ def map_quantize_with_accuracy_control_parameters(pot_parameters): "ranking_subset_size": map_ranking_subset_size, "tune_hyperparams": map_tune_hyperparams, "drop_type": map_drop_type, + "dump_intermediate_model": map_dump_intermediate_model_fn, } ) @@ -625,11 +643,11 @@ def map_quantize_with_accuracy_control_parameters(pot_parameters): return result -def map_paramaters(pot_algo_name, nncf_algo_name, pot_parameters): +def map_paramaters(pot_algo_name, nncf_algo_name, pot_parameters, output_dir): if nncf_algo_name == "quantize": return map_quantization_parameters(pot_parameters) if nncf_algo_name == "quantize_with_accuracy_control": - return map_quantize_with_accuracy_control_parameters(pot_parameters) + return map_quantize_with_accuracy_control_parameters(pot_parameters, output_dir) raise ValueError(f"Mapping POT {pot_algo_name} parameters to NNCF {nncf_algo_name} parameters is not supported") @@ -645,7 +663,7 @@ def get_accuracy_checker_config(engine_config): return engine_config -def get_nncf_algorithms_config(compression_config): +def get_nncf_algorithms_config(compression_config, output_dir): nncf_algorithms = {} override_options = {} for pot_algo in compression_config.algorithms: @@ -665,7 +683,7 @@ def get_nncf_algorithms_config(compression_config): override_options[nncf_algo_name]["parameters"].update(parameters) continue - nncf_algo_parameters = map_paramaters(pot_algo_name, nncf_algo_name, pot_algo.params) + nncf_algo_parameters = map_paramaters(pot_algo_name, nncf_algo_name, pot_algo.params, output_dir) if advanced_parameters is not None: nncf_algo_parameters["advanced_parameters"] = replace( @@ -989,7 +1007,7 @@ def main(): xml_path, bin_path = get_model_paths(config.model) accuracy_checker_config = get_accuracy_checker_config(config.engine) - nncf_algorithms_config = get_nncf_algorithms_config(config.compression) + nncf_algorithms_config = get_nncf_algorithms_config(config.compression, args.output_dir) set_log_file(f"{args.output_dir}/log.txt") output_dir = 
os.path.join(args.output_dir, "optimized")

From 0a089662d425eda52bf4150114b0aed75b44a7d8 Mon Sep 17 00:00:00 2001
From: Nikita Savelyev
Date: Tue, 12 Sep 2023 15:10:50 +0200
Subject: [PATCH 11/15] tqdm progress bar improvements (#2114)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

### Changes

1. Fixed an issue with a wrong `tqdm` bar length in the case when the calibration dataset length is less than `subset_size`. Reproducer: https://github.com/nikita-savelyevv/nncf/commit/f0951c14ab6c831fe2f6c389a605593846adeeab

**Before:** `Statistics collection:  34%|██████    | 101/300 [00:03<00:06, 28.66it/s]`
**After:**
When dataset has `__len__`: `Statistics collection: 100%|██████████████████| 101/101 [00:03<00:00, 28.20it/s]`
When dataset doesn't have `__len__`: `Statistics collection:  34%|██████    | 101/300 [00:03<00:06, 29.45it/s]`

2. Improved the progress bar GUI when run from notebooks.
**Before:**
[screenshot: plain-text progress bar rendered in a notebook]
or (in some browsers the progress bar takes up multiple lines):
![image](https://github.com/openvinotoolkit/nncf/assets/23343961/99fa9629-2869-4d8f-872e-97ef59bc092e)
**After:**
[screenshot: HTML progress bar rendered in a notebook]
In the console the progress bar is unchanged.

### Reason for changes

User experience improvement.

### Related tickets

112627

### Tests
---
 nncf/common/tensor_statistics/aggregator.py   | 10 ++-
 nncf/data/dataset.py                          |  9 +++
 .../algorithms/bias_correction/algorithm.py   |  2 +-
 .../algorithms/channel_alignment/algorithm.py |  2 +-
 .../fast_bias_correction/algorithm.py         |  2 +-
 .../algorithms/smooth_quant/algorithm.py      |  2 +-
 tests/common/test_dataset.py                  | 63 +++++++++++++++++++
 7 files changed, 84 insertions(+), 6 deletions(-)
 create mode 100644 tests/common/test_dataset.py

diff --git a/nncf/common/tensor_statistics/aggregator.py b/nncf/common/tensor_statistics/aggregator.py
index 7066fccf8b0..444c9581d55 100644
--- a/nncf/common/tensor_statistics/aggregator.py
+++ b/nncf/common/tensor_statistics/aggregator.py
@@ -13,7 +13,7 @@
 from itertools import islice
 from typing import Any, Dict, TypeVar

-from tqdm import tqdm
+from tqdm.auto import tqdm

 from nncf.common import factory
 from nncf.common.graph.graph import NNCFGraph
@@ -54,9 +54,15 @@ def collect_statistics(self, model: TModel, graph: NNCFGraph) -> None:
         model_with_outputs = model_transformer.transform(transformation_layout)
         engine = factory.EngineFactory.create(model_with_outputs)

+        dataset_length = self.dataset.get_length()
+        total = (
+            min(dataset_length or self.stat_subset_size, self.stat_subset_size)
+            if self.stat_subset_size is not None
+            else None
+        )
         for input_data in tqdm(
             islice(self.dataset.get_inference_data(), self.stat_subset_size),
-            total=self.stat_subset_size,
+            total=total,
             desc="Statistics collection",
         ):
             outputs = engine.infer(input_data)
diff --git a/nncf/data/dataset.py b/nncf/data/dataset.py
index 6bf1322c2ec..d89fb6fc84c 100644
--- a/nncf/data/dataset.py
+++ b/nncf/data/dataset.py
@@ -72,6 +72,15 @@ def get_inference_data(self, indices: Optional[List[int]] = None) -> Iterable[Mo
         """
         return DataProvider(self._data_source, self._transform_func, indices)

+    def get_length(self) -> Optional[int]:
+        """
+        Tries to fetch length of the underlying dataset.
+
+        :return: The length of the data_source if __len__() is implemented for it, and None otherwise.
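+
+        Note: data sources without __len__ (e.g. generators) yield None here, in which
+        case callers such as the statistics aggregator fall back to `stat_subset_size`
+        when sizing the progress bar.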
+ """ + if hasattr(self._data_source, "__len__"): + return self._data_source.__len__() + return None + class DataProvider(Generic[DataItem, ModelInput]): def __init__( diff --git a/nncf/quantization/algorithms/bias_correction/algorithm.py b/nncf/quantization/algorithms/bias_correction/algorithm.py index 221edd992de..10bc844ec2c 100644 --- a/nncf/quantization/algorithms/bias_correction/algorithm.py +++ b/nncf/quantization/algorithms/bias_correction/algorithm.py @@ -13,7 +13,7 @@ from typing import Any, Dict, List, Optional, Tuple, TypeVar import numpy as np -from tqdm import tqdm +from tqdm.auto import tqdm from nncf import Dataset from nncf import nncf_logger diff --git a/nncf/quantization/algorithms/channel_alignment/algorithm.py b/nncf/quantization/algorithms/channel_alignment/algorithm.py index dcec35d910b..08bf7731e18 100644 --- a/nncf/quantization/algorithms/channel_alignment/algorithm.py +++ b/nncf/quantization/algorithms/channel_alignment/algorithm.py @@ -12,7 +12,7 @@ from typing import Any, Dict, List, Optional, Tuple, TypeVar import numpy as np -from tqdm import tqdm +from tqdm.auto import tqdm from nncf import Dataset from nncf.common.factory import CommandCreatorFactory diff --git a/nncf/quantization/algorithms/fast_bias_correction/algorithm.py b/nncf/quantization/algorithms/fast_bias_correction/algorithm.py index 4a294b5a0f7..8192eebb460 100644 --- a/nncf/quantization/algorithms/fast_bias_correction/algorithm.py +++ b/nncf/quantization/algorithms/fast_bias_correction/algorithm.py @@ -11,7 +11,7 @@ from typing import Any, Dict, List, Optional, Tuple, TypeVar, Union -from tqdm import tqdm +from tqdm.auto import tqdm from nncf import Dataset from nncf.common.factory import EngineFactory diff --git a/nncf/quantization/algorithms/smooth_quant/algorithm.py b/nncf/quantization/algorithms/smooth_quant/algorithm.py index 7a121f31ea7..5be71625a3a 100644 --- a/nncf/quantization/algorithms/smooth_quant/algorithm.py +++ b/nncf/quantization/algorithms/smooth_quant/algorithm.py @@ -24,7 +24,7 @@ from copy import deepcopy from typing import Dict, List, Optional, Tuple, TypeVar -from tqdm import tqdm +from tqdm.auto import tqdm from nncf import Dataset from nncf.common.factory import ModelTransformerFactory diff --git a/tests/common/test_dataset.py b/tests/common/test_dataset.py new file mode 100644 index 00000000000..55614716de1 --- /dev/null +++ b/tests/common/test_dataset.py @@ -0,0 +1,63 @@ +# Copyright (c) 2023 Intel Corporation +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# http://www.apache.org/licenses/LICENSE-2.0 +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
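+
+# These tests exercise plain iteration over nncf.Dataset, transform_func application,
+# index-based filtering, and the get_length() fallback for sources without __len__.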
+from nncf import Dataset + + +def test_dataset(): + raw_data = list(range(50)) + dataset = Dataset(raw_data) + + data_provider = dataset.get_data() + retrieved_data_items = list(data_provider) + assert all(raw_data[i] == retrieved_data_items[i] for i in range(len(raw_data))) + + +def test_dataset_with_transform_func(): + raw_data = list(range(50)) + dataset = Dataset(raw_data, transform_func=lambda it: 2 * it) + + data_provider = dataset.get_inference_data() + retrieved_data_items = list(data_provider) + assert all(2 * raw_data[i] == retrieved_data_items[i] for i in range(len(raw_data))) + + +def test_dataset_with_indices(): + raw_data = list(range(50)) + dataset = Dataset(raw_data) + + data_provider = dataset.get_data(indices=list(range(0, 50, 2))) + retrieved_data_items = list(data_provider) + assert all(raw_data[2 * i] == retrieved_data_items[i] for i in range(len(raw_data) // 2)) + + +def test_dataset_with_transform_func_with_indices(): + raw_data = list(range(50)) + dataset = Dataset(raw_data, transform_func=lambda it: 2 * it) + + data_provider = dataset.get_inference_data(indices=list(range(0, 50, 2))) + retrieved_data_items = list(data_provider) + assert all(2 * raw_data[2 * i] == retrieved_data_items[i] for i in range(len(raw_data) // 2)) + + +def test_dataset_without_length(): + raw_data = list(range(50)) + dataset_with_length = Dataset(raw_data) + dataset_without_length = Dataset(iter(raw_data)) + assert dataset_with_length.get_length() == 50 + assert dataset_without_length.get_length() is None + + data_provider = dataset_with_length.get_data() + retrieved_data_items = list(data_provider) + assert all(raw_data[i] == retrieved_data_items[i] for i in range(len(raw_data))) + + data_provider = dataset_without_length.get_data() + retrieved_data_items = list(data_provider) + assert all(raw_data[i] == retrieved_data_items[i] for i in range(len(raw_data))) From 8cdba7f9a77ae35b3004bbcf94aa96df44347e06 Mon Sep 17 00:00:00 2001 From: Nikita Savelyev Date: Wed, 13 Sep 2023 10:54:51 +0200 Subject: [PATCH 12/15] Upgrade ultralytics to 8.0.170 (#2133) ### Changes Upgrade ultralytics to 8.0.170 ### Reason for changes For some reason yolo samples started to fail. 
Upgrading ultralytics solves this issue because the later version contains these changes: https://github.com/ultralytics/ultralytics/commit/a7419617a6f2f74849161f3faa5b65d2cef92bd5 ### Related tickets 120311 ### Tests Build 82 passed --- .../openvino/yolov8/main.py | 22 +++++++++--------- .../openvino/yolov8/requirements.txt | 2 +- .../main.py | 23 ++++++++++--------- .../requirements.txt | 2 +- 4 files changed, 25 insertions(+), 24 deletions(-) diff --git a/examples/post_training_quantization/openvino/yolov8/main.py b/examples/post_training_quantization/openvino/yolov8/main.py index 0639a50cf6f..f20730970f6 100644 --- a/examples/post_training_quantization/openvino/yolov8/main.py +++ b/examples/post_training_quantization/openvino/yolov8/main.py @@ -17,14 +17,14 @@ import openvino.runtime as ov import torch from tqdm import tqdm -from ultralytics import YOLO -from ultralytics.yolo.cfg import get_cfg -from ultralytics.yolo.data.utils import check_det_dataset -from ultralytics.yolo.engine.validator import BaseValidator as Validator -from ultralytics.yolo.utils import DATASETS_DIR -from ultralytics.yolo.utils import DEFAULT_CFG -from ultralytics.yolo.utils import ops -from ultralytics.yolo.utils.metrics import ConfusionMatrix +from ultralytics.cfg import get_cfg +from ultralytics.data.converter import coco80_to_coco91_class +from ultralytics.data.utils import check_det_dataset +from ultralytics.engine.validator import BaseValidator as Validator +from ultralytics.models.yolo import YOLO +from ultralytics.utils import DATASETS_DIR +from ultralytics.utils import DEFAULT_CFG +from ultralytics.utils.metrics import ConfusionMatrix import nncf @@ -66,17 +66,17 @@ def print_statistics(stats: np.ndarray, total_images: int, total_objects: int) - def prepare_validation(model: YOLO, args: Any) -> Tuple[Validator, torch.utils.data.DataLoader]: - validator = model.ValidatorClass(args) + validator = model.smart_load("validator")(args) validator.data = check_det_dataset(args.data) dataset = validator.data["val"] print(f"{dataset}") data_loader = validator.get_dataloader(f"{DATASETS_DIR}/coco128", 1) - validator = model.ValidatorClass(args) + validator = model.smart_load("validator")(args) validator.is_coco = True - validator.class_map = ops.coco80_to_coco91_class() + validator.class_map = coco80_to_coco91_class() validator.names = model.model.names validator.metrics.names = validator.names validator.nc = model.model.model[-1].nc diff --git a/examples/post_training_quantization/openvino/yolov8/requirements.txt b/examples/post_training_quantization/openvino/yolov8/requirements.txt index 7eabe65620b..bcbac83cbd1 100644 --- a/examples/post_training_quantization/openvino/yolov8/requirements.txt +++ b/examples/post_training_quantization/openvino/yolov8/requirements.txt @@ -1,3 +1,3 @@ -ultralytics==8.0.43 +ultralytics==8.0.170 onnx>=1.12.0 openvino-dev==2023.0.1 \ No newline at end of file diff --git a/examples/post_training_quantization/openvino/yolov8_quantize_with_accuracy_control/main.py b/examples/post_training_quantization/openvino/yolov8_quantize_with_accuracy_control/main.py index 42bac62ce55..a6e17830289 100644 --- a/examples/post_training_quantization/openvino/yolov8_quantize_with_accuracy_control/main.py +++ b/examples/post_training_quantization/openvino/yolov8_quantize_with_accuracy_control/main.py @@ -18,14 +18,15 @@ import openvino.runtime as ov import torch from tqdm import tqdm -from ultralytics import YOLO -from ultralytics.yolo.cfg import get_cfg -from ultralytics.yolo.data.utils import 
check_det_dataset -from ultralytics.yolo.engine.validator import BaseValidator as Validator -from ultralytics.yolo.utils import DATASETS_DIR -from ultralytics.yolo.utils import DEFAULT_CFG -from ultralytics.yolo.utils import ops -from ultralytics.yolo.utils.metrics import ConfusionMatrix +from ultralytics.cfg import get_cfg +from ultralytics.data.converter import coco80_to_coco91_class +from ultralytics.data.utils import check_det_dataset +from ultralytics.engine.validator import BaseValidator as Validator +from ultralytics.models.yolo import YOLO +from ultralytics.utils import DATASETS_DIR +from ultralytics.utils import DEFAULT_CFG +from ultralytics.utils import ops +from ultralytics.utils.metrics import ConfusionMatrix import nncf @@ -91,17 +92,17 @@ def print_statistics(stats: np.ndarray, total_images: int, total_objects: int) - def prepare_validation(model: YOLO, args: Any) -> Tuple[Validator, torch.utils.data.DataLoader]: - validator = model.ValidatorClass(args) + validator = model.smart_load("validator")(args) validator.data = check_det_dataset(args.data) dataset = validator.data["val"] print(f"{dataset}") data_loader = validator.get_dataloader(f"{DATASETS_DIR}/coco128-seg", 1) - validator = model.ValidatorClass(args) + validator = model.smart_load("validator")(args) validator.is_coco = True - validator.class_map = ops.coco80_to_coco91_class() + validator.class_map = coco80_to_coco91_class() validator.names = model.model.names validator.metrics.names = validator.names validator.nc = model.model.model[-1].nc diff --git a/examples/post_training_quantization/openvino/yolov8_quantize_with_accuracy_control/requirements.txt b/examples/post_training_quantization/openvino/yolov8_quantize_with_accuracy_control/requirements.txt index 7eabe65620b..bcbac83cbd1 100644 --- a/examples/post_training_quantization/openvino/yolov8_quantize_with_accuracy_control/requirements.txt +++ b/examples/post_training_quantization/openvino/yolov8_quantize_with_accuracy_control/requirements.txt @@ -1,3 +1,3 @@ -ultralytics==8.0.43 +ultralytics==8.0.170 onnx>=1.12.0 openvino-dev==2023.0.1 \ No newline at end of file From 19d7260310d07b6308e3bec6b585ed1a57054a05 Mon Sep 17 00:00:00 2001 From: Przemyslaw Wysocki Date: Wed, 13 Sep 2023 12:24:45 +0200 Subject: [PATCH 13/15] Remove upper bound from `scipy` version (#2104) ### Changes Removed the upper bound from the `scipy` version requirement. ### Reason for changes - `scipy<1.11.1` has a security vulnerability (see ticket) - The upper bound is causing pip conflicts in https://github.com/openvinotoolkit/openvino/pull/19458; see the sketch below
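To see why the pin is a problem for resolution, here is a small self-contained sketch using the `packaging` library (the second requirement is hypothetical, standing in for any co-installed package that needs a newer scipy):

```python
from packaging.specifiers import SpecifierSet

# NNCF's old pin vs. a hypothetical co-installed package that needs newer scipy
nncf_pin = SpecifierSet(">=1.3.2,<1.11")
other_requirement = SpecifierSet(">=1.11")

# The intersection of the two ranges is empty, so pip's resolver has to fail:
combined = nncf_pin & other_requirement
print(list(combined.filter(["1.10.1", "1.11.0", "1.11.1"])))  # -> []
```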
### Related tickets 117438 --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 65fbab5244a..2058599c505 100644 --- a/setup.py +++ b/setup.py @@ -120,7 +120,7 @@ def find_version(*file_paths): # Ticket: 69520 "pyparsing<3.0", "scikit-learn>=0.24.0", - "scipy>=1.3.2, <1.11", + "scipy>=1.3.2", "texttable>=1.6.3", "tqdm>=4.54.1", ] From e46e1c5ee902ac5cee5ef91f10f0d7fa1e1202bf Mon Sep 17 00:00:00 2001 From: Nikita Malinin Date: Fri, 15 Sep 2023 07:24:33 +0200 Subject: [PATCH 14/15] [PTQ] Fix non-optional algos in calibrate.py (#2137) ### Changes - Fixed behavior in `calibrate.py` for algorithms configured without options ### Reason for changes - Bugfix: `algorithm_config.get("params")` returns `None` when an algorithm is configured without options, and the subsequent `algo_params.get(...)` call raises an `AttributeError`; a minimal reproduction follows
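A minimal sketch of the failure mode, mirroring the `filter_configuration` loop in the diff below (the algorithm entry and field name are illustrative, not taken from a real config):

```python
# An algorithm configured without options has no "params" section at all.
compression = {"algorithms": [{"name": "DefaultQuantization"}]}  # illustrative entry

for algorithm_config in compression["algorithms"]:
    algo_params = algorithm_config.get("params")  # -> None for this entry
    # Without the guard added below, the next .get() call raises
    # AttributeError: 'NoneType' object has no attribute 'get'
    if algo_params is None:
        continue
    field_value = algo_params.get("some_field")  # never reached for this entry
```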
### Related tickets - 120295 ### Tests --- tests/openvino/tools/calibrate.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/openvino/tools/calibrate.py b/tests/openvino/tools/calibrate.py index 1662d767958..6ca576b0052 100644 --- a/tests/openvino/tools/calibrate.py +++ b/tests/openvino/tools/calibrate.py @@ -981,6 +981,8 @@ def filter_configuration(config: Config) -> Config: # Drop params before configure for algorithm_config in config["compression"]["algorithms"]: algo_params = algorithm_config.get("params") + if algo_params is None: + continue algo_name = algorithm_config.get("name") for field_to_filter in fields_to_filter: field_value = algo_params.get(field_to_filter) From 8472e6784f1da77863874721d84c95441a39b7b4 Mon Sep 17 00:00:00 2001 From: Daniil Lyakhov Date: Fri, 15 Sep 2023 07:58:51 +0200 Subject: [PATCH 15/15] [Tests][Bug] Experimental tests moved to backend directories so they run properly via the make command (#2127) ### Changes All tests from `tests/experimental/{backend}/` are moved to the corresponding `tests/{backend}/experimental` directories ### Reason for changes To enable these tests when the make command is called: the make targets collect tests from `tests/{backend}/`, so the suites under `tests/experimental/` were never picked up. These tests are not run in precommit on the current develop branch. --- nncf/tensorflow/utils/state.py | 2 ++ .../common => common/experimental}/__init__.py | 0 .../experimental}/test_reducers_and_aggregators.py | 0 .../experimental}/test_statistic_collector.py | 0 .../native/quantization/test_reducers_and_aggregators.py | 2 +- tests/openvino/native/test_statistic_collector.py | 2 +- .../tensorflow => tensorflow/experimental}/__init__.py | 0 .../quantized/w_sym_ch_a_asym_t/resnet50.pb | 0 .../quantized/w_sym_t_a_sym_t/resnet50.pb | 0 .../reference_graphs/quantized/w_sym_t_a_sym_t/simple.pb | 0 .../experimental}/test_compressed_graph.py | 8 +++++--- .../experimental}/test_context_independence.py | 9 ++++++--- .../experimental}/test_keras_layer_model.py | 6 ++++-- .../experimental}/test_models/__init__.py | 2 +- .../experimental}/test_models/resnet.py | 0 15 files changed, 20 insertions(+), 11 deletions(-) rename tests/{experimental/common => common/experimental}/__init__.py (100%) rename tests/{experimental/common => common/experimental}/test_reducers_and_aggregators.py (100%) rename tests/{experimental/common => common/experimental}/test_statistic_collector.py (100%) rename tests/{experimental/tensorflow => tensorflow/experimental}/__init__.py (100%) rename tests/{experimental/tensorflow => tensorflow/experimental}/data/reference_graphs/quantized/w_sym_ch_a_asym_t/resnet50.pb (100%) rename tests/{experimental/tensorflow => tensorflow/experimental}/data/reference_graphs/quantized/w_sym_t_a_sym_t/resnet50.pb (100%) rename tests/{experimental/tensorflow => tensorflow/experimental}/data/reference_graphs/quantized/w_sym_t_a_sym_t/simple.pb (100%) rename tests/{experimental/tensorflow => tensorflow/experimental}/test_compressed_graph.py (94%) rename tests/{experimental/tensorflow => tensorflow/experimental}/test_context_independence.py (90%) rename tests/{experimental/tensorflow => tensorflow/experimental}/test_keras_layer_model.py (86%) rename tests/{experimental/tensorflow => tensorflow/experimental}/test_models/__init__.py (90%) rename tests/{experimental/tensorflow => tensorflow/experimental}/test_models/resnet.py (100%) diff --git a/nncf/tensorflow/utils/state.py b/nncf/tensorflow/utils/state.py index 7b541068e45..bd0413171ca 100644 --- a/nncf/tensorflow/utils/state.py +++ b/nncf/tensorflow/utils/state.py @@ -17,6 +17,8 @@ from nncf.common.compression import BaseCompressionAlgorithmController +# TODO(achurkin): remove pylint ignore after 120296 ticket is fixed +# pylint: disable=abstract-method class TFCompressionState(tf.train.experimental.PythonState): """ A wrapper for `BaseCompressionAlgorithmController` that allows saving diff --git a/tests/experimental/common/__init__.py b/tests/common/experimental/__init__.py similarity index 100% rename from tests/experimental/common/__init__.py rename to tests/common/experimental/__init__.py diff --git a/tests/experimental/common/test_reducers_and_aggregators.py b/tests/common/experimental/test_reducers_and_aggregators.py similarity index 100% rename from tests/experimental/common/test_reducers_and_aggregators.py rename to tests/common/experimental/test_reducers_and_aggregators.py diff --git a/tests/experimental/common/test_statistic_collector.py b/tests/common/experimental/test_statistic_collector.py similarity index 100% rename from tests/experimental/common/test_statistic_collector.py rename to tests/common/experimental/test_statistic_collector.py diff --git a/tests/openvino/native/quantization/test_reducers_and_aggregators.py
b/tests/openvino/native/quantization/test_reducers_and_aggregators.py index 4726ac61194..b34fc27a7c4 100644 --- a/tests/openvino/native/quantization/test_reducers_and_aggregators.py +++ b/tests/openvino/native/quantization/test_reducers_and_aggregators.py @@ -23,7 +23,7 @@ from nncf.openvino.statistics.collectors import OVNoopReducer from nncf.openvino.statistics.collectors import OVQuantileReducer from nncf.openvino.tensor import OVNNCFTensor -from tests.experimental.common.test_reducers_and_aggregators import TemplateTestReducersAggreagtors +from tests.common.experimental.test_reducers_and_aggregators import TemplateTestReducersAggreagtors class TestReducersAggregators(TemplateTestReducersAggreagtors): diff --git a/tests/openvino/native/test_statistic_collector.py b/tests/openvino/native/test_statistic_collector.py index d4dca76f9c5..2d52c0af4cd 100644 --- a/tests/openvino/native/test_statistic_collector.py +++ b/tests/openvino/native/test_statistic_collector.py @@ -10,7 +10,7 @@ # limitations under the License. from nncf.openvino.tensor import OVNNCFTensor -from tests.experimental.common.test_statistic_collector import TemplateTestStatisticCollector +from tests.common.experimental.test_statistic_collector import TemplateTestStatisticCollector class TestOVStatisticCollector(TemplateTestStatisticCollector): diff --git a/tests/experimental/tensorflow/__init__.py b/tests/tensorflow/experimental/__init__.py similarity index 100% rename from tests/experimental/tensorflow/__init__.py rename to tests/tensorflow/experimental/__init__.py diff --git a/tests/experimental/tensorflow/data/reference_graphs/quantized/w_sym_ch_a_asym_t/resnet50.pb b/tests/tensorflow/experimental/data/reference_graphs/quantized/w_sym_ch_a_asym_t/resnet50.pb similarity index 100% rename from tests/experimental/tensorflow/data/reference_graphs/quantized/w_sym_ch_a_asym_t/resnet50.pb rename to tests/tensorflow/experimental/data/reference_graphs/quantized/w_sym_ch_a_asym_t/resnet50.pb diff --git a/tests/experimental/tensorflow/data/reference_graphs/quantized/w_sym_t_a_sym_t/resnet50.pb b/tests/tensorflow/experimental/data/reference_graphs/quantized/w_sym_t_a_sym_t/resnet50.pb similarity index 100% rename from tests/experimental/tensorflow/data/reference_graphs/quantized/w_sym_t_a_sym_t/resnet50.pb rename to tests/tensorflow/experimental/data/reference_graphs/quantized/w_sym_t_a_sym_t/resnet50.pb diff --git a/tests/experimental/tensorflow/data/reference_graphs/quantized/w_sym_t_a_sym_t/simple.pb b/tests/tensorflow/experimental/data/reference_graphs/quantized/w_sym_t_a_sym_t/simple.pb similarity index 100% rename from tests/experimental/tensorflow/data/reference_graphs/quantized/w_sym_t_a_sym_t/simple.pb rename to tests/tensorflow/experimental/data/reference_graphs/quantized/w_sym_t_a_sym_t/simple.pb diff --git a/tests/experimental/tensorflow/test_compressed_graph.py b/tests/tensorflow/experimental/test_compressed_graph.py similarity index 94% rename from tests/experimental/tensorflow/test_compressed_graph.py rename to tests/tensorflow/experimental/test_compressed_graph.py index f2136c3aa8c..07b029b134b 100644 --- a/tests/experimental/tensorflow/test_compressed_graph.py +++ b/tests/tensorflow/experimental/test_compressed_graph.py @@ -14,8 +14,7 @@ import pytest import tensorflow as tf -from nncf.experimental.tensorflow.patch_tf import patch_tf_operations -from tests.experimental.tensorflow import test_models +from tests.tensorflow.experimental import test_models from tests.tensorflow.helpers import 
create_compressed_model_and_algo_for_test from tests.tensorflow.test_compressed_graph import QUANTIZERS from tests.tensorflow.test_compressed_graph import ModelDesc @@ -27,7 +26,9 @@ from tests.tensorflow.test_compressed_graph import prepare_and_check_graph_def from tests.tensorflow.test_compressed_graph import prepare_and_check_nx_graph -patch_tf_operations() +# TODO(achurkin): enable after 120296 ticket is fixed +# from nncf.experimental.tensorflow.patch_tf import patch_tf_operations +# patch_tf_operations() MODELS = [ @@ -75,6 +76,7 @@ def check_model_graph_v2(compressed_model, ref_graph_filename, ref_graph_dir, re prepare_and_check_nx_graph(compressed_graph, graph_path, ref_graph_exist, graph_to_layer_var_names_map) +@pytest.mark.skip(reason="ticket 120296") @pytest.mark.parametrize("desc", MODELS, ids=MODELS_IDS) def test_quantize_network_v2(desc: ModelDesc, _quantization_case_config_v2: QuantizeTestCaseConfiguration): model = desc.model_builder() diff --git a/tests/experimental/tensorflow/test_context_independence.py b/tests/tensorflow/experimental/test_context_independence.py similarity index 90% rename from tests/experimental/tensorflow/test_context_independence.py rename to tests/tensorflow/experimental/test_context_independence.py index 0ecf2c6d6f7..be150fd752d 100644 --- a/tests/experimental/tensorflow/test_context_independence.py +++ b/tests/tensorflow/experimental/test_context_independence.py @@ -11,16 +11,18 @@ import os +import pytest import tensorflow as tf -from nncf.experimental.tensorflow.patch_tf import patch_tf_operations -from tests.experimental.tensorflow.test_compressed_graph import check_model_graph_v2 +from tests.tensorflow.experimental.test_compressed_graph import check_model_graph_v2 from tests.tensorflow.helpers import create_compressed_model_and_algo_for_test from tests.tensorflow.test_compressed_graph import QuantizeTestCaseConfiguration from tests.tensorflow.test_compressed_graph import create_test_name from tests.tensorflow.test_compressed_graph import get_basic_quantization_config -patch_tf_operations() +# TODO(achurkin): enable after 120296 ticket is fixed +# from nncf.experimental.tensorflow.patch_tf import patch_tf_operations +# patch_tf_operations() class ModelWithSharedLayer(tf.keras.Model): @@ -51,6 +53,7 @@ def get_config(self): raise NotImplementedError +@pytest.mark.skip(reason="ticket 120296") def test_context_independence(): params = {"activations": ("symmetric", "per_tensor"), "weights": ("symmetric", "per_tensor")} diff --git a/tests/experimental/tensorflow/test_keras_layer_model.py b/tests/tensorflow/experimental/test_keras_layer_model.py similarity index 86% rename from tests/experimental/tensorflow/test_keras_layer_model.py rename to tests/tensorflow/experimental/test_keras_layer_model.py index 1369b880108..32851f43fa5 100644 --- a/tests/experimental/tensorflow/test_keras_layer_model.py +++ b/tests/tensorflow/experimental/test_keras_layer_model.py @@ -14,12 +14,14 @@ import tensorflow_hub as hub from nncf import NNCFConfig -from nncf.experimental.tensorflow.patch_tf import patch_tf_operations from tests.tensorflow.helpers import create_compressed_model_and_algo_for_test -patch_tf_operations() +# TODO(achurkin): enable after 120296 ticket is fixed +# from nncf.experimental.tensorflow.patch_tf import patch_tf_operations +# patch_tf_operations() +@pytest.mark.skip(reason="ticket 120296") def test_keras_layer_model(): nncf_config = NNCFConfig( { diff --git a/tests/experimental/tensorflow/test_models/__init__.py
b/tests/tensorflow/experimental/test_models/__init__.py similarity index 90% rename from tests/experimental/tensorflow/test_models/__init__.py rename to tests/tensorflow/experimental/test_models/__init__.py index 0b4fa2fd49a..b5886131671 100644 --- a/tests/experimental/tensorflow/test_models/__init__.py +++ b/tests/tensorflow/experimental/test_models/__init__.py @@ -9,4 +9,4 @@ # See the License for the specific language governing permissions and # limitations under the License. -from tests.experimental.tensorflow.test_models.resnet import resnet_50 +from tests.tensorflow.experimental.test_models.resnet import resnet_50 diff --git a/tests/experimental/tensorflow/test_models/resnet.py b/tests/tensorflow/experimental/test_models/resnet.py similarity index 100% rename from tests/experimental/tensorflow/test_models/resnet.py rename to tests/tensorflow/experimental/test_models/resnet.py