From 4377f8c4fc6cec379a8b7dbddc87a53d4cf727cd Mon Sep 17 00:00:00 2001 From: Ryan O'Shea Date: Wed, 25 Sep 2024 10:06:41 +0200 Subject: [PATCH] CMSIS-NN Min Max int8 support * Moves common functions to new maximum_minimum.h * Creates cmsis-nn/maximum_minimum.cc Change-Id: Ifbb3fedf53043b2f8d4c48d73c2ca44c7f0f87ca Signed-off-by: Ryan O'Shea --- tensorflow/lite/micro/kernels/BUILD | 1 + .../micro/kernels/cmsis_nn/maximum_minimum.cc | 159 ++++++++++++++++++ .../lite/micro/kernels/maximum_minimum.cc | 50 +----- .../lite/micro/kernels/maximum_minimum.h | 85 ++++++++++ .../tools/make/ext_libs/cmsis_nn_download.sh | 4 +- 5 files changed, 249 insertions(+), 50 deletions(-) create mode 100644 tensorflow/lite/micro/kernels/cmsis_nn/maximum_minimum.cc create mode 100644 tensorflow/lite/micro/kernels/maximum_minimum.h diff --git a/tensorflow/lite/micro/kernels/BUILD b/tensorflow/lite/micro/kernels/BUILD index 29a369eda33..175e9148899 100644 --- a/tensorflow/lite/micro/kernels/BUILD +++ b/tensorflow/lite/micro/kernels/BUILD @@ -313,6 +313,7 @@ tflm_kernel_cc_library( "logistic.h", "lstm_eval.h", "lstm_shared.h", + "maximum_minimum.h", "micro_ops.h", "mul.h", "pad.h", diff --git a/tensorflow/lite/micro/kernels/cmsis_nn/maximum_minimum.cc b/tensorflow/lite/micro/kernels/cmsis_nn/maximum_minimum.cc new file mode 100644 index 00000000000..73da17baedf --- /dev/null +++ b/tensorflow/lite/micro/kernels/cmsis_nn/maximum_minimum.cc @@ -0,0 +1,159 @@ +/* Copyright 2024 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "tensorflow/lite/micro/kernels/maximum_minimum.h" + +#include "Include/arm_nnfunctions.h" +#include "tensorflow/lite/c/builtin_op_data.h" +#include "tensorflow/lite/c/common.h" +#include "tensorflow/lite/kernels/internal/common.h" +#include "tensorflow/lite/kernels/internal/quantization_util.h" +#include "tensorflow/lite/kernels/internal/tensor_ctypes.h" +#include "tensorflow/lite/kernels/kernel_util.h" +#include "tensorflow/lite/kernels/op_macros.h" +#include "tensorflow/lite/micro/kernels/kernel_util.h" +#include "tensorflow/lite/micro/micro_log.h" + +namespace tflite { + +namespace { + +cmsis_nn_dims FillVariableShape(int32_t rank, int32_t* tensor_dims) { + if (rank == 4) { + return {tensor_dims[0], tensor_dims[1], tensor_dims[2], tensor_dims[3]}; + } else if (rank == 3) { + return {1, tensor_dims[0], tensor_dims[1], tensor_dims[2]}; + } else if (rank == 2) { + return {1, 1, tensor_dims[0], tensor_dims[1]}; + } else { + return {1, 1, 1, 1}; + } +} + +TfLiteStatus EvalMaximum(TfLiteContext* context, TfLiteNode* node) { + OpContext op_context(context, node); + const TfLiteEvalTensor* input1 = + tflite::micro::GetEvalInput(context, node, kInputTensor1); + const TfLiteEvalTensor* input2 = + tflite::micro::GetEvalInput(context, node, kInputTensor2); + TfLiteEvalTensor* output = + tflite::micro::GetEvalOutput(context, node, kOutputTensor); + + RuntimeShape input_1_shape = tflite::micro::GetTensorShape(input1); + RuntimeShape input_2_shape = tflite::micro::GetTensorShape(input2); + RuntimeShape output_shape = tflite::micro::GetTensorShape(output); + + cmsis_nn_dims input_1_dims = FillVariableShape( + input_1_shape.DimensionsCount(), input_1_shape.DimsData()); + cmsis_nn_dims input_2_dims = FillVariableShape( + input_2_shape.DimensionsCount(), input_2_shape.DimsData()); + 
cmsis_nn_dims output_dims = FillVariableShape(output_shape.DimensionsCount(),
+                                                output_shape.DimsData());
+
+  switch (op_context.output->type) {
+    case kTfLiteInt8:
+      cmsis_nn_context ctx;
+      ctx.buf = nullptr;
+      ctx.size = 0;
+
+      arm_maximum_s8(
+          &ctx, tflite::micro::GetTensorData<int8_t>(input1), &input_1_dims,
+          tflite::micro::GetTensorData<int8_t>(input2), &input_2_dims,
+          tflite::micro::GetTensorData<int8_t>(output), &output_dims);
+      break;
+    case kTfLiteFloat32:
+      TFLiteOperation<float, MaximumOp>(context, node, op_context);
+      break;
+    case kTfLiteInt16:
+      TFLiteOperation<int16_t, MaximumOp>(context, node, op_context);
+      break;
+    case kTfLiteInt32:
+      TFLiteOperation<int32_t, MaximumOp>(context, node, op_context);
+      break;
+    case kTfLiteInt64:
+      TFLiteOperation<int64_t, MaximumOp>(context, node, op_context);
+      break;
+    default:
+      MicroPrintf("Type %s (%d) is not supported by Maximum/Minimum.",
+                  TfLiteTypeGetName(op_context.output->type),
+                  op_context.output->type);
+      return kTfLiteError;
+  }
+  return kTfLiteOk;
+}
+
+TfLiteStatus EvalMinimum(TfLiteContext* context, TfLiteNode* node) {
+  OpContext op_context(context, node);
+  const TfLiteEvalTensor* input1 =
+      tflite::micro::GetEvalInput(context, node, kInputTensor1);
+  const TfLiteEvalTensor* input2 =
+      tflite::micro::GetEvalInput(context, node, kInputTensor2);
+  TfLiteEvalTensor* output =
+      tflite::micro::GetEvalOutput(context, node, kOutputTensor);
+
+  RuntimeShape input_1_shape = tflite::micro::GetTensorShape(input1);
+  RuntimeShape input_2_shape = tflite::micro::GetTensorShape(input2);
+  RuntimeShape output_shape = tflite::micro::GetTensorShape(output);
+
+  cmsis_nn_dims input_1_dims = FillVariableShape(
+      input_1_shape.DimensionsCount(), input_1_shape.DimsData());
+  cmsis_nn_dims input_2_dims = FillVariableShape(
+      input_2_shape.DimensionsCount(), input_2_shape.DimsData());
+  cmsis_nn_dims output_dims = FillVariableShape(output_shape.DimensionsCount(),
+                                                output_shape.DimsData());
+
+  switch (op_context.output->type) {
+    case kTfLiteInt8:
+      cmsis_nn_context ctx;
+      ctx.buf = nullptr;
+      ctx.size = 0;
+
+      arm_minimum_s8(
+          &ctx, tflite::micro::GetTensorData<int8_t>(input1), &input_1_dims,
+          tflite::micro::GetTensorData<int8_t>(input2), &input_2_dims,
+          tflite::micro::GetTensorData<int8_t>(output), &output_dims);
+      break;
+    case kTfLiteFloat32:
+      TFLiteOperation<float, MinimumOp>(context, node, op_context);
+      break;
+    case kTfLiteInt16:
+      TFLiteOperation<int16_t, MinimumOp>(context, node, op_context);
+      break;
+    case kTfLiteInt32:
+      TFLiteOperation<int32_t, MinimumOp>(context, node, op_context);
+      break;
+    case kTfLiteInt64:
+      TFLiteOperation<int64_t, MinimumOp>(context, node, op_context);
+      break;
+    default:
+      MicroPrintf("Type %s (%d) is not supported by Maximum/Minimum.",
+                  TfLiteTypeGetName(op_context.output->type),
+                  op_context.output->type);
+      return kTfLiteError;
+  }
+  return kTfLiteOk;
+}
+
+}  // namespace
+
+TFLMRegistration Register_MAXIMUM() {
+  return tflite::micro::RegisterOp(nullptr, nullptr, EvalMaximum);
+}
+
+TFLMRegistration Register_MINIMUM() {
+  return tflite::micro::RegisterOp(nullptr, nullptr, EvalMinimum);
+}
+
+}  // namespace tflite
diff --git a/tensorflow/lite/micro/kernels/maximum_minimum.cc b/tensorflow/lite/micro/kernels/maximum_minimum.cc
index 4dc87b40148..ef4a0a6a522 100644
--- a/tensorflow/lite/micro/kernels/maximum_minimum.cc
+++ b/tensorflow/lite/micro/kernels/maximum_minimum.cc
@@ -1,4 +1,4 @@
-/* Copyright 2022 The TensorFlow Authors. All Rights Reserved.
+/* Copyright 2024 The TensorFlow Authors. All Rights Reserved.
 
 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
@@ -23,59 +23,13 @@ limitations under the License.
 #include "tensorflow/lite/kernels/kernel_util.h"
 #include "tensorflow/lite/kernels/op_macros.h"
 #include "tensorflow/lite/micro/kernels/kernel_util.h"
+#include "tensorflow/lite/micro/kernels/maximum_minimum.h"
 #include "tensorflow/lite/micro/micro_log.h"
 
 namespace tflite {
 
 namespace {
 
-// This file has a reference implementation of TFMaximum/TFMinimum.
-enum KernelType {
-  kReference,
-};
-
-constexpr int kInputTensor1 = 0;
-constexpr int kInputTensor2 = 1;
-constexpr int kOutputTensor = 0;
-
-struct OpContext {
-  OpContext(TfLiteContext* context, TfLiteNode* node) {
-    input1 = tflite::micro::GetEvalInput(context, node, kInputTensor1);
-    input2 = tflite::micro::GetEvalInput(context, node, kInputTensor2);
-    output = tflite::micro::GetEvalOutput(context, node, kOutputTensor);
-  }
-  const TfLiteEvalTensor* input1;
-  const TfLiteEvalTensor* input2;
-  TfLiteEvalTensor* output;
-};
-
-struct MaximumOp {
-  template <typename data_type>
-  static data_type op(data_type el1, data_type el2) {
-    return el1 > el2 ? el1 : el2;
-  }
-};
-
-struct MinimumOp {
-  template <typename data_type>
-  static data_type op(data_type el1, data_type el2) {
-    return el1 < el2 ? el1 : el2;
-  }
-};
-
-template <typename data_type, typename op_type>
-void TFLiteOperation(TfLiteContext* context, TfLiteNode* node,
-                     const OpContext& op_context) {
-  reference_ops::MaximumMinimumBroadcastSlow(
-      tflite::micro::GetTensorShape(op_context.input1),
-      tflite::micro::GetTensorData<data_type>(op_context.input1),
-      tflite::micro::GetTensorShape(op_context.input2),
-      tflite::micro::GetTensorData<data_type>(op_context.input2),
-      tflite::micro::GetTensorShape(op_context.output),
-      tflite::micro::GetTensorData<data_type>(op_context.output),
-      op_type::template op<data_type>);
-}
-
 template <KernelType kernel_type, typename OpType>
 TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
   OpContext op_context(context, node);
diff --git a/tensorflow/lite/micro/kernels/maximum_minimum.h b/tensorflow/lite/micro/kernels/maximum_minimum.h
new file mode 100644
index 00000000000..ac497fe51ae
--- /dev/null
+++ b/tensorflow/lite/micro/kernels/maximum_minimum.h
@@ -0,0 +1,85 @@
+/* Copyright 2024 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#ifndef TENSORFLOW_LITE_MICRO_KERNELS_MAXIMUM_MINIMUM_H_
+#define TENSORFLOW_LITE_MICRO_KERNELS_MAXIMUM_MINIMUM_H_
+
+#include "tensorflow/lite/c/builtin_op_data.h"
+#include "tensorflow/lite/c/common.h"
+#include "tensorflow/lite/kernels/internal/common.h"
+#include "tensorflow/lite/kernels/internal/quantization_util.h"
+#include "tensorflow/lite/kernels/internal/reference/maximum_minimum.h"
+#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
+#include "tensorflow/lite/kernels/kernel_util.h"
+#include "tensorflow/lite/kernels/op_macros.h"
+#include "tensorflow/lite/micro/kernels/kernel_util.h"
+#include "tensorflow/lite/micro/micro_log.h"
+
+namespace tflite {
+
+// This file has a reference implementation of TFMaximum/TFMinimum.
+enum KernelType {
+  kReference,
+};
+
+constexpr int kInputTensor1 = 0;
+constexpr int kInputTensor2 = 1;
+constexpr int kOutputTensor = 0;
+
+struct OpContext {
+  OpContext(TfLiteContext* context, TfLiteNode* node) {
+    input1 = tflite::micro::GetEvalInput(context, node, kInputTensor1);
+    input2 = tflite::micro::GetEvalInput(context, node, kInputTensor2);
+    output = tflite::micro::GetEvalOutput(context, node, kOutputTensor);
+  }
+  const TfLiteEvalTensor* input1;
+  const TfLiteEvalTensor* input2;
+  TfLiteEvalTensor* output;
+};
+
+struct MaximumOp {
+  template <typename data_type>
+  static data_type op(data_type el1, data_type el2) {
+    return el1 > el2 ? el1 : el2;
+  }
+};
+
+struct MinimumOp {
+  template <typename data_type>
+  static data_type op(data_type el1, data_type el2) {
+    return el1 < el2 ? el1 : el2;
+  }
+};
+
+template <typename data_type, typename op_type>
+void TFLiteOperation(TfLiteContext* context, TfLiteNode* node,
+                     const OpContext& op_context) {
+  reference_ops::MaximumMinimumBroadcastSlow(
+      tflite::micro::GetTensorShape(op_context.input1),
+      tflite::micro::GetTensorData<data_type>(op_context.input1),
+      tflite::micro::GetTensorShape(op_context.input2),
+      tflite::micro::GetTensorData<data_type>(op_context.input2),
+      tflite::micro::GetTensorShape(op_context.output),
+      tflite::micro::GetTensorData<data_type>(op_context.output),
+      op_type::template op<data_type>);
+}
+
+TFLMRegistration Register_MAXIMUM();
+
+TFLMRegistration Register_MINIMUM();
+
+}  // namespace tflite
+
+#endif  // TENSORFLOW_LITE_MICRO_KERNELS_MAXIMUM_MINIMUM_H_
diff --git a/tensorflow/lite/micro/tools/make/ext_libs/cmsis_nn_download.sh b/tensorflow/lite/micro/tools/make/ext_libs/cmsis_nn_download.sh
index 04e76dd508c..26c6487f5f4 100755
--- a/tensorflow/lite/micro/tools/make/ext_libs/cmsis_nn_download.sh
+++ b/tensorflow/lite/micro/tools/make/ext_libs/cmsis_nn_download.sh
@@ -38,9 +38,9 @@ source ${TENSORFLOW_ROOT}tensorflow/lite/micro/tools/make/bash_helpers.sh
 DOWNLOADS_DIR=${1}
 DOWNLOADED_CMSIS_NN_PATH=${DOWNLOADS_DIR}/cmsis_nn
 
-ZIP_PREFIX_NN="f2cb41ca1450a4eb4307b2779dd5aae9028285a5"
+ZIP_PREFIX_NN="5f8f1a96797cfce64032492151b01cf0e1c97f06"
 CMSIS_NN_URL="http://github.com/ARM-software/CMSIS-NN/archive/${ZIP_PREFIX_NN}.zip"
-CMSIS_NN_MD5="4d0e623432d6f8d3b201cbcd89218adf"
+CMSIS_NN_MD5="903bbdaf3b73ed3c5e42e46b9d8f1f7e"
 
 should_download=$(check_should_download ${DOWNLOADS_DIR})