diff --git a/.github/workflows/clang-format-check.yml b/.github/workflows/clang-format-check.yml
index b0f145b2e20..89c2621e0c6 100644
--- a/.github/workflows/clang-format-check.yml
+++ b/.github/workflows/clang-format-check.yml
@@ -65,7 +65,7 @@ jobs:
       #   Exclude register header files. Those don't follow clang formatting or it becomes unreadable
       - name: clang-format-check
         run: |
-          CHANGE_FILES=$(git diff --ignore-submodules --name-only remotes/origin/main '*.c' '*.h' ':!*_regs.h' ':!*ffconf.h' ':!*weights.h' ':!*cnn.h' ':!*cnn.c' ':!*sampledata.h' ':!*sampleoutput.h' ':!*softmax.c' ':!Examples/*/Coremark/*' ':!Libraries/FCL' ':!Libraries/FreeRTOS' ':!Libraries/lwIP' ':!Libraries/littlefs' ':!Libraries/FreeRTOS-Plus' ':!Libraries/LC3' ':!Libraries/SDHC' ':!Libraries/MAXUSB' ':!Libraries/Cordio' ':!Libraries/LVGL' ':!Libraries/Coremark' ':!Libraries/MiscDrivers/BarcodeDecoder' ':!Libraries/tinyusb')
+          CHANGE_FILES=$(git diff --ignore-submodules --name-only remotes/origin/main '*.c' '*.h' ':!*_regs.h' ':!*ffconf.h' ':!*weights.h' ':!*cnn.h' ':!*cnn.c' ':!*sampledata.h' ':!*sampleoutput.h' ':!*softmax.c' ':!Examples/*/Coremark/*' ':!Libraries/FCL' ':!Libraries/FreeRTOS' ':!Libraries/lwIP' ':!Libraries/littlefs' ':!Libraries/FreeRTOS-Plus' ':!Libraries/LC3' ':!Libraries/SDHC' ':!Libraries/MAXUSB' ':!Libraries/Cordio' ':!Libraries/LVGL' ':!Libraries/Coremark' ':!Libraries/MiscDrivers/BarcodeDecoder' ':!Libraries/tinyusb' ':!Libraries/CMSIS/5.9.0/DSP')
           if [[ "$CHANGE_FILES" != "" ]]; then
             bash -e .github/workflows/clang-format-run.sh $CHANGE_FILES
           fi
diff --git a/.github/workflows/linter.yml b/.github/workflows/linter.yml
index 9d67fa8ed30..6886c3cfc90 100644
--- a/.github/workflows/linter.yml
+++ b/.github/workflows/linter.yml
@@ -78,7 +78,7 @@ jobs:
           GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
           
           # Skip autogenerated register files and internal use board files
-          FILTER_REGEX_EXCLUDE: (.*\/Libraries\/CMSIS\/Device\/Maxim\/.*\/Include\/partition_.*\.h)|(.*\/Libraries\/((CMSIS\/Device\/Maxim\/[^\/]*\/Include)|(PeriphDrivers\/Source\/[^\/]*))\/[^\/]*_regs\.h)|(.*\/Libraries\/Boards\/[^\/]*\/(BCB|Emulator|Simulation|ROM)[^\/]*\/.*)|(.*\/Examples\/[^\/]*\/Display\/lvgl-8\.0\.2\/.*)|(.*\/Examples\/[^\/]*\/Demo\/lv_conf.h)|(.*\/Libraries\/CMSIS\/5.9.0\/.*)|(.*\/Libraries\/CMSIS\/Include\/.*)|(.*\/Libraries\/Cordio\/.*)|(.*\/Libraries\/SDHC\/.*)|(.*\/Libraries\/lwIP\/.*)|(.*\/Examples\/[^\/]*\/CNN\/.*)|(.*\/Libraries\/tinyusb\/.*)|(.*\/Libraries\/CMSIS\/Device\/Maxim\/MAX32657\/Source\/Template\/.*)|(.*\/partition_.*\.h)
+          FILTER_REGEX_EXCLUDE: (.*\/Libraries\/CMSIS\/Device\/Maxim\/.*\/Include\/partition_.*\.h)|(.*\/Libraries\/((CMSIS\/Device\/Maxim\/[^\/]*\/Include)|(PeriphDrivers\/Source\/[^\/]*))\/[^\/]*_regs\.h)|(.*\/Libraries\/Boards\/[^\/]*\/(BCB|Emulator|Simulation|ROM)[^\/]*\/.*)|(.*\/Examples\/[^\/]*\/Display\/lvgl-8\.0\.2\/.*)|(.*\/Examples\/[^\/]*\/Demo\/lv_conf.h)|(.*\/Libraries\/CMSIS\/5.9.0\/.*)|(.*\/Libraries\/CMSIS\/Include\/.*)|(.*\/Libraries\/Cordio\/.*)|(.*\/Libraries\/SDHC\/.*)|(.*\/Libraries\/lwIP\/.*)|(.*\/Examples\/[^\/]*\/CNN\/.*)|(.*\/Libraries\/tinyusb\/.*)|(.*\/Libraries\/CMSIS\/5\.9\.0\/DSP\/.*)|(.*\/Libraries\/CMSIS\/Device\/Maxim\/MAX32657\/Source\/Template\/.*)|(.*\/partition_.*\.h)
 
           # Explicity turn off all linters except CPP
           # SuperLinter Documentation says all we need to do is turn on the ones we want 
diff --git a/Libraries/CMSIS/5.9.0/DSP/1.16.2/Include/arm_common_tables.h b/Libraries/CMSIS/5.9.0/DSP/1.16.2/Include/arm_common_tables.h
new file mode 100644
index 00000000000..6a9270437fb
--- /dev/null
+++ b/Libraries/CMSIS/5.9.0/DSP/1.16.2/Include/arm_common_tables.h
@@ -0,0 +1,318 @@
+/* ----------------------------------------------------------------------
+ * Project:      CMSIS DSP Library
+ * Title:        arm_common_tables.h
+ * Description:  Extern declaration for common tables
+ *
+ * @version  V1.10.0
+ * @date     08 July 2021
+ *
+ * Target Processor: Cortex-M and Cortex-A cores
+ * -------------------------------------------------------------------- */
+/*
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ARM_COMMON_TABLES_H
+#define ARM_COMMON_TABLES_H
+
+#include "arm_math_types.h"
+#include "dsp/fast_math_functions.h"
+
+#ifdef   __cplusplus
+extern "C"
+{
+#endif
+
+  /* Double Precision Float CFFT twiddles */
+    extern const uint16_t armBitRevTable[1024];
+
+    extern const uint64_t twiddleCoefF64_16[32];
+
+    extern const uint64_t twiddleCoefF64_32[64];
+
+    extern const uint64_t twiddleCoefF64_64[128];
+
+    extern const uint64_t twiddleCoefF64_128[256];
+
+    extern const uint64_t twiddleCoefF64_256[512];
+
+    extern const uint64_t twiddleCoefF64_512[1024];
+
+    extern const uint64_t twiddleCoefF64_1024[2048];
+
+    extern const uint64_t twiddleCoefF64_2048[4096];
+
+    extern const uint64_t twiddleCoefF64_4096[8192];
+
+    extern const float32_t twiddleCoef_16[32];
+
+    extern const float32_t twiddleCoef_32[64];
+
+    extern const float32_t twiddleCoef_64[128];
+
+    extern const float32_t twiddleCoef_128[256];
+
+    extern const float32_t twiddleCoef_256[512];
+
+    extern const float32_t twiddleCoef_512[1024];
+
+    extern const float32_t twiddleCoef_1024[2048];
+
+    extern const float32_t twiddleCoef_2048[4096];
+
+    extern const float32_t twiddleCoef_4096[8192];
+    #define twiddleCoef twiddleCoef_4096
+
+  /* Q31 */
+
+    extern const q31_t twiddleCoef_16_q31[24];
+
+    extern const q31_t twiddleCoef_32_q31[48];
+
+    extern const q31_t twiddleCoef_64_q31[96];
+
+    extern const q31_t twiddleCoef_128_q31[192];
+
+    extern const q31_t twiddleCoef_256_q31[384];
+
+    extern const q31_t twiddleCoef_512_q31[768];
+
+    extern const q31_t twiddleCoef_1024_q31[1536];
+
+    extern const q31_t twiddleCoef_2048_q31[3072];
+
+    extern const q31_t twiddleCoef_4096_q31[6144];
+
+    extern const q15_t twiddleCoef_16_q15[24];
+
+    extern const q15_t twiddleCoef_32_q15[48];
+
+    extern const q15_t twiddleCoef_64_q15[96];
+
+    extern const q15_t twiddleCoef_128_q15[192];
+
+    extern const q15_t twiddleCoef_256_q15[384];
+
+    extern const q15_t twiddleCoef_512_q15[768];
+
+    extern const q15_t twiddleCoef_1024_q15[1536];
+
+    extern const q15_t twiddleCoef_2048_q15[3072];
+
+    extern const q15_t twiddleCoef_4096_q15[6144];
+
+  /* Double Precision Float RFFT twiddles */
+    extern const uint64_t twiddleCoefF64_rfft_32[32];
+
+    extern const uint64_t twiddleCoefF64_rfft_64[64];
+
+    extern const uint64_t twiddleCoefF64_rfft_128[128];
+
+    extern const uint64_t twiddleCoefF64_rfft_256[256];
+
+    extern const uint64_t twiddleCoefF64_rfft_512[512];
+
+    extern const uint64_t twiddleCoefF64_rfft_1024[1024];
+
+    extern const uint64_t twiddleCoefF64_rfft_2048[2048];
+
+    extern const uint64_t twiddleCoefF64_rfft_4096[4096];
+
+    extern const float32_t twiddleCoef_rfft_32[32];
+
+    extern const float32_t twiddleCoef_rfft_64[64];
+
+    extern const float32_t twiddleCoef_rfft_128[128];
+
+    extern const float32_t twiddleCoef_rfft_256[256];
+
+    extern const float32_t twiddleCoef_rfft_512[512];
+
+    extern const float32_t twiddleCoef_rfft_1024[1024];
+
+    extern const float32_t twiddleCoef_rfft_2048[2048];
+
+    extern const float32_t twiddleCoef_rfft_4096[4096];
+
+  /* Double precision floating-point bit reversal tables */
+
+    #define ARMBITREVINDEXTABLEF64_16_TABLE_LENGTH ((uint16_t)12)
+    extern const uint16_t armBitRevIndexTableF64_16[ARMBITREVINDEXTABLEF64_16_TABLE_LENGTH];
+
+    #define ARMBITREVINDEXTABLEF64_32_TABLE_LENGTH ((uint16_t)24)
+    extern const uint16_t armBitRevIndexTableF64_32[ARMBITREVINDEXTABLEF64_32_TABLE_LENGTH];
+
+    #define ARMBITREVINDEXTABLEF64_64_TABLE_LENGTH ((uint16_t)56)
+    extern const uint16_t armBitRevIndexTableF64_64[ARMBITREVINDEXTABLEF64_64_TABLE_LENGTH];
+
+    #define ARMBITREVINDEXTABLEF64_128_TABLE_LENGTH ((uint16_t)112)
+    extern const uint16_t armBitRevIndexTableF64_128[ARMBITREVINDEXTABLEF64_128_TABLE_LENGTH];
+
+    #define ARMBITREVINDEXTABLEF64_256_TABLE_LENGTH ((uint16_t)240)
+    extern const uint16_t armBitRevIndexTableF64_256[ARMBITREVINDEXTABLEF64_256_TABLE_LENGTH];
+
+    #define ARMBITREVINDEXTABLEF64_512_TABLE_LENGTH ((uint16_t)480)
+    extern const uint16_t armBitRevIndexTableF64_512[ARMBITREVINDEXTABLEF64_512_TABLE_LENGTH];
+
+    #define ARMBITREVINDEXTABLEF64_1024_TABLE_LENGTH ((uint16_t)992)
+    extern const uint16_t armBitRevIndexTableF64_1024[ARMBITREVINDEXTABLEF64_1024_TABLE_LENGTH];
+
+    #define ARMBITREVINDEXTABLEF64_2048_TABLE_LENGTH ((uint16_t)1984)
+    extern const uint16_t armBitRevIndexTableF64_2048[ARMBITREVINDEXTABLEF64_2048_TABLE_LENGTH];
+
+    #define ARMBITREVINDEXTABLEF64_4096_TABLE_LENGTH ((uint16_t)4032)
+    extern const uint16_t armBitRevIndexTableF64_4096[ARMBITREVINDEXTABLEF64_4096_TABLE_LENGTH];
+  /* floating-point bit reversal tables */
+
+    #define ARMBITREVINDEXTABLE_16_TABLE_LENGTH ((uint16_t)20)
+    extern const uint16_t armBitRevIndexTable16[ARMBITREVINDEXTABLE_16_TABLE_LENGTH];
+
+    #define ARMBITREVINDEXTABLE_32_TABLE_LENGTH ((uint16_t)48)
+    extern const uint16_t armBitRevIndexTable32[ARMBITREVINDEXTABLE_32_TABLE_LENGTH];
+
+    #define ARMBITREVINDEXTABLE_64_TABLE_LENGTH ((uint16_t)56)
+    extern const uint16_t armBitRevIndexTable64[ARMBITREVINDEXTABLE_64_TABLE_LENGTH];
+
+    #define ARMBITREVINDEXTABLE_128_TABLE_LENGTH ((uint16_t)208)
+    extern const uint16_t armBitRevIndexTable128[ARMBITREVINDEXTABLE_128_TABLE_LENGTH];
+
+    #define ARMBITREVINDEXTABLE_256_TABLE_LENGTH ((uint16_t)440)
+    extern const uint16_t armBitRevIndexTable256[ARMBITREVINDEXTABLE_256_TABLE_LENGTH];
+
+    #define ARMBITREVINDEXTABLE_512_TABLE_LENGTH ((uint16_t)448)
+    extern const uint16_t armBitRevIndexTable512[ARMBITREVINDEXTABLE_512_TABLE_LENGTH];
+
+    #define ARMBITREVINDEXTABLE_1024_TABLE_LENGTH ((uint16_t)1800)
+    extern const uint16_t armBitRevIndexTable1024[ARMBITREVINDEXTABLE_1024_TABLE_LENGTH];
+
+    #define ARMBITREVINDEXTABLE_2048_TABLE_LENGTH ((uint16_t)3808)
+    extern const uint16_t armBitRevIndexTable2048[ARMBITREVINDEXTABLE_2048_TABLE_LENGTH];
+
+    #define ARMBITREVINDEXTABLE_4096_TABLE_LENGTH ((uint16_t)4032)
+    extern const uint16_t armBitRevIndexTable4096[ARMBITREVINDEXTABLE_4096_TABLE_LENGTH];
+
+
+  /* fixed-point bit reversal tables */
+
+    #define ARMBITREVINDEXTABLE_FIXED_16_TABLE_LENGTH ((uint16_t)12)
+    extern const uint16_t armBitRevIndexTable_fixed_16[ARMBITREVINDEXTABLE_FIXED_16_TABLE_LENGTH];
+
+    #define ARMBITREVINDEXTABLE_FIXED_32_TABLE_LENGTH ((uint16_t)24)
+    extern const uint16_t armBitRevIndexTable_fixed_32[ARMBITREVINDEXTABLE_FIXED_32_TABLE_LENGTH];
+
+    #define ARMBITREVINDEXTABLE_FIXED_64_TABLE_LENGTH ((uint16_t)56)
+    extern const uint16_t armBitRevIndexTable_fixed_64[ARMBITREVINDEXTABLE_FIXED_64_TABLE_LENGTH];
+
+    #define ARMBITREVINDEXTABLE_FIXED_128_TABLE_LENGTH ((uint16_t)112)
+    extern const uint16_t armBitRevIndexTable_fixed_128[ARMBITREVINDEXTABLE_FIXED_128_TABLE_LENGTH];
+
+    #define ARMBITREVINDEXTABLE_FIXED_256_TABLE_LENGTH ((uint16_t)240)
+    extern const uint16_t armBitRevIndexTable_fixed_256[ARMBITREVINDEXTABLE_FIXED_256_TABLE_LENGTH];
+
+    #define ARMBITREVINDEXTABLE_FIXED_512_TABLE_LENGTH ((uint16_t)480)
+    extern const uint16_t armBitRevIndexTable_fixed_512[ARMBITREVINDEXTABLE_FIXED_512_TABLE_LENGTH];
+
+    #define ARMBITREVINDEXTABLE_FIXED_1024_TABLE_LENGTH ((uint16_t)992)
+    extern const uint16_t armBitRevIndexTable_fixed_1024[ARMBITREVINDEXTABLE_FIXED_1024_TABLE_LENGTH];
+
+    #define ARMBITREVINDEXTABLE_FIXED_2048_TABLE_LENGTH ((uint16_t)1984)
+    extern const uint16_t armBitRevIndexTable_fixed_2048[ARMBITREVINDEXTABLE_FIXED_2048_TABLE_LENGTH];
+
+    #define ARMBITREVINDEXTABLE_FIXED_4096_TABLE_LENGTH ((uint16_t)4032)
+    extern const uint16_t armBitRevIndexTable_fixed_4096[ARMBITREVINDEXTABLE_FIXED_4096_TABLE_LENGTH];
+
+    extern const float32_t realCoefA[8192];
+    extern const float32_t realCoefB[8192];
+
+    extern const q31_t realCoefAQ31[8192];
+    extern const q31_t realCoefBQ31[8192];
+
+    extern const q15_t realCoefAQ15[8192];
+    extern const q15_t realCoefBQ15[8192];
+
+    extern const float32_t Weights_128[256];
+    extern const float32_t cos_factors_128[128];
+
+    extern const float32_t Weights_512[1024];
+    extern const float32_t cos_factors_512[512];
+
+    extern const float32_t Weights_2048[4096];
+    extern const float32_t cos_factors_2048[2048];
+
+    extern const float32_t Weights_8192[16384];
+    extern const float32_t cos_factors_8192[8192];
+
+    extern const q15_t WeightsQ15_128[256];
+    extern const q15_t cos_factorsQ15_128[128];
+
+    extern const q15_t WeightsQ15_512[1024];
+    extern const q15_t cos_factorsQ15_512[512];
+
+    extern const q15_t WeightsQ15_2048[4096];
+    extern const q15_t cos_factorsQ15_2048[2048];
+
+    extern const q15_t WeightsQ15_8192[16384];
+    extern const q15_t cos_factorsQ15_8192[8192];
+
+    extern const q31_t WeightsQ31_128[256];
+    extern const q31_t cos_factorsQ31_128[128];
+
+    extern const q31_t WeightsQ31_512[1024];
+    extern const q31_t cos_factorsQ31_512[512];
+
+    extern const q31_t WeightsQ31_2048[4096];
+    extern const q31_t cos_factorsQ31_2048[2048];
+
+    extern const q31_t WeightsQ31_8192[16384];
+    extern const q31_t cos_factorsQ31_8192[8192];
+
+
+    extern const q15_t armRecipTableQ15[64];
+
+    extern const q31_t armRecipTableQ31[64];
+
+  /* Tables for Fast Math Sine and Cosine */
+    extern const float32_t sinTable_f32[FAST_MATH_TABLE_SIZE + 1];
+
+    extern const q31_t sinTable_q31[FAST_MATH_TABLE_SIZE + 1];
+
+    extern const q15_t sinTable_q15[FAST_MATH_TABLE_SIZE + 1];
+
+
+  /* Accurate scalar sqrt */
+       extern const q31_t sqrt_initial_lut_q31[32];
+
+       extern const q15_t sqrt_initial_lut_q15[16];
+
+#if (defined(ARM_MATH_MVEI) || defined(ARM_MATH_HELIUM)) && !defined(ARM_MATH_AUTOVECTORIZE)
+       extern const q15_t sqrtTable_Q15[256];
+       extern const q31_t sqrtTable_Q31[256];
+       extern const unsigned char hwLUT[256];
+#endif 
+
+#if (defined(ARM_MATH_MVEF) || defined(ARM_MATH_HELIUM)) && !defined(ARM_MATH_AUTOVECTORIZE)
+       extern const float32_t exp_tab[8];
+       extern const float32_t __logf_lut_f32[8];
+#endif 
+
+
+#ifdef   __cplusplus
+}
+#endif
+
+#endif /*  ARM_COMMON_TABLES_H */
+
diff --git a/Libraries/CMSIS/5.9.0/DSP/1.16.2/Include/arm_common_tables_f16.h b/Libraries/CMSIS/5.9.0/DSP/1.16.2/Include/arm_common_tables_f16.h
new file mode 100755
index 00000000000..c84a766adf6
--- /dev/null
+++ b/Libraries/CMSIS/5.9.0/DSP/1.16.2/Include/arm_common_tables_f16.h
@@ -0,0 +1,95 @@
+/* ----------------------------------------------------------------------
+ * Project:      CMSIS DSP Library
+ * Title:        arm_common_tables_f16.h
+ * Description:  Extern declaration for common tables
+ *
+ * @version  V1.10.0
+ * @date     08 July 2021
+ *
+ * Target Processor: Cortex-M and Cortex-A cores
+ * -------------------------------------------------------------------- */
+/*
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ARM_COMMON_TABLES_F16_H
+#define ARM_COMMON_TABLES_F16_H
+
+#include "arm_math_types_f16.h"
+
+#ifdef   __cplusplus
+extern "C"
+{
+#endif
+
+
+  /* F16 */
+  #if !defined(__CC_ARM) && defined(ARM_FLOAT16_SUPPORTED)
+    extern const float16_t twiddleCoefF16_16[32];
+
+    extern const float16_t twiddleCoefF16_32[64];
+
+    extern const float16_t twiddleCoefF16_64[128];
+
+    extern const float16_t twiddleCoefF16_128[256];
+
+    extern const float16_t twiddleCoefF16_256[512];
+
+    extern const float16_t twiddleCoefF16_512[1024];
+
+    extern const float16_t twiddleCoefF16_1024[2048];
+
+    extern const float16_t twiddleCoefF16_2048[4096];
+
+    extern const float16_t twiddleCoefF16_4096[8192];
+    #define twiddleCoefF16 twiddleCoefF16_4096
+  
+ 
+  extern const float16_t twiddleCoefF16_rfft_32[32];
+
+  extern const float16_t twiddleCoefF16_rfft_64[64];
+
+  extern const float16_t twiddleCoefF16_rfft_128[128];
+
+  extern const float16_t twiddleCoefF16_rfft_256[256];
+
+  extern const float16_t twiddleCoefF16_rfft_512[512];
+
+  extern const float16_t twiddleCoefF16_rfft_1024[1024];
+
+  extern const float16_t twiddleCoefF16_rfft_2048[2048];
+
+  extern const float16_t twiddleCoefF16_rfft_4096[4096];
+
+  #endif /* !defined(__CC_ARM) && defined(ARM_FLOAT16_SUPPORTED) */
+    
+
+#if !defined(__CC_ARM) && defined(ARM_FLOAT16_SUPPORTED)
+#if (defined(ARM_MATH_MVEF) || defined(ARM_MATH_HELIUM)) && !defined(ARM_MATH_AUTOVECTORIZE)
+       extern const float16_t exp_tab_f16[8];
+       extern const float16_t __logf_lut_f16[8];
+#endif
+#endif 
+       
+
+#ifdef   __cplusplus
+}
+#endif
+
+#endif /*  ARM_COMMON_TABLES_F16_H */
+
+  
diff --git a/Libraries/CMSIS/5.9.0/DSP/1.16.2/Include/arm_const_structs.h b/Libraries/CMSIS/5.9.0/DSP/1.16.2/Include/arm_const_structs.h
new file mode 100644
index 00000000000..32c1a436e67
--- /dev/null
+++ b/Libraries/CMSIS/5.9.0/DSP/1.16.2/Include/arm_const_structs.h
@@ -0,0 +1,86 @@
+/* ----------------------------------------------------------------------
+ * Project:      CMSIS DSP Library
+ * Title:        arm_const_structs.h
+ * Description:  Constant structs that are initialized for user convenience.
+ *               For example, some can be given as arguments to the arm_cfft_f32() function.
+ *
+ * @version  V1.10.0
+ * @date     08 July 2021
+ *
+ * Target Processor: Cortex-M and Cortex-A cores
+ * -------------------------------------------------------------------- */
+/*
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ARM_CONST_STRUCTS_H
+#define ARM_CONST_STRUCTS_H
+
+#include "arm_math_types.h"
+#include "arm_common_tables.h"
+#include "dsp/transform_functions.h"
+
+#ifdef   __cplusplus
+extern "C"
+{
+#endif
+   extern const arm_cfft_instance_f64 arm_cfft_sR_f64_len16;
+   extern const arm_cfft_instance_f64 arm_cfft_sR_f64_len32;
+   extern const arm_cfft_instance_f64 arm_cfft_sR_f64_len64;
+   extern const arm_cfft_instance_f64 arm_cfft_sR_f64_len128;
+   extern const arm_cfft_instance_f64 arm_cfft_sR_f64_len256;
+   extern const arm_cfft_instance_f64 arm_cfft_sR_f64_len512;
+   extern const arm_cfft_instance_f64 arm_cfft_sR_f64_len1024;
+   extern const arm_cfft_instance_f64 arm_cfft_sR_f64_len2048;
+   extern const arm_cfft_instance_f64 arm_cfft_sR_f64_len4096;
+
+   extern const arm_cfft_instance_f32 arm_cfft_sR_f32_len16;
+   extern const arm_cfft_instance_f32 arm_cfft_sR_f32_len32;
+   extern const arm_cfft_instance_f32 arm_cfft_sR_f32_len64;
+   extern const arm_cfft_instance_f32 arm_cfft_sR_f32_len128;
+   extern const arm_cfft_instance_f32 arm_cfft_sR_f32_len256;
+   extern const arm_cfft_instance_f32 arm_cfft_sR_f32_len512;
+   extern const arm_cfft_instance_f32 arm_cfft_sR_f32_len1024;
+   extern const arm_cfft_instance_f32 arm_cfft_sR_f32_len2048;
+   extern const arm_cfft_instance_f32 arm_cfft_sR_f32_len4096;
+
+   extern const arm_cfft_instance_q31 arm_cfft_sR_q31_len16;
+   extern const arm_cfft_instance_q31 arm_cfft_sR_q31_len32;
+   extern const arm_cfft_instance_q31 arm_cfft_sR_q31_len64;
+   extern const arm_cfft_instance_q31 arm_cfft_sR_q31_len128;
+   extern const arm_cfft_instance_q31 arm_cfft_sR_q31_len256;
+   extern const arm_cfft_instance_q31 arm_cfft_sR_q31_len512;
+   extern const arm_cfft_instance_q31 arm_cfft_sR_q31_len1024;
+   extern const arm_cfft_instance_q31 arm_cfft_sR_q31_len2048;
+   extern const arm_cfft_instance_q31 arm_cfft_sR_q31_len4096;
+
+   extern const arm_cfft_instance_q15 arm_cfft_sR_q15_len16;
+   extern const arm_cfft_instance_q15 arm_cfft_sR_q15_len32;
+   extern const arm_cfft_instance_q15 arm_cfft_sR_q15_len64;
+   extern const arm_cfft_instance_q15 arm_cfft_sR_q15_len128;
+   extern const arm_cfft_instance_q15 arm_cfft_sR_q15_len256;
+   extern const arm_cfft_instance_q15 arm_cfft_sR_q15_len512;
+   extern const arm_cfft_instance_q15 arm_cfft_sR_q15_len1024;
+   extern const arm_cfft_instance_q15 arm_cfft_sR_q15_len2048;
+   extern const arm_cfft_instance_q15 arm_cfft_sR_q15_len4096;
+
+#ifdef   __cplusplus
+}
+#endif
+
+#endif
+
diff --git a/Libraries/CMSIS/5.9.0/DSP/1.16.2/Include/arm_const_structs_f16.h b/Libraries/CMSIS/5.9.0/DSP/1.16.2/Include/arm_const_structs_f16.h
new file mode 100755
index 00000000000..3a520b6b94f
--- /dev/null
+++ b/Libraries/CMSIS/5.9.0/DSP/1.16.2/Include/arm_const_structs_f16.h
@@ -0,0 +1,59 @@
+/* ----------------------------------------------------------------------
+ * Project:      CMSIS DSP Library
+ * Title:        arm_const_structs_f16.h
+ * Description:  Constant structs that are initialized for user convenience.
+ *               For example, some can be given as arguments to the arm_cfft_f16() function.
+ *
+ * @version  V1.10.0
+ * @date     08 July 2021
+ *
+ * Target Processor: Cortex-M and Cortex-A cores
+ * -------------------------------------------------------------------- */
+/*
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ARM_CONST_STRUCTS_F16_H
+#define ARM_CONST_STRUCTS_F16_H
+
+#include "arm_math_types_f16.h"
+#include "arm_common_tables.h"
+#include "arm_common_tables_f16.h"
+#include "dsp/transform_functions_f16.h"
+
+#ifdef   __cplusplus
+extern "C"
+{
+#endif
+
+#if !defined(__CC_ARM) && defined(ARM_FLOAT16_SUPPORTED)
+   extern const arm_cfft_instance_f16 arm_cfft_sR_f16_len16;
+   extern const arm_cfft_instance_f16 arm_cfft_sR_f16_len32;
+   extern const arm_cfft_instance_f16 arm_cfft_sR_f16_len64;
+   extern const arm_cfft_instance_f16 arm_cfft_sR_f16_len128;
+   extern const arm_cfft_instance_f16 arm_cfft_sR_f16_len256;
+   extern const arm_cfft_instance_f16 arm_cfft_sR_f16_len512;
+   extern const arm_cfft_instance_f16 arm_cfft_sR_f16_len1024;
+   extern const arm_cfft_instance_f16 arm_cfft_sR_f16_len2048;
+   extern const arm_cfft_instance_f16 arm_cfft_sR_f16_len4096;
+#endif
+
+#ifdef   __cplusplus
+}
+#endif
+
+#endif
diff --git a/Libraries/CMSIS/5.9.0/DSP/Include/arm_helium_utils.h b/Libraries/CMSIS/5.9.0/DSP/1.16.2/Include/arm_helium_utils.h
old mode 100644
new mode 100755
similarity index 64%
rename from Libraries/CMSIS/5.9.0/DSP/Include/arm_helium_utils.h
rename to Libraries/CMSIS/5.9.0/DSP/1.16.2/Include/arm_helium_utils.h
index 55afd6471be..65167678261
--- a/Libraries/CMSIS/5.9.0/DSP/Include/arm_helium_utils.h
+++ b/Libraries/CMSIS/5.9.0/DSP/1.16.2/Include/arm_helium_utils.h
@@ -26,21 +26,23 @@
  * limitations under the License.
  */
 
-#ifndef _ARM_UTILS_HELIUM_H_
-#define _ARM_UTILS_HELIUM_H_
+#ifndef ARM_UTILS_HELIUM_H_
+#define ARM_UTILS_HELIUM_H_
 
-#ifdef __cplusplus
-extern "C" {
+
+#ifdef   __cplusplus
+extern "C"
+{
 #endif
 /***************************************
 
 Definitions available for MVEF and MVEI
 
 ***************************************/
-#if (defined(ARM_MATH_HELIUM) || defined(ARM_MATH_MVEF) || defined(ARM_MATH_MVEI)) && \
-    !defined(ARM_MATH_AUTOVECTORIZE)
+#if (defined (ARM_MATH_HELIUM) || defined(ARM_MATH_MVEF) || defined(ARM_MATH_MVEI))  && !defined(ARM_MATH_AUTOVECTORIZE)
+
+#define INACTIVELANE            0 /* inactive lane content */
 
-#define INACTIVELANE 0 /* inactive lane content */
 
 #endif /* defined (ARM_MATH_HELIUM) || defined(ARM_MATH_MVEF) || defined(ARM_MATH_MVEI) */
 
@@ -49,63 +51,70 @@ Definitions available for MVEF and MVEI
 Definitions available for MVEF only
 
 ***************************************/
-#if (defined(ARM_MATH_HELIUM) || defined(ARM_MATH_MVEF)) && !defined(ARM_MATH_AUTOVECTORIZE)
+#if (defined (ARM_MATH_HELIUM) || defined(ARM_MATH_MVEF))  && !defined(ARM_MATH_AUTOVECTORIZE)
 
 __STATIC_FORCEINLINE float32_t vecAddAcrossF32Mve(float32x4_t in)
 {
     float32_t acc;
 
-    acc = vgetq_lane(in, 0) + vgetq_lane(in, 1) + vgetq_lane(in, 2) + vgetq_lane(in, 3);
+    acc = vgetq_lane(in, 0) + vgetq_lane(in, 1) +
+          vgetq_lane(in, 2) + vgetq_lane(in, 3);
 
     return acc;
 }
 
+
+
+
 /* newton initial guess */
-#define INVSQRT_MAGIC_F32 0x5f3759df
-#define INV_NEWTON_INIT_F32 0x7EF127EA
-
-#define INVSQRT_NEWTON_MVE_F32(invSqrt, xHalf, xStart) \
-    {                                                  \
-        float32x4_t tmp;                               \
-                                                       \
-        /* tmp = xhalf * x * x */                      \
-        tmp = vmulq(xStart, xStart);                   \
-        tmp = vmulq(tmp, xHalf);                       \
-        /* (1.5f - xhalf * x * x) */                   \
-        tmp = vsubq(vdupq_n_f32(1.5f), tmp);           \
-        /* x = x*(1.5f-xhalf*x*x); */                  \
-        invSqrt = vmulq(tmp, xStart);                  \
-    }
+#define INVSQRT_MAGIC_F32           0x5f3759df
+#define INV_NEWTON_INIT_F32         0x7EF127EA
+
+
+#define INVSQRT_NEWTON_MVE_F32(invSqrt, xHalf, xStart)\
+{                                                     \
+    float32x4_t tmp;                                  \
+                                                      \
+    /* tmp = xhalf * x * x */                         \
+    tmp = vmulq(xStart, xStart);                      \
+    tmp = vmulq(tmp, xHalf);                          \
+    /* (1.5f - xhalf * x * x) */                      \
+    tmp = vsubq(vdupq_n_f32(1.5f), tmp);              \
+    /* x = x*(1.5f-xhalf*x*x); */                     \
+    invSqrt = vmulq(tmp, xStart);                     \
+}
 #endif /* defined (ARM_MATH_HELIUM) || defined(ARM_MATH_MVEF) */
 
+
 /***************************************
 
 Definitions available for f16 datatype with HW acceleration only
 
 ***************************************/
 #if defined(ARM_FLOAT16_SUPPORTED)
-#if defined(ARM_MATH_MVE_FLOAT16) && !defined(ARM_MATH_AUTOVECTORIZE)
+#if defined (ARM_MATH_MVE_FLOAT16) && !defined(ARM_MATH_AUTOVECTORIZE)
 
 __STATIC_FORCEINLINE float16_t vecAddAcrossF16Mve(float16x8_t in)
 {
     float16x8_t tmpVec;
     _Float16 acc;
 
-    tmpVec = (float16x8_t)vrev32q_s16((int16x8_t)in);
+    tmpVec = (float16x8_t) vrev32q_s16((int16x8_t) in);
     in = vaddq_f16(tmpVec, in);
-    tmpVec = (float16x8_t)vrev64q_s32((int32x4_t)in);
+    tmpVec = (float16x8_t) vrev64q_s32((int32x4_t) in);
     in = vaddq_f16(tmpVec, in);
     acc = (_Float16)vgetq_lane_f16(in, 0) + (_Float16)vgetq_lane_f16(in, 4);
 
     return acc;
 }
 
-__STATIC_FORCEINLINE float16x8_t __mve_cmplx_sum_intra_vec_f16(float16x8_t vecIn)
+__STATIC_FORCEINLINE float16x8_t __mve_cmplx_sum_intra_vec_f16(
+    float16x8_t   vecIn)
 {
-    float16x8_t vecTmp, vecOut;
-    uint32_t tmp;
+    float16x8_t   vecTmp, vecOut;
+    uint32_t    tmp = 0;
 
-    vecTmp = (float16x8_t)vrev64q_s32((int32x4_t)vecIn);
+    vecTmp = (float16x8_t) vrev64q_s32((int32x4_t) vecIn);
     // TO TRACK : using canonical addition leads to unefficient code generation for f16
     // vecTmp = vecTmp + vecAccCpx0;
     /*
@@ -118,7 +127,7 @@ __STATIC_FORCEINLINE float16x8_t __mve_cmplx_sum_intra_vec_f16(float16x8_t vecIn
     /*
      * shift left, random tmp insertion in bottom
      */
-    vecOut = vreinterpretq_f16_s32(vshlcq_s32(vreinterpretq_s32_f16(vecOut), &tmp, 32));
+    vecOut = vreinterpretq_f16_s32(vshlcq_s32(vreinterpretq_s32_f16(vecOut)   , &tmp, 32));
     /*
      * Compute:
      *    DONTCARE     |    DONTCARE     | re0+re1+re0+re1 |im0+im1+im0+im1
@@ -132,51 +141,56 @@ __STATIC_FORCEINLINE float16x8_t __mve_cmplx_sum_intra_vec_f16(float16x8_t vecIn
     return vecOut;
 }
 
-#define mve_cmplx_sum_intra_r_i_f16(vec, Re, Im)                 \
-    {                                                            \
-        float16x8_t vecOut = __mve_cmplx_sum_intra_vec_f16(vec); \
-        Re = vgetq_lane(vecOut, 4);                              \
-        Im = vgetq_lane(vecOut, 5);                              \
-    }
 
-__STATIC_FORCEINLINE void mve_cmplx_sum_intra_vec_f16(float16x8_t vecIn, float16_t *pOut)
+#define mve_cmplx_sum_intra_r_i_f16(vec, Re, Im)                \
+{                                                               \
+    float16x8_t   vecOut = __mve_cmplx_sum_intra_vec_f16(vec);    \
+    Re = vgetq_lane(vecOut, 4);                                 \
+    Im = vgetq_lane(vecOut, 5);                                 \
+}
+
+__STATIC_FORCEINLINE void mve_cmplx_sum_intra_vec_f16(
+    float16x8_t   vecIn,
+    float16_t  *pOut)
 {
-    float16x8_t vecOut = __mve_cmplx_sum_intra_vec_f16(vecIn);
+    float16x8_t   vecOut = __mve_cmplx_sum_intra_vec_f16(vecIn);
     /*
      * Cmplx sum is in 4rd & 5th f16 elt
      * use 32-bit extraction
      */
-    *(float32_t *)pOut = ((float32x4_t)vecOut)[2];
+    *(float32_t *) pOut = ((float32x4_t) vecOut)[2];
 }
 
-#define INVSQRT_MAGIC_F16 0x59ba /*  ( 0x1ba = 0x3759df >> 13) */
+
+#define INVSQRT_MAGIC_F16           0x59ba      /*  ( 0x1ba = 0x3759df >> 13) */
 
 /* canonical version of INVSQRT_NEWTON_MVE_F16 leads to bad performance */
-#define INVSQRT_NEWTON_MVE_F16(invSqrt, xHalf, xStart) \
-    {                                                  \
-        float16x8_t tmp;                               \
-                                                       \
-        /* tmp = xhalf * x * x */                      \
-        tmp = vmulq(xStart, xStart);                   \
-        tmp = vmulq(tmp, xHalf);                       \
-        /* (1.5f - xhalf * x * x) */                   \
-        tmp = vsubq(vdupq_n_f16((float16_t)1.5), tmp); \
-        /* x = x*(1.5f-xhalf*x*x); */                  \
-        invSqrt = vmulq(tmp, xStart);                  \
-    }
+#define INVSQRT_NEWTON_MVE_F16(invSqrt, xHalf, xStart)                  \
+{                                                                       \
+    float16x8_t tmp;                                                      \
+                                                                        \
+    /* tmp = xhalf * x * x */                                           \
+    tmp = vmulq(xStart, xStart);                                        \
+    tmp = vmulq(tmp, xHalf);                                            \
+    /* (1.5f - xhalf * x * x) */                                        \
+    tmp = vsubq(vdupq_n_f16((float16_t)1.5), tmp);                      \
+    /* x = x*(1.5f-xhalf*x*x); */                                       \
+    invSqrt = vmulq(tmp, xStart);                                       \
+}
 
 #endif
-#endif
+#endif 
 
 /***************************************
 
 Definitions available for MVEI and MVEF only
 
 ***************************************/
-#if (defined(ARM_MATH_HELIUM) || defined(ARM_MATH_MVEF) || defined(ARM_MATH_MVEI)) && \
-    !defined(ARM_MATH_AUTOVECTORIZE)
+#if (defined (ARM_MATH_HELIUM) || defined(ARM_MATH_MVEF) || defined(ARM_MATH_MVEI))  && !defined(ARM_MATH_AUTOVECTORIZE)
 /* Following functions are used to transpose matrix in f32 and q31 cases */
-__STATIC_INLINE arm_status arm_mat_trans_32bit_2x2_mve(uint32_t *pDataSrc, uint32_t *pDataDest)
+__STATIC_INLINE arm_status arm_mat_trans_32bit_2x2_mve(
+    uint32_t * pDataSrc,
+    uint32_t * pDataDest)
 {
     static const uint32x4_t vecOffs = { 0, 2, 1, 3 };
     /*
@@ -191,10 +205,12 @@ __STATIC_INLINE arm_status arm_mat_trans_32bit_2x2_mve(uint32_t *pDataSrc, uint3
     return (ARM_MATH_SUCCESS);
 }
 
-__STATIC_INLINE arm_status arm_mat_trans_32bit_3x3_mve(uint32_t *pDataSrc, uint32_t *pDataDest)
+__STATIC_INLINE arm_status arm_mat_trans_32bit_3x3_mve(
+    uint32_t * pDataSrc,
+    uint32_t * pDataDest)
 {
-    const uint32x4_t vecOffs1 = { 0, 3, 6, 1 };
-    const uint32x4_t vecOffs2 = { 4, 7, 2, 5 };
+    const uint32x4_t vecOffs1 = { 0, 3, 6, 1};
+    const uint32x4_t vecOffs2 = { 4, 7, 2, 5};
     /*
      *
      *  | 0   1   2 |       | 0   3   6 |  4 x 32 flattened version | 0   3   6   1 |
@@ -202,8 +218,8 @@ __STATIC_INLINE arm_status arm_mat_trans_32bit_3x3_mve(uint32_t *pDataSrc, uint3
      *  | 6   7   8 |       | 2   5   8 |       (row major)         | 8   .   .   . |
      *
      */
-    uint32x4_t vecIn1 = vldrwq_u32((uint32_t const *)pDataSrc);
-    uint32x4_t vecIn2 = vldrwq_u32((uint32_t const *)&pDataSrc[4]);
+    uint32x4_t vecIn1 = vldrwq_u32((uint32_t const *) pDataSrc);
+    uint32x4_t vecIn2 = vldrwq_u32((uint32_t const *) &pDataSrc[4]);
 
     vstrwq_scatter_shifted_offset_u32(pDataDest, vecOffs1, vecIn1);
     vstrwq_scatter_shifted_offset_u32(pDataDest, vecOffs2, vecIn2);
@@ -213,7 +229,7 @@ __STATIC_INLINE arm_status arm_mat_trans_32bit_3x3_mve(uint32_t *pDataSrc, uint3
     return (ARM_MATH_SUCCESS);
 }
 
-__STATIC_INLINE arm_status arm_mat_trans_32bit_4x4_mve(uint32_t *pDataSrc, uint32_t *pDataDest)
+__STATIC_INLINE arm_status arm_mat_trans_32bit_4x4_mve(uint32_t * pDataSrc, uint32_t * pDataDest)
 {
     /*
      * 4x4 Matrix transposition
@@ -227,7 +243,7 @@ __STATIC_INLINE arm_status arm_mat_trans_32bit_4x4_mve(uint32_t *pDataSrc, uint3
 
     uint32x4x4_t vecIn;
 
-    vecIn = vld4q((uint32_t const *)pDataSrc);
+    vecIn = vld4q((uint32_t const *) pDataSrc);
     vstrwq(pDataDest, vecIn.val[0]);
     pDataDest += 4;
     vstrwq(pDataDest, vecIn.val[1]);
@@ -239,12 +255,16 @@ __STATIC_INLINE arm_status arm_mat_trans_32bit_4x4_mve(uint32_t *pDataSrc, uint3
     return (ARM_MATH_SUCCESS);
 }
 
-__STATIC_INLINE arm_status arm_mat_trans_32bit_generic_mve(uint16_t srcRows, uint16_t srcCols,
-                                                           uint32_t *pDataSrc, uint32_t *pDataDest)
+
+__STATIC_INLINE arm_status arm_mat_trans_32bit_generic_mve(
+    uint16_t    srcRows,
+    uint16_t    srcCols,
+    uint32_t  * pDataSrc,
+    uint32_t  * pDataDest)
 {
     uint32x4_t vecOffs;
-    uint32_t i;
-    uint32_t blkCnt;
+    uint32_t  i;
+    uint32_t  blkCnt;
     uint32_t const *pDataC;
     uint32_t *pDataDestR;
     uint32x4_t vecIn;
@@ -253,14 +273,16 @@ __STATIC_INLINE arm_status arm_mat_trans_32bit_generic_mve(uint16_t srcRows, uin
     vecOffs = vecOffs * srcCols;
 
     i = srcCols;
-    do {
-        pDataC = (uint32_t const *)pDataSrc;
+    do
+    {
+        pDataC = (uint32_t const *) pDataSrc;
         pDataDestR = pDataDest;
 
         blkCnt = srcRows >> 2;
-        while (blkCnt > 0U) {
+        while (blkCnt > 0U)
+        {
             vecIn = vldrwq_gather_shifted_offset_u32(pDataC, vecOffs);
-            vstrwq(pDataDestR, vecIn);
+            vstrwq(pDataDestR, vecIn); 
             pDataDestR += 4;
             pDataC = pDataC + srcCols * 4;
             /*
@@ -273,7 +295,8 @@ __STATIC_INLINE arm_status arm_mat_trans_32bit_generic_mve(uint16_t srcRows, uin
          * tail
          */
         blkCnt = srcRows & 3;
-        if (blkCnt > 0U) {
+        if (blkCnt > 0U)
+        {
             mve_pred16_t p0 = vctp32q(blkCnt);
             vecIn = vldrwq_gather_shifted_offset_u32(pDataC, vecOffs);
             vstrwq_p(pDataDestR, vecIn, p0);
@@ -281,28 +304,34 @@ __STATIC_INLINE arm_status arm_mat_trans_32bit_generic_mve(uint16_t srcRows, uin
 
         pDataSrc += 1;
         pDataDest += srcRows;
-    } while (--i);
+    }
+    while (--i);
 
     return (ARM_MATH_SUCCESS);
 }
 
-__STATIC_INLINE arm_status arm_mat_cmplx_trans_32bit(uint16_t srcRows, uint16_t srcCols,
-                                                     uint32_t *pDataSrc, uint16_t dstRows,
-                                                     uint16_t dstCols, uint32_t *pDataDest)
+__STATIC_INLINE arm_status arm_mat_cmplx_trans_32bit(
+    uint16_t    srcRows,
+    uint16_t    srcCols,
+    uint32_t   *pDataSrc,
+    uint16_t    dstRows,
+    uint16_t    dstCols,
+    uint32_t   *pDataDest)
 {
-    uint32_t i;
+    uint32_t        i;
     uint32_t const *pDataC;
-    uint32_t *pDataRow;
-    uint32_t *pDataDestR, *pDataDestRow;
-    uint32x4_t vecOffsRef, vecOffsCur;
-    uint32_t blkCnt;
-    uint32x4_t vecIn;
+    uint32_t       *pDataRow;
+    uint32_t       *pDataDestR, *pDataDestRow;
+    uint32x4_t      vecOffsRef, vecOffsCur;
+    uint32_t        blkCnt;
+    uint32x4_t      vecIn;
 
 #ifdef ARM_MATH_MATRIX_CHECK
     /*
      * Check for matrix mismatch condition
      */
-    if ((srcRows != dstCols) || (srcCols != dstRows)) {
+    if ((srcRows != dstCols) || (srcCols != dstRows))
+    {
         /*
          * Set status as ARM_MATH_SIZE_MISMATCH
          */
@@ -323,28 +352,31 @@ __STATIC_INLINE arm_status arm_mat_cmplx_trans_32bit(uint16_t srcRows, uint16_t
     pDataRow = pDataSrc;
     pDataDestRow = pDataDest;
     i = srcCols;
-    do {
-        pDataC = (uint32_t const *)pDataRow;
+    do
+    {
+        pDataC = (uint32_t const *) pDataRow;
         pDataDestR = pDataDestRow;
         vecOffsCur = vecOffsRef;
 
         blkCnt = (srcRows * CMPLX_DIM) >> 2;
-        while (blkCnt > 0U) {
+        while (blkCnt > 0U)
+        {
             vecIn = vldrwq_gather_shifted_offset(pDataC, vecOffsCur);
-            vstrwq(pDataDestR, vecIn);
+            vstrwq(pDataDestR, vecIn); 
             pDataDestR += 4;
             vecOffsCur = vaddq(vecOffsCur, (srcCols << 2));
             /*
              * Decrement the blockSize loop counter
              */
-            blkCnt--;
+             blkCnt--;
         }
         /*
          * tail
          * (will be merged thru tail predication)
          */
         blkCnt = (srcRows * CMPLX_DIM) & 3;
-        if (blkCnt > 0U) {
+        if (blkCnt > 0U)
+        {
             mve_pred16_t p0 = vctp32q(blkCnt);
             vecIn = vldrwq_gather_shifted_offset(pDataC, vecOffsCur);
             vstrwq_p(pDataDestR, vecIn, p0);
@@ -352,12 +384,13 @@ __STATIC_INLINE arm_status arm_mat_cmplx_trans_32bit(uint16_t srcRows, uint16_t
 
         pDataRow += CMPLX_DIM;
         pDataDestRow += (srcRows * CMPLX_DIM);
-    } while (--i);
+    }
+    while (--i);
 
     return (ARM_MATH_SUCCESS);
 }
 
-__STATIC_INLINE arm_status arm_mat_trans_16bit_2x2(uint16_t *pDataSrc, uint16_t *pDataDest)
+__STATIC_INLINE arm_status arm_mat_trans_16bit_2x2(uint16_t * pDataSrc, uint16_t * pDataDest)
 {
     pDataDest[0] = pDataSrc[0];
     pDataDest[3] = pDataSrc[3];
@@ -367,11 +400,11 @@ __STATIC_INLINE arm_status arm_mat_trans_16bit_2x2(uint16_t *pDataSrc, uint16_t
     return (ARM_MATH_SUCCESS);
 }
 
-__STATIC_INLINE arm_status arm_mat_trans_16bit_3x3_mve(uint16_t *pDataSrc, uint16_t *pDataDest)
+__STATIC_INLINE arm_status arm_mat_trans_16bit_3x3_mve(uint16_t * pDataSrc, uint16_t * pDataDest)
 {
     static const uint16_t stridesTr33[8] = { 0, 3, 6, 1, 4, 7, 2, 5 };
-    uint16x8_t vecOffs1;
-    uint16x8_t vecIn1;
+    uint16x8_t    vecOffs1;
+    uint16x8_t    vecIn1;
     /*
      *
      *  | 0   1   2 |       | 0   3   6 |  8 x 16 flattened version | 0   3   6   1   4   7   2   5 |
@@ -379,8 +412,8 @@ __STATIC_INLINE arm_status arm_mat_trans_16bit_3x3_mve(uint16_t *pDataSrc, uint1
      *  | 6   7   8 |       | 2   5   8 |       (row major)
      *
      */
-    vecOffs1 = vldrhq_u16((uint16_t const *)stridesTr33);
-    vecIn1 = vldrhq_u16((uint16_t const *)pDataSrc);
+    vecOffs1 = vldrhq_u16((uint16_t const *) stridesTr33);
+    vecIn1 = vldrhq_u16((uint16_t const *) pDataSrc);
 
     vstrhq_scatter_shifted_offset_u16(pDataDest, vecOffs1, vecIn1);
 
@@ -389,13 +422,14 @@ __STATIC_INLINE arm_status arm_mat_trans_16bit_3x3_mve(uint16_t *pDataSrc, uint1
     return (ARM_MATH_SUCCESS);
 }
 
-__STATIC_INLINE arm_status arm_mat_trans_16bit_4x4_mve(uint16_t *pDataSrc, uint16_t *pDataDest)
+
+__STATIC_INLINE arm_status arm_mat_trans_16bit_4x4_mve(uint16_t * pDataSrc, uint16_t * pDataDest)
 {
     static const uint16_t stridesTr44_1[8] = { 0, 4, 8, 12, 1, 5, 9, 13 };
     static const uint16_t stridesTr44_2[8] = { 2, 6, 10, 14, 3, 7, 11, 15 };
-    uint16x8_t vecOffs1, vecOffs2;
-    uint16x8_t vecIn1, vecIn2;
-    uint16_t const *pDataSrcVec = (uint16_t const *)pDataSrc;
+    uint16x8_t    vecOffs1, vecOffs2;
+    uint16x8_t    vecIn1, vecIn2;
+    uint16_t const * pDataSrcVec = (uint16_t const *) pDataSrc;
 
     /*
      * 4x4 Matrix transposition
@@ -406,8 +440,8 @@ __STATIC_INLINE arm_status arm_mat_trans_16bit_4x4_mve(uint16_t *pDataSrc, uint1
      * | 12  13  14  15 |       | 3   7   11  15 |
      */
 
-    vecOffs1 = vldrhq_u16((uint16_t const *)stridesTr44_1);
-    vecOffs2 = vldrhq_u16((uint16_t const *)stridesTr44_2);
+    vecOffs1 = vldrhq_u16((uint16_t const *) stridesTr44_1);
+    vecOffs2 = vldrhq_u16((uint16_t const *) stridesTr44_2);
     vecIn1 = vldrhq_u16(pDataSrcVec);
     pDataSrcVec += 8;
     vecIn2 = vldrhq_u16(pDataSrcVec);
@@ -415,31 +449,39 @@ __STATIC_INLINE arm_status arm_mat_trans_16bit_4x4_mve(uint16_t *pDataSrc, uint1
     vstrhq_scatter_shifted_offset_u16(pDataDest, vecOffs1, vecIn1);
     vstrhq_scatter_shifted_offset_u16(pDataDest, vecOffs2, vecIn2);
 
+
     return (ARM_MATH_SUCCESS);
 }
 
-__STATIC_INLINE arm_status arm_mat_trans_16bit_generic(uint16_t srcRows, uint16_t srcCols,
-                                                       uint16_t *pDataSrc, uint16_t *pDataDest)
+
+
+__STATIC_INLINE arm_status arm_mat_trans_16bit_generic(
+    uint16_t    srcRows,
+    uint16_t    srcCols,
+    uint16_t  * pDataSrc,
+    uint16_t  * pDataDest)
 {
-    uint16x8_t vecOffs;
-    uint32_t i;
-    uint32_t blkCnt;
+    uint16x8_t    vecOffs;
+    uint32_t        i;
+    uint32_t        blkCnt;
     uint16_t const *pDataC;
-    uint16_t *pDataDestR;
-    uint16x8_t vecIn;
+    uint16_t       *pDataDestR;
+    uint16x8_t    vecIn;
 
     vecOffs = vidupq_u16((uint32_t)0, 1);
     vecOffs = vecOffs * srcCols;
 
     i = srcCols;
-    while (i > 0U) {
-        pDataC = (uint16_t const *)pDataSrc;
+    while(i > 0U)
+    {
+        pDataC = (uint16_t const *) pDataSrc;
         pDataDestR = pDataDest;
 
         blkCnt = srcRows >> 3;
-        while (blkCnt > 0U) {
+        while (blkCnt > 0U)
+        {
             vecIn = vldrhq_gather_shifted_offset_u16(pDataC, vecOffs);
-            vstrhq_u16(pDataDestR, vecIn);
+            vstrhq_u16(pDataDestR, vecIn); 
             pDataDestR += 8;
             pDataC = pDataC + srcCols * 8;
             /*
@@ -452,7 +494,8 @@ __STATIC_INLINE arm_status arm_mat_trans_16bit_generic(uint16_t srcRows, uint16_
          * tail
          */
         blkCnt = srcRows & 7;
-        if (blkCnt > 0U) {
+        if (blkCnt > 0U)
+        {
             mve_pred16_t p0 = vctp16q(blkCnt);
             vecIn = vldrhq_gather_shifted_offset_u16(pDataC, vecOffs);
             vstrhq_p_u16(pDataDestR, vecIn, p0);
@@ -465,24 +508,30 @@ __STATIC_INLINE arm_status arm_mat_trans_16bit_generic(uint16_t srcRows, uint16_
     return (ARM_MATH_SUCCESS);
 }
 
-__STATIC_INLINE arm_status arm_mat_cmplx_trans_16bit(uint16_t srcRows, uint16_t srcCols,
-                                                     uint16_t *pDataSrc, uint16_t dstRows,
-                                                     uint16_t dstCols, uint16_t *pDataDest)
+
+__STATIC_INLINE arm_status arm_mat_cmplx_trans_16bit(
+    uint16_t    srcRows,
+    uint16_t    srcCols,
+    uint16_t   *pDataSrc,
+    uint16_t    dstRows,
+    uint16_t    dstCols,
+    uint16_t   *pDataDest)
 {
     static const uint16_t loadCmplxCol[8] = { 0, 0, 1, 1, 2, 2, 3, 3 };
-    int i;
-    uint16x8_t vecOffsRef, vecOffsCur;
+    int             i;
+    uint16x8_t    vecOffsRef, vecOffsCur;
     uint16_t const *pDataC;
-    uint16_t *pDataRow;
-    uint16_t *pDataDestR, *pDataDestRow;
-    uint32_t blkCnt;
-    uint16x8_t vecIn;
+    uint16_t       *pDataRow;
+    uint16_t       *pDataDestR, *pDataDestRow;
+    uint32_t        blkCnt;
+    uint16x8_t    vecIn;
 
 #ifdef ARM_MATH_MATRIX_CHECK
     /*
      * Check for matrix mismatch condition
      */
-    if ((srcRows != dstCols) || (srcCols != dstRows)) {
+    if ((srcRows != dstCols) || (srcCols != dstRows))
+    {
         /*
          * Set status as ARM_MATH_SIZE_MISMATCH
          */
@@ -497,26 +546,29 @@ __STATIC_INLINE arm_status arm_mat_cmplx_trans_16bit(uint16_t srcRows, uint16_t
      * 2x2, 3x3 and 4x4 specialization to be added
      */
 
+
     /*
      * build  [0, 1, 2xcol, 2xcol+1, 4xcol, 4xcol+1, 6xcol, 6xcol+1]
      */
-    vecOffsRef = vldrhq_u16((uint16_t const *)loadCmplxCol);
-    vecOffsRef = vmulq(vecOffsRef, (uint16_t)(srcCols * CMPLX_DIM)) +
-                 viwdupq_u16((uint32_t)0, (uint16_t)2, 1);
+    vecOffsRef = vldrhq_u16((uint16_t const *) loadCmplxCol);
+    vecOffsRef = vmulq(vecOffsRef, (uint16_t) (srcCols * CMPLX_DIM))
+                    + viwdupq_u16((uint32_t)0, (uint16_t) 2, 1);
 
     pDataRow = pDataSrc;
     pDataDestRow = pDataDest;
     i = srcCols;
-    do {
-        pDataC = (uint16_t const *)pDataRow;
+    do
+    {
+        pDataC = (uint16_t const *) pDataRow;
         pDataDestR = pDataDestRow;
         vecOffsCur = vecOffsRef;
 
         blkCnt = (srcRows * CMPLX_DIM) >> 3;
-        while (blkCnt > 0U) {
+        while (blkCnt > 0U)
+        {
             vecIn = vldrhq_gather_shifted_offset(pDataC, vecOffsCur);
-            vstrhq(pDataDestR, vecIn);
-            pDataDestR += 8; // VEC_LANES_U16
+            vstrhq(pDataDestR, vecIn);  
+            pDataDestR+= 8; // VEC_LANES_U16
             vecOffsCur = vaddq(vecOffsCur, (srcCols << 3));
             /*
              * Decrement the blockSize loop counter
@@ -528,7 +580,8 @@ __STATIC_INLINE arm_status arm_mat_cmplx_trans_16bit(uint16_t srcRows, uint16_t
          * (will be merged thru tail predication)
          */
         blkCnt = (srcRows * CMPLX_DIM) & 0x7;
-        if (blkCnt > 0U) {
+        if (blkCnt > 0U)
+        {
             mve_pred16_t p0 = vctp16q(blkCnt);
             vecIn = vldrhq_gather_shifted_offset(pDataC, vecOffsCur);
             vstrhq_p(pDataDestR, vecIn, p0);
@@ -536,7 +589,8 @@ __STATIC_INLINE arm_status arm_mat_cmplx_trans_16bit(uint16_t srcRows, uint16_t
 
         pDataRow += CMPLX_DIM;
         pDataDestRow += (srcRows * CMPLX_DIM);
-    } while (--i);
+    }
+    while (--i);
 
     return (ARM_MATH_SUCCESS);
 }
@@ -547,22 +601,22 @@ __STATIC_INLINE arm_status arm_mat_cmplx_trans_16bit(uint16_t srcRows, uint16_t
 Definitions available for MVEI only
 
 ***************************************/
-#if (defined(ARM_MATH_HELIUM) || defined(ARM_MATH_MVEI)) && !defined(ARM_MATH_AUTOVECTORIZE)
+#if (defined (ARM_MATH_HELIUM) || defined(ARM_MATH_MVEI))  && !defined(ARM_MATH_AUTOVECTORIZE)
 
 #include "arm_common_tables.h"
 
-#define MVE_ASRL_SAT16(acc, shift) ((sqrshrl_sat48(acc, -(32 - shift)) >> 32) & 0xffffffff)
-#define MVE_ASRL_SAT32(acc, shift) ((sqrshrl(acc, -(32 - shift)) >> 32) & 0xffffffff)
+#define MVE_ASRL_SAT16(acc, shift)          ((sqrshrl_sat48(acc, -(32-shift)) >> 32) & 0xffffffff)
+#define MVE_ASRL_SAT32(acc, shift)          ((sqrshrl(acc, -(32-shift)) >> 32) & 0xffffffff)
+
 
-#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FAST_TABLES) || \
-    defined(ARM_TABLE_FAST_SQRT_Q31_MVE)
 __STATIC_INLINE q31x4_t FAST_VSQRT_Q31(q31x4_t vecIn)
 {
-    q63x2_t vecTmpLL;
-    q31x4_t vecTmp0, vecTmp1;
-    q31_t scale;
-    q63_t tmp64;
-    q31x4_t vecNrm, vecDst, vecIdx, vecSignBits;
+    q63x2_t         vecTmpLL;
+    q31x4_t         vecTmp0, vecTmp1;
+    q31_t           scale;
+    q63_t           tmp64;
+    q31x4_t         vecNrm, vecDst, vecIdx, vecSignBits;
+
 
     vecSignBits = vclsq(vecIn);
     vecSignBits = vbicq_n_s32(vecSignBits, 1);
@@ -595,11 +649,11 @@ __STATIC_INLINE q31x4_t FAST_VSQRT_Q31(q31x4_t vecIn)
      */
     scale = 26 + (vecSignBits[0] >> 1);
     tmp64 = asrl(vecTmpLL[0], scale);
-    vecDst[0] = (q31_t)tmp64;
+    vecDst[0] = (q31_t) tmp64;
 
     scale = 26 + (vecSignBits[2] >> 1);
     tmp64 = asrl(vecTmpLL[1], scale);
-    vecDst[2] = (q31_t)tmp64;
+    vecDst[2] = (q31_t) tmp64;
 
     vecTmpLL = vmulltq_int(vecNrm, vecTmp0);
 
@@ -608,11 +662,11 @@ __STATIC_INLINE q31x4_t FAST_VSQRT_Q31(q31x4_t vecIn)
      */
     scale = 26 + (vecSignBits[1] >> 1);
     tmp64 = asrl(vecTmpLL[0], scale);
-    vecDst[1] = (q31_t)tmp64;
+    vecDst[1] = (q31_t) tmp64;
 
     scale = 26 + (vecSignBits[3] >> 1);
     tmp64 = asrl(vecTmpLL[1], scale);
-    vecDst[3] = (q31_t)tmp64;
+    vecDst[3] = (q31_t) tmp64;
     /*
      * set negative values to 0
      */
@@ -620,15 +674,12 @@ __STATIC_INLINE q31x4_t FAST_VSQRT_Q31(q31x4_t vecIn)
 
     return vecDst;
 }
-#endif
 
-#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FAST_TABLES) || \
-    defined(ARM_TABLE_FAST_SQRT_Q15_MVE)
 __STATIC_INLINE q15x8_t FAST_VSQRT_Q15(q15x8_t vecIn)
 {
-    q31x4_t vecTmpLev, vecTmpLodd, vecSignL;
-    q15x8_t vecTmp0, vecTmp1;
-    q15x8_t vecNrm, vecDst, vecIdx, vecSignBits;
+    q31x4_t         vecTmpLev, vecTmpLodd, vecSignL;
+    q15x8_t         vecTmp0, vecTmp1;
+    q15x8_t         vecNrm, vecDst, vecIdx, vecSignBits;
 
     vecDst = vuninitializedq_s16();
 
@@ -688,11 +739,10 @@ __STATIC_INLINE q15x8_t FAST_VSQRT_Q15(q15x8_t vecIn)
 
     return vecDst;
 }
-#endif
 
 #endif /* defined (ARM_MATH_HELIUM) || defined(ARM_MATH_MVEI) */
 
-#ifdef __cplusplus
+#ifdef   __cplusplus
 }
 #endif
 
diff --git a/Libraries/CMSIS/5.9.0/DSP/1.16.2/Include/arm_math.h b/Libraries/CMSIS/5.9.0/DSP/1.16.2/Include/arm_math.h
new file mode 100644
index 00000000000..0e9ca5997ed
--- /dev/null
+++ b/Libraries/CMSIS/5.9.0/DSP/1.16.2/Include/arm_math.h
@@ -0,0 +1,80 @@
+/******************************************************************************
+ * @file     arm_math.h
+ * @brief    Public header file for CMSIS DSP Library
+ * @version  V1.10.0
+ * @date     08 July 2021
+ * Target Processor: Cortex-M and Cortex-A cores
+ ******************************************************************************/
+/*
+ * Copyright (c) 2010-2021 Arm Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+#ifndef ARM_MATH_H
+#define ARM_MATH_H
+
+
+#include "arm_math_types.h"
+#include "arm_math_memory.h"
+
+#include "dsp/none.h"
+#include "dsp/utils.h"
+
+#include "dsp/basic_math_functions.h"  
+#include "dsp/interpolation_functions.h"
+#include "dsp/bayes_functions.h"
+#include "dsp/matrix_functions.h"
+#include "dsp/complex_math_functions.h"
+#include "dsp/statistics_functions.h"
+#include "dsp/controller_functions.h"
+#include "dsp/support_functions.h"
+#include "dsp/distance_functions.h"
+#include "dsp/svm_functions.h"
+#include "dsp/fast_math_functions.h"
+#include "dsp/transform_functions.h"
+#include "dsp/filtering_functions.h"
+#include "dsp/quaternion_math_functions.h"
+#include "dsp/window_functions.h"
+
+
+
+#ifdef   __cplusplus
+extern "C"
+{
+#endif
+
+
+
+
+//#define TABLE_SPACING_Q31     0x400000
+//#define TABLE_SPACING_Q15     0x80
+
+
+
+
+
+#ifdef   __cplusplus
+}
+#endif
+
+
+#endif /* ARM_MATH_H */
+
+/**
+ *
+ * End of file.
+ */
diff --git a/Libraries/CMSIS/5.9.0/DSP/Include/arm_math_f16.h b/Libraries/CMSIS/5.9.0/DSP/1.16.2/Include/arm_math_f16.h
old mode 100644
new mode 100755
similarity index 94%
rename from Libraries/CMSIS/5.9.0/DSP/Include/arm_math_f16.h
rename to Libraries/CMSIS/5.9.0/DSP/1.16.2/Include/arm_math_f16.h
index 309ea68b192..34ca0e542fc
--- a/Libraries/CMSIS/5.9.0/DSP/Include/arm_math_f16.h
+++ b/Libraries/CMSIS/5.9.0/DSP/1.16.2/Include/arm_math_f16.h
@@ -23,13 +23,14 @@
  * limitations under the License.
  */
 
-#ifndef _ARM_MATH_F16_H
-#define _ARM_MATH_F16_H
+#ifndef ARM_MATH_F16_H
+#define ARM_MATH_F16_H
 
 #include "arm_math.h"
 
-#ifdef __cplusplus
-extern "C" {
+#ifdef   __cplusplus
+extern "C"
+{
 #endif
 
 #include "arm_math_types_f16.h"
@@ -49,8 +50,10 @@ extern "C" {
 #include "dsp/transform_functions_f16.h"
 #include "dsp/filtering_functions_f16.h"
 
-#ifdef __cplusplus
+#ifdef   __cplusplus
 }
 #endif
 
-#endif /* _ARM_MATH_F16_H */
+#endif /* ARM_MATH_F16_H */
+
+
diff --git a/Libraries/CMSIS/5.9.0/DSP/Include/arm_math_memory.h b/Libraries/CMSIS/5.9.0/DSP/1.16.2/Include/arm_math_memory.h
old mode 100644
new mode 100755
similarity index 60%
rename from Libraries/CMSIS/5.9.0/DSP/Include/arm_math_memory.h
rename to Libraries/CMSIS/5.9.0/DSP/1.16.2/Include/arm_math_memory.h
index 2f4be7eae6c..d4b4c3323ad
--- a/Libraries/CMSIS/5.9.0/DSP/Include/arm_math_memory.h
+++ b/Libraries/CMSIS/5.9.0/DSP/1.16.2/Include/arm_math_memory.h
@@ -23,63 +23,68 @@
  * limitations under the License.
  */
 
-#ifndef _ARM_MATH_MEMORY_H_
+#ifndef ARM_MATH_MEMORY_H_
 
-#define _ARM_MATH_MEMORY_H_
+#define ARM_MATH_MEMORY_H_
 
 #include "arm_math_types.h"
 
-#ifdef __cplusplus
-extern "C" {
+
+#ifdef   __cplusplus
+extern "C"
+{
 #endif
 
 /**
   @brief definition to read/write two 16 bit values.
   @deprecated
  */
-#if defined(__CC_ARM)
-#define __SIMD32_TYPE int32_t __packed
-#elif defined(__ARMCC_VERSION) && (__ARMCC_VERSION >= 6010050)
-#define __SIMD32_TYPE int32_t
-#elif defined(__GNUC__)
-#define __SIMD32_TYPE int32_t
-#elif defined(__ICCARM__)
-#define __SIMD32_TYPE int32_t __packed
-#elif defined(__TI_ARM__)
-#define __SIMD32_TYPE int32_t
-#elif defined(__CSMC__)
-#define __SIMD32_TYPE int32_t
-#elif defined(__TASKING__)
-#define __SIMD32_TYPE __un(aligned) int32_t
-#elif defined(_MSC_VER)
-#define __SIMD32_TYPE int32_t
+#if   defined ( __CC_ARM )
+  #define __SIMD32_TYPE int32_t __packed
+#elif defined ( __ARMCC_VERSION ) && ( __ARMCC_VERSION >= 6010050 )
+  #define __SIMD32_TYPE int32_t
+#elif defined ( __GNUC__ )
+  #define __SIMD32_TYPE int32_t
+#elif defined ( __ICCARM__ )
+  #define __SIMD32_TYPE int32_t __packed
+#elif defined ( __TI_ARM__ )
+  #define __SIMD32_TYPE int32_t
+#elif defined ( __CSMC__ )
+  #define __SIMD32_TYPE int32_t
+#elif defined ( __TASKING__ )
+  #define __SIMD32_TYPE __un(aligned) int32_t
+#elif defined(_MSC_VER )
+  #define __SIMD32_TYPE int32_t
 #else
-#error Unknown compiler
+  #error Unknown compiler
 #endif
 
-#define __SIMD32(addr) (*(__SIMD32_TYPE **)&(addr))
-#define __SIMD32_CONST(addr) ((__SIMD32_TYPE *)(addr))
-#define _SIMD32_OFFSET(addr) (*(__SIMD32_TYPE *)(addr))
-#define __SIMD64(addr) (*(int64_t **)&(addr))
+#define __SIMD32(addr)        (*(__SIMD32_TYPE **) & (addr))
+#define __SIMD32_CONST(addr)  ( (__SIMD32_TYPE * )   (addr))
+#define _SIMD32_OFFSET(addr)  (*(__SIMD32_TYPE * )   (addr))
+#define __SIMD64(addr)        (*(      int64_t **) & (addr))
+
 
 /* SIMD replacement */
 
+
 /**
   @brief         Read 2 Q15 from Q15 pointer.
   @param[in]     pQ15      points to input value
   @return        Q31 value
  */
-__STATIC_FORCEINLINE q31_t read_q15x2(q15_t const *pQ15)
+__STATIC_FORCEINLINE q31_t read_q15x2 (
+  q15_t const * pQ15)
 {
-    q31_t val;
+  q31_t val;
 
 #ifdef __ARM_FEATURE_UNALIGNED
-    memcpy(&val, pQ15, 4);
+  memcpy (&val, pQ15, 4);
 #else
-    val = (pQ15[1] << 16) | (pQ15[0] & 0x0FFFF);
+  val = (pQ15[1] << 16) | (pQ15[0] & 0x0FFFF) ;
 #endif
 
-    return (val);
+  return (val);
 }
 
 /**
@@ -100,55 +105,58 @@ __STATIC_FORCEINLINE q31_t read_q15x2(q15_t const *pQ15)
   @brief         Write 2 Q15 to Q15 pointer and increment pointer afterwards.
   @param[in]     pQ15      points to input value
   @param[in]     value     Q31 value
-  @return        none
  */
-__STATIC_FORCEINLINE void write_q15x2_ia(q15_t **pQ15, q31_t value)
+__STATIC_FORCEINLINE void write_q15x2_ia (
+  q15_t ** pQ15,
+  q31_t    value)
 {
-    q31_t val = value;
+  q31_t val = value;
 #ifdef __ARM_FEATURE_UNALIGNED
-    memcpy(*pQ15, &val, 4);
+  memcpy (*pQ15, &val, 4);
 #else
-    (*pQ15)[0] = (q15_t)(val & 0x0FFFF);
-    (*pQ15)[1] = (q15_t)((val >> 16) & 0x0FFFF);
+  (*pQ15)[0] = (q15_t)(val & 0x0FFFF);
+  (*pQ15)[1] = (q15_t)((val >> 16) & 0x0FFFF);
 #endif
 
-    *pQ15 += 2;
+ *pQ15 += 2;
 }
 
 /**
   @brief         Write 2 Q15 to Q15 pointer.
   @param[in]     pQ15      points to input value
   @param[in]     value     Q31 value
-  @return        none
  */
-__STATIC_FORCEINLINE void write_q15x2(q15_t *pQ15, q31_t value)
+__STATIC_FORCEINLINE void write_q15x2 (
+  q15_t * pQ15,
+  q31_t   value)
 {
-    q31_t val = value;
+  q31_t val = value;
 
 #ifdef __ARM_FEATURE_UNALIGNED
-    memcpy(pQ15, &val, 4);
+  memcpy (pQ15, &val, 4);
 #else
-    pQ15[0] = (q15_t)(val & 0x0FFFF);
-    pQ15[1] = (q15_t)(val >> 16);
+  pQ15[0] = (q15_t)(val & 0x0FFFF);
+  pQ15[1] = (q15_t)(val >> 16);
 #endif
 }
 
+
 /**
   @brief         Read 4 Q7 from Q7 pointer
   @param[in]     pQ7       points to input value
   @return        Q31 value
  */
-__STATIC_FORCEINLINE q31_t read_q7x4(q7_t const *pQ7)
+__STATIC_FORCEINLINE q31_t read_q7x4 (
+  q7_t const * pQ7)
 {
-    q31_t val;
+  q31_t val;
 
 #ifdef __ARM_FEATURE_UNALIGNED
-    memcpy(&val, pQ7, 4);
+  memcpy (&val, pQ7, 4);
 #else
-    val = ((pQ7[3] & 0x0FF) << 24) | ((pQ7[2] & 0x0FF) << 16) | ((pQ7[1] & 0x0FF) << 8) |
-          (pQ7[0] & 0x0FF);
-#endif
-    return (val);
+  val =((pQ7[3] & 0x0FF) << 24)  | ((pQ7[2] & 0x0FF) << 16)  | ((pQ7[1] & 0x0FF) << 8)  | (pQ7[0] & 0x0FF);
+#endif 
+  return (val);
 }
 
 /**
@@ -169,24 +177,26 @@ __STATIC_FORCEINLINE q31_t read_q7x4(q7_t const *pQ7)
   @brief         Write 4 Q7 to Q7 pointer and increment pointer afterwards.
   @param[in]     pQ7       points to input value
   @param[in]     value     Q31 value
-  @return        none
  */
-__STATIC_FORCEINLINE void write_q7x4_ia(q7_t **pQ7, q31_t value)
+__STATIC_FORCEINLINE void write_q7x4_ia (
+  q7_t ** pQ7,
+  q31_t   value)
 {
-    q31_t val = value;
+  q31_t val = value;
 #ifdef __ARM_FEATURE_UNALIGNED
-    memcpy(*pQ7, &val, 4);
+  memcpy (*pQ7, &val, 4);
 #else
-    (*pQ7)[0] = (q7_t)(val & 0x0FF);
-    (*pQ7)[1] = (q7_t)((val >> 8) & 0x0FF);
-    (*pQ7)[2] = (q7_t)((val >> 16) & 0x0FF);
-    (*pQ7)[3] = (q7_t)((val >> 24) & 0x0FF);
+  (*pQ7)[0] = (q7_t)(val & 0x0FF);
+  (*pQ7)[1] = (q7_t)((val >> 8) & 0x0FF);
+  (*pQ7)[2] = (q7_t)((val >> 16) & 0x0FF);
+  (*pQ7)[3] = (q7_t)((val >> 24) & 0x0FF);
 
 #endif
-    *pQ7 += 4;
+  *pQ7 += 4;
 }
 
-#ifdef __cplusplus
+
+#ifdef   __cplusplus
 }
 #endif
 
diff --git a/Libraries/CMSIS/5.9.0/DSP/1.16.2/Include/arm_math_types.h b/Libraries/CMSIS/5.9.0/DSP/1.16.2/Include/arm_math_types.h
new file mode 100755
index 00000000000..74ae8485f77
--- /dev/null
+++ b/Libraries/CMSIS/5.9.0/DSP/1.16.2/Include/arm_math_types.h
@@ -0,0 +1,645 @@
+/******************************************************************************
+ * @file     arm_math_types.h
+ * @brief    Public header file for CMSIS DSP Library
+ * @version  V1.10.0
+ * @date     08 July 2021
+ * Target Processor: Cortex-M and Cortex-A cores
+ ******************************************************************************/
+/*
+ * Copyright (c) 2010-2021 Arm Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ARM_MATH_TYPES_H_
+
+#define ARM_MATH_TYPES_H_
+
+#if defined(ARM_DSP_CUSTOM_CONFIG)
+#include "arm_dsp_config.h"
+#endif
+
+#ifndef ARM_DSP_ATTRIBUTE 
+#define ARM_DSP_ATTRIBUTE 
+#endif
+
+#ifndef ARM_DSP_TABLE_ATTRIBUTE 
+#define ARM_DSP_TABLE_ATTRIBUTE 
+#endif
+
+#ifdef   __cplusplus
+extern "C"
+{
+#endif
+
+/* Compiler specific diagnostic adjustment */
+#if   defined ( __CC_ARM )
+
+#elif defined ( __ARMCC_VERSION ) && ( __ARMCC_VERSION >= 6010050 )
+
+#elif defined ( __APPLE_CC__ )
+  #pragma GCC diagnostic ignored "-Wold-style-cast"
+
+#elif defined(__clang__)
+  #pragma GCC diagnostic push
+  #pragma GCC diagnostic ignored "-Wsign-conversion"
+  #pragma GCC diagnostic ignored "-Wconversion"
+  #pragma GCC diagnostic ignored "-Wunused-parameter"
+
+#elif defined ( __GNUC__ )
+  #pragma GCC diagnostic push
+  #pragma GCC diagnostic ignored "-Wsign-conversion"
+  #pragma GCC diagnostic ignored "-Wconversion"
+  #pragma GCC diagnostic ignored "-Wunused-parameter"
+  // Disable some code that has issues with GCC
+  #define ARM_DSP_BUILT_WITH_GCC 
+
+#elif defined ( __ICCARM__ )
+
+#elif defined ( __TI_ARM__ )
+
+#elif defined ( __CSMC__ )
+
+#elif defined ( __TASKING__ )
+
+#elif defined ( _MSC_VER )
+
+#else
+  #error Unknown compiler
+#endif
+
+
+/* Included for intrinsics definitions */
+#if defined (_MSC_VER ) 
+#include <stdint.h>
+#define __STATIC_FORCEINLINE static __forceinline
+#define __STATIC_INLINE static __inline
+#define __ALIGNED(x) __declspec(align(x))
+#define __WEAK
+#elif defined ( __APPLE_CC__ )
+#include <stdint.h>
+#define  __ALIGNED(x) __attribute__((aligned(x)))
+#define __STATIC_FORCEINLINE static inline __attribute__((always_inline)) 
+#define __STATIC_INLINE static inline
+#define __WEAK
+#elif defined (__GNUC_PYTHON__)
+#include <stdint.h>
+#define  __ALIGNED(x) __attribute__((aligned(x)))
+#define __STATIC_FORCEINLINE static inline __attribute__((always_inline)) 
+#define __STATIC_INLINE static inline
+#define __WEAK
+#else
+#include "cmsis_compiler.h"
+#endif
+
+
+
+#include <string.h>
+#include <math.h>
+#include <float.h>
+#include <limits.h>
+
+/* evaluate ARM DSP feature */
+#if (defined (__ARM_FEATURE_DSP) && (__ARM_FEATURE_DSP == 1))
+  #define ARM_MATH_DSP                   1
+#endif
+
+#if defined(ARM_MATH_NEON)
+  #if defined(_MSC_VER) && defined(_M_ARM64EC)
+    #include <arm64_neon.h>
+  #else
+    #include <arm_neon.h>
+  #endif
+  #if defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) && __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
+    #if !defined(ARM_MATH_NEON_FLOAT16)
+      #define ARM_MATH_NEON_FLOAT16
+    #endif
+  #endif
+#endif
+
+#if !defined(ARM_MATH_AUTOVECTORIZE)
+
+
+#if defined(__ARM_FEATURE_MVE)
+#if __ARM_FEATURE_MVE
+  #if !defined(ARM_MATH_MVEI)
+    #define ARM_MATH_MVEI
+  #endif
+#endif
+
+#if defined(__ARM_FEATURE_MVE) && (__ARM_FEATURE_MVE & 2)
+  #if !defined(ARM_MATH_MVEF)
+    #define ARM_MATH_MVEF
+  #endif
+  #if !defined(ARM_MATH_MVE_FLOAT16)
+       #define ARM_MATH_MVE_FLOAT16
+  #endif
+#endif
+
+#endif /* defined (__ARM_FEATURE_MVE) */
+#endif /* !defined (ARM_MATH_AUTOVECTORIZE) */
+
+
+#if defined (ARM_MATH_HELIUM)
+  #if !defined(ARM_MATH_MVEF)
+    #define ARM_MATH_MVEF
+  #endif
+
+  #if !defined(ARM_MATH_MVEI)
+    #define ARM_MATH_MVEI
+  #endif
+
+  #if !defined(ARM_MATH_MVE_FLOAT16)
+       #define ARM_MATH_MVE_FLOAT16
+  #endif
+#endif
+
+
+/* Per-compiler LOW_OPTIMIZATION_* and IAR_ONLY_LOW_OPTIMIZATION_* markers; there is no #else, so an unmatched compiler leaves them undefined. */
+#if   defined ( __CC_ARM )
+  /* Enter low optimization region - place directly above function definition */
+  #if defined( __ARM_ARCH_7EM__ )
+    #define LOW_OPTIMIZATION_ENTER \
+       _Pragma ("push")         \
+       _Pragma ("O1")
+  #else
+    #define LOW_OPTIMIZATION_ENTER
+  #endif
+
+  /* Exit low optimization region - place directly after end of function definition */
+  #if defined ( __ARM_ARCH_7EM__ )
+    #define LOW_OPTIMIZATION_EXIT \
+       _Pragma ("pop")
+  #else
+    #define LOW_OPTIMIZATION_EXIT
+  #endif
+
+  /* IAR-only enter marker - expands to nothing for this compiler */
+  #define IAR_ONLY_LOW_OPTIMIZATION_ENTER
+
+  /* IAR-only exit marker - expands to nothing for this compiler */
+  #define IAR_ONLY_LOW_OPTIMIZATION_EXIT
+
+#elif defined (__ARMCC_VERSION ) && ( __ARMCC_VERSION >= 6010050 )
+  #define LOW_OPTIMIZATION_ENTER
+  #define LOW_OPTIMIZATION_EXIT
+  #define IAR_ONLY_LOW_OPTIMIZATION_ENTER
+  #define IAR_ONLY_LOW_OPTIMIZATION_EXIT
+  
+#elif defined ( __APPLE_CC__ )
+  #define LOW_OPTIMIZATION_ENTER
+  #define LOW_OPTIMIZATION_EXIT
+  #define IAR_ONLY_LOW_OPTIMIZATION_ENTER
+  #define IAR_ONLY_LOW_OPTIMIZATION_EXIT
+/* GCC: the enter marker is a per-function optimize("-O1") attribute; the exit marker stays empty. */
+#elif defined ( __GNUC__ )
+  #define LOW_OPTIMIZATION_ENTER \
+       __attribute__(( optimize("-O1") ))
+  #define LOW_OPTIMIZATION_EXIT
+  #define IAR_ONLY_LOW_OPTIMIZATION_ENTER
+  #define IAR_ONLY_LOW_OPTIMIZATION_EXIT
+
+#elif defined ( __ICCARM__ )
+  /* Enter low optimization region - place directly above function definition */
+  #if defined ( __ARM_ARCH_7EM__ )
+    #define LOW_OPTIMIZATION_ENTER \
+       _Pragma ("optimize=low")
+  #else
+    #define LOW_OPTIMIZATION_ENTER
+  #endif
+
+  /* Exit low optimization region - place directly after end of function definition */
+  #define LOW_OPTIMIZATION_EXIT
+
+  /* IAR-only enter marker - same expansion as LOW_OPTIMIZATION_ENTER on this compiler */
+  #if defined ( __ARM_ARCH_7EM__ )
+    #define IAR_ONLY_LOW_OPTIMIZATION_ENTER \
+       _Pragma ("optimize=low")
+  #else
+    #define IAR_ONLY_LOW_OPTIMIZATION_ENTER
+  #endif
+
+  /* IAR-only exit marker - place directly after end of function definition */
+  #define IAR_ONLY_LOW_OPTIMIZATION_EXIT
+
+#elif defined ( __TI_ARM__ )
+  #define LOW_OPTIMIZATION_ENTER
+  #define LOW_OPTIMIZATION_EXIT
+  #define IAR_ONLY_LOW_OPTIMIZATION_ENTER
+  #define IAR_ONLY_LOW_OPTIMIZATION_EXIT
+
+#elif defined ( __CSMC__ )
+  #define LOW_OPTIMIZATION_ENTER
+  #define LOW_OPTIMIZATION_EXIT
+  #define IAR_ONLY_LOW_OPTIMIZATION_ENTER
+  #define IAR_ONLY_LOW_OPTIMIZATION_EXIT
+
+#elif defined ( __TASKING__ )
+  #define LOW_OPTIMIZATION_ENTER
+  #define LOW_OPTIMIZATION_EXIT
+  #define IAR_ONLY_LOW_OPTIMIZATION_ENTER
+  #define IAR_ONLY_LOW_OPTIMIZATION_EXIT
+       
+#elif defined ( _MSC_VER ) || defined(__GNUC_PYTHON__)
+      #define LOW_OPTIMIZATION_ENTER
+      #define LOW_OPTIMIZATION_EXIT
+      #define IAR_ONLY_LOW_OPTIMIZATION_ENTER 
+      #define IAR_ONLY_LOW_OPTIMIZATION_EXIT
+#endif
+
+
+
+/* Compiler specific diagnostic adjustment: only GCC has diagnostic state to restore (pop); unknown compilers are rejected */
+#if   defined ( __CC_ARM )
+
+#elif defined ( __ARMCC_VERSION ) && ( __ARMCC_VERSION >= 6010050 )
+
+#elif defined ( __APPLE_CC__ )
+
+#elif defined ( __GNUC__ )
+#pragma GCC diagnostic pop
+
+#elif defined ( __ICCARM__ )
+
+#elif defined ( __TI_ARM__ )
+
+#elif defined ( __CSMC__ )
+
+#elif defined ( __TASKING__ )
+
+#elif defined ( _MSC_VER )
+
+#else
+  #error Unknown compiler
+#endif
+
+#ifdef   __cplusplus
+}
+#endif
+
+#if defined(__ARM_FEATURE_MVE) && __ARM_FEATURE_MVE
+#include <arm_mve.h>
+#endif
+
+#if defined(ARM_DSP_CONFIG_TABLES)
+#error("-DARM_DSP_CONFIG_TABLES no more supported. Use the new initialization functions to let the linker optimize the code size.")
+#endif
+
+#ifdef   __cplusplus
+extern "C"
+{
+#endif
+
+/**
+ * @defgroup genericTypes Generic Types
+ * @{
+*/
+
+ /**
+   * @brief 8-bit fractional data type in 1.7 format.
+   */
+  typedef int8_t q7_t;
+
+  /**
+   * @brief 16-bit fractional data type in 1.15 format.
+   */
+  typedef int16_t q15_t;
+
+  /**
+   * @brief 32-bit fractional data type in 1.31 format.
+   */
+  typedef int32_t q31_t;
+
+  /**
+   * @brief 64-bit fractional data type in 1.63 format.
+   */
+  typedef int64_t q63_t;
+
+  /**
+   * @brief 32-bit floating-point type definition (not defined here for IAR builds with MVE floating point).
+   */
+#if !defined(__ICCARM__) || !(__ARM_FEATURE_MVE & 2)
+  typedef float float32_t;
+#endif
+
+  /**
+   * @brief 64-bit floating-point type definition.
+   */
+  typedef double float64_t;
+
+  /**
+   * @brief Integer vector types (NEON, or MVE integer when not auto-vectorizing)
+   */
+#if defined(ARM_MATH_NEON) || (defined (ARM_MATH_MVEI)  && !defined(ARM_MATH_AUTOVECTORIZE))
+
+  /**
+   * @brief 64-bit fractional 128-bit vector data type in 1.63 format
+   */
+  typedef int64x2_t q63x2_t;
+
+  /**
+   * @brief 32-bit fractional 128-bit vector data type in 1.31 format.
+   */
+  typedef int32x4_t q31x4_t;
+
+  /**
+   * @brief 16-bit fractional 128-bit vector data type with 16-bit alignment in 1.15 format.
+   */
+  typedef __ALIGNED(2) int16x8_t q15x8_t;
+
+ /**
+   * @brief 8-bit fractional 128-bit vector data type with 8-bit alignment in 1.7 format.
+   */
+  typedef __ALIGNED(1) int8x16_t q7x16_t;
+
+    /**
+   * @brief 32-bit fractional 128-bit vector pair data type in 1.31 format.
+   */
+  typedef int32x4x2_t q31x4x2_t;
+
+  /**
+   * @brief 32-bit fractional 128-bit vector quadruplet data type in 1.31 format.
+   */
+  typedef int32x4x4_t q31x4x4_t;
+
+  /**
+   * @brief 16-bit fractional 128-bit vector pair data type in 1.15 format.
+   */
+  typedef int16x8x2_t q15x8x2_t;
+
+  /**
+   * @brief 16-bit fractional 128-bit vector quadruplet data type in 1.15 format.
+   */
+  typedef int16x8x4_t q15x8x4_t;
+
+  /**
+   * @brief 8-bit fractional 128-bit vector pair data type in 1.7 format.
+   */
+  typedef int8x16x2_t q7x16x2_t;
+
+  /**
+   * @brief 8-bit fractional 128-bit vector quadruplet data type in 1.7 format.
+   */
+   typedef int8x16x4_t q7x16x4_t;
+
+  /**
+   * @brief 32-bit fractional data type in 9.23 format.
+   */
+  typedef int32_t q23_t;
+
+  /**
+   * @brief 32-bit fractional 128-bit vector data type in 9.23 format.
+   */
+  typedef int32x4_t q23x4_t;
+
+  /**
+   * @brief 64-bit status 128-bit vector data type.
+   */
+  typedef int64x2_t status64x2_t;
+
+  /**
+   * @brief 32-bit status 128-bit vector data type.
+   */
+  typedef int32x4_t status32x4_t;
+
+  /**
+   * @brief 16-bit status 128-bit vector data type.
+   */
+  typedef int16x8_t status16x8_t;
+
+  /**
+   * @brief 8-bit status 128-bit vector data type.
+   */
+  typedef int8x16_t status8x16_t;
+
+
+#endif
+
+#if defined(ARM_MATH_NEON) || (defined(ARM_MATH_MVEF)  && !defined(ARM_MATH_AUTOVECTORIZE)) /* floating point vector*/
+
+  /**
+   * @brief 32-bit floating-point 128-bit vector type
+   */
+  typedef float32x4_t f32x4_t;
+
+  /**
+   * @brief 32-bit floating-point 128-bit vector pair data type
+   */
+  typedef float32x4x2_t f32x4x2_t;
+
+  /**
+   * @brief 32-bit floating-point 128-bit vector quadruplet data type
+   */
+  typedef float32x4x4_t f32x4x4_t;
+
+  /**
+   * @brief 32-bit ubiquitous 128-bit vector data type
+   */
+  typedef union _any32x4_t
+  {
+      float32x4_t     f;
+      int32x4_t       i;
+  } any32x4_t;
+
+#endif
+
+#if defined(ARM_MATH_NEON)
+  /**
+   * @brief 32-bit fractional 64-bit vector data type in 1.31 format.
+   */
+  typedef int32x2_t  q31x2_t;
+
+  /**
+   * @brief 16-bit fractional 64-bit vector data type in 1.15 format.
+   */
+  typedef  __ALIGNED(2) int16x4_t q15x4_t;
+
+  /**
+   * @brief 8-bit fractional 64-bit vector data type in 1.7 format.
+   */
+  typedef  __ALIGNED(1) int8x8_t q7x8_t;
+
+  /**
+   * @brief 32-bit float 64-bit vector data type.
+   */
+  typedef float32x2_t  f32x2_t;
+
+  /**
+   * @brief 32-bit floating-point 128-bit vector triplet data type
+   */
+  typedef float32x4x3_t f32x4x3_t;
+
+  /**
+   * @brief 32-bit fractional 128-bit vector triplet data type in 1.31 format
+   */
+  typedef int32x4x3_t q31x4x3_t;
+
+  /**
+   * @brief 16-bit fractional 128-bit vector triplet data type in 1.15 format
+   */
+  typedef int16x8x3_t q15x8x3_t;
+
+  /**
+   * @brief 8-bit fractional 128-bit vector triplet data type in 1.7 format
+   */
+  typedef int8x16x3_t q7x16x3_t;
+
+  /**
+   * @brief 32-bit floating-point 64-bit vector pair data type
+   */
+  typedef float32x2x2_t f32x2x2_t;
+
+  /**
+   * @brief 32-bit floating-point 64-bit vector triplet data type
+   */
+  typedef float32x2x3_t f32x2x3_t;
+
+  /**
+   * @brief 32-bit floating-point 64-bit vector quadruplet data type
+   */
+  typedef float32x2x4_t f32x2x4_t;
+
+  /**
+   * @brief 32-bit fractional 64-bit vector pair data type in 1.31 format
+   */
+  typedef int32x2x2_t q31x2x2_t;
+
+  /**
+   * @brief 32-bit fractional 64-bit vector triplet data type in 1.31 format
+   */
+  typedef int32x2x3_t q31x2x3_t;
+
+  /**
+   * @brief 32-bit fractional 64-bit vector quadruplet data type in 1.31 format (four int32x2_t)
+   */
+  typedef int32x2x4_t q31x2x4_t;
+
+  /**
+   * @brief 16-bit fractional 64-bit vector pair data type in 1.15 format
+   */
+  typedef int16x4x2_t q15x4x2_t;
+
+  /**
+   * @brief 16-bit fractional 64-bit vector triplet data type in 1.15 format (three int16x4_t)
+   */
+  typedef int16x4x3_t q15x4x3_t;
+
+  /**
+   * @brief 16-bit fractional 64-bit vector quadruplet data type in 1.15 format (four int16x4_t)
+   */
+  typedef int16x4x4_t q15x4x4_t;
+
+  /**
+   * @brief 8-bit fractional 64-bit vector pair data type in 1.7 format
+   */
+  typedef int8x8x2_t q7x8x2_t;
+
+  /**
+   * @brief 8-bit fractional 64-bit vector triplet data type in 1.7 format
+   */
+  typedef int8x8x3_t q7x8x3_t;
+
+  /**
+   * @brief 8-bit fractional 64-bit vector quadruplet data type in 1.7 format
+   */
+  typedef int8x8x4_t q7x8x4_t;
+
+  /**
+   * @brief 32-bit ubiquitous 64-bit vector data type
+   */
+  typedef union _any32x2_t
+  {
+      float32x2_t     f;
+      int32x2_t       i;
+  } any32x2_t;
+
+  /**
+   * @brief 32-bit status 64-bit vector data type.
+   */
+  typedef int32x2_t status32x2_t;
+
+  /**
+   * @brief 16-bit status 64-bit vector data type.
+   */
+  typedef int16x4_t status16x4_t;
+
+  /**
+   * @brief 8-bit status 64-bit vector data type.
+   */
+  typedef int8x8_t status8x8_t;
+
+#endif
+
+  /**
+   * @brief Error status returned by some functions in the library.
+   */
+  typedef enum
+  {
+    ARM_MATH_SUCCESS                 =  0,        /**< No error */
+    ARM_MATH_ARGUMENT_ERROR          = -1,        /**< One or more arguments are incorrect */
+    ARM_MATH_LENGTH_ERROR            = -2,        /**< Length of data buffer is incorrect */
+    ARM_MATH_SIZE_MISMATCH           = -3,        /**< Size of matrices is not compatible with the operation */
+    ARM_MATH_NANINF                  = -4,        /**< Not-a-number (NaN) or infinity is generated */
+    ARM_MATH_SINGULAR                = -5,        /**< Input matrix is singular and cannot be inverted */
+    ARM_MATH_TEST_FAILURE            = -6,        /**< Test Failed */
+    ARM_MATH_DECOMPOSITION_FAILURE   = -7         /**< Decomposition Failed */
+  } arm_status;
+
+/**
+ * @} // endgroup genericTypes
+*/
+
+/* Largest finite floating-point values. */
+#define F64_MAX   ((float64_t)DBL_MAX)
+#define F32_MAX   ((float32_t)FLT_MAX)
+
+
+/* Most negative finite values: -DBL_MAX / -FLT_MAX, not the smallest positive DBL_MIN / FLT_MIN. */
+#define F64_MIN   (-DBL_MAX)
+#define F32_MIN   (-FLT_MAX)
+
+
+/* Upper bound of an absolute value (same values as F64_MAX / F32_MAX). */
+#define F64_ABSMAX   ((float64_t)DBL_MAX)
+#define F32_ABSMAX   ((float32_t)FLT_MAX)
+
+
+/* Lower bound of an absolute value: zero. */
+#define F64_ABSMIN   ((float64_t)0.0)
+#define F32_ABSMIN   ((float32_t)0.0)
+
+/* The same four bounds for the fixed-point types; MAX / MIN are hexadecimal bit patterns cast to the Q type. */
+#define Q31_MAX   ((q31_t)(0x7FFFFFFFL))
+#define Q15_MAX   ((q15_t)(0x7FFF))
+#define Q7_MAX    ((q7_t)(0x7F))
+#define Q31_MIN   ((q31_t)(0x80000000L))
+#define Q15_MIN   ((q15_t)(0x8000))
+#define Q7_MIN    ((q7_t)(0x80))
+
+#define Q31_ABSMAX   ((q31_t)(0x7FFFFFFFL))
+#define Q15_ABSMAX   ((q15_t)(0x7FFF))
+#define Q7_ABSMAX    ((q7_t)(0x7F))
+#define Q31_ABSMIN   ((q31_t)0)
+#define Q15_ABSMIN   ((q15_t)0)
+#define Q7_ABSMIN    ((q7_t)0)
+
+  /* Dimension of the complex numbers as a real vector space */
+  #define CMPLX_DIM 2
+
+#ifdef   __cplusplus
+}
+#endif
+
+#endif /*ifndef _ARM_MATH_TYPES_H_ */
diff --git a/Libraries/CMSIS/5.9.0/DSP/Include/arm_math_types_f16.h b/Libraries/CMSIS/5.9.0/DSP/1.16.2/Include/arm_math_types_f16.h
old mode 100644
new mode 100755
similarity index 65%
rename from Libraries/CMSIS/5.9.0/DSP/Include/arm_math_types_f16.h
rename to Libraries/CMSIS/5.9.0/DSP/1.16.2/Include/arm_math_types_f16.h
index 882dd01c8bb..26b8feeec9c
--- a/Libraries/CMSIS/5.9.0/DSP/Include/arm_math_types_f16.h
+++ b/Libraries/CMSIS/5.9.0/DSP/1.16.2/Include/arm_math_types_f16.h
@@ -23,16 +23,17 @@
  * limitations under the License.
  */
 
-#ifndef _ARM_MATH_TYPES_F16_H
-#define _ARM_MATH_TYPES_F16_H
+#ifndef ARM_MATH_TYPES_F16_H
+#define ARM_MATH_TYPES_F16_H
 
 #include "arm_math_types.h"
 
-#ifdef __cplusplus
-extern "C" {
+#ifdef   __cplusplus
+extern "C"
+{
 #endif
 
-#if !defined(__CC_ARM)
+#if !defined( __CC_ARM )
 
 /**
  * @brief 16-bit floating-point type definition.
@@ -48,93 +49,97 @@ If it is not available, f16 version of the kernels
 won't be built.
 
 */
-#if !(__ARM_FEATURE_MVE & 2)
-#if !defined(DISABLEFLOAT16)
-#if defined(__ARM_FP16_FORMAT_IEEE) || defined(__ARM_FP16_FORMAT_ALTERNATIVE)
-typedef __fp16 float16_t;
-#define ARM_FLOAT16_SUPPORTED
-#endif
-#endif
-#else
+#if defined(__ARM_FEATURE_MVE) &&  (__ARM_FEATURE_MVE & 2)
 /* When Vector float16, this flag is always defined and can't be disabled */
-#define ARM_FLOAT16_SUPPORTED
+  #define ARM_FLOAT16_SUPPORTED
+#else
+  #if !defined(DISABLEFLOAT16)
+    #if defined(__ARM_FP16_FORMAT_IEEE) || defined(__ARM_FP16_FORMAT_ALTERNATIVE)
+      typedef __fp16 float16_t;
+      #define ARM_FLOAT16_SUPPORTED
+    #endif
+  #endif
 #endif
 
-#if defined(ARM_MATH_NEON) || \
-    (defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE)) /* floating point vector*/
+#if defined(ARM_MATH_NEON) || (defined(ARM_MATH_MVEF)  && !defined(ARM_MATH_AUTOVECTORIZE)) /* floating point vector*/
 
 #if defined(ARM_MATH_MVE_FLOAT16) || defined(ARM_MATH_NEON_FLOAT16)
 
-/**
+  /**
    * @brief 16-bit floating-point 128-bit vector data type
    */
-typedef __ALIGNED(2) float16x8_t f16x8_t;
+  typedef __ALIGNED(2) float16x8_t f16x8_t;
 
-/**
+  /**
    * @brief 16-bit floating-point 128-bit vector pair data type
    */
-typedef float16x8x2_t f16x8x2_t;
+  typedef float16x8x2_t f16x8x2_t;
 
-/**
+  /**
    * @brief 16-bit floating-point 128-bit vector quadruplet data type
    */
-typedef float16x8x4_t f16x8x4_t;
+  typedef float16x8x4_t f16x8x4_t;
 
-/**
+  /**
    * @brief 16-bit ubiquitous 128-bit vector data type
    */
-typedef union _any16x8_t {
-    float16x8_t f;
-    int16x8_t i;
-} any16x8_t;
+  typedef union _any16x8_t
+  {
+      float16x8_t     f;
+      int16x8_t       i;
+  } any16x8_t;
 #endif
 
 #endif
 
 #if defined(ARM_MATH_NEON)
 
+
 #if defined(ARM_MATH_NEON_FLOAT16)
-/**
+  /**
    * @brief 16-bit float 64-bit vector data type.
    */
-typedef __ALIGNED(2) float16x4_t f16x4_t;
+  typedef  __ALIGNED(2) float16x4_t f16x4_t;
 
-/**
+  /**
    * @brief 16-bit floating-point 128-bit vector triplet data type
    */
-typedef float16x8x3_t f16x8x3_t;
+  typedef float16x8x3_t f16x8x3_t;
 
-/**
+  /**
    * @brief 16-bit floating-point 64-bit vector pair data type
    */
-typedef float16x4x2_t f16x4x2_t;
+  typedef float16x4x2_t f16x4x2_t;
 
-/**
+  /**
    * @brief 16-bit floating-point 64-bit vector triplet data type
    */
-typedef float16x4x3_t f16x4x3_t;
+  typedef float16x4x3_t f16x4x3_t;
 
-/**
+  /**
    * @brief 16-bit floating-point 64-bit vector quadruplet data type
    */
-typedef float16x4x4_t f16x4x4_t;
+  typedef float16x4x4_t f16x4x4_t;
 
-/**
+  /**
    * @brief 16-bit ubiquitous 64-bit vector data type
    */
-typedef union _any16x4_t {
-    float16x4_t f;
-    int16x4_t i;
-} any16x4_t;
+  typedef union _any16x4_t
+  {
+      float16x4_t     f;
+      int16x4_t       i;
+  } any16x4_t;
 #endif
 
 #endif
 
+
+
 #if defined(ARM_FLOAT16_SUPPORTED)
 
 #if defined(__ICCARM__)
 
-#define F16INFINITY ((float16_t)INFINITY)
+#define F16INFINITY   ((float16_t) INFINITY)
 
 #else
 
@@ -142,16 +147,16 @@ typedef union _any16x4_t {
 
 #endif
 
-#define F16_MAX ((float16_t)__FLT16_MAX__)
-#define F16_MIN (-(_Float16)__FLT16_MAX__)
+#define F16_MAX   ((float16_t)__FLT16_MAX__)
+#define F16_MIN   (-(_Float16)__FLT16_MAX__)
 
-#define F16_ABSMAX ((float16_t)__FLT16_MAX__)
-#define F16_ABSMIN ((float16_t)0.0f16)
+#define F16_ABSMAX   ((float16_t)__FLT16_MAX__)
+#define F16_ABSMIN   ((float16_t)0.0f16)
 
 #endif /* ARM_FLOAT16_SUPPORTED*/
 #endif /* !defined( __CC_ARM ) */
 
-#ifdef __cplusplus
+#ifdef   __cplusplus
 }
 #endif
 
diff --git a/Libraries/CMSIS/5.9.0/DSP/1.16.2/Include/arm_mve_tables.h b/Libraries/CMSIS/5.9.0/DSP/1.16.2/Include/arm_mve_tables.h
new file mode 100755
index 00000000000..aa58d7a92c6
--- /dev/null
+++ b/Libraries/CMSIS/5.9.0/DSP/1.16.2/Include/arm_mve_tables.h
@@ -0,0 +1,193 @@
+/* ----------------------------------------------------------------------
+ * Project:      CMSIS DSP Library
+ * Title:        arm_mve_tables.h
+ * Description:  common tables like fft twiddle factors, Bitreverse, reciprocal etc
+ *               used for MVE implementation only
+ *
+ * @version  V1.10.0
+ * @date     04 October 2021
+ *
+ * Target Processor: Cortex-M and Cortex-A cores
+ * -------------------------------------------------------------------- */
+/*
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+ #ifndef ARM_MVE_TABLES_H
+ #define ARM_MVE_TABLES_H
+
+#include "arm_math_types.h"
+
+#ifdef   __cplusplus
+extern "C"
+{
+#endif
+
+
+ 
+
+#if defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE)
+/* float32 twiddle tables. For every size suffix (16 ... 4096) there are three coefficient
+ * arrays (stride1..3) and three uint32_t "tab" arrays - presumably offsets into the
+ * coefficient arrays; confirm against the table definitions. */
+extern const uint32_t rearranged_twiddle_tab_stride1_arr_16_f32[2];
+extern const uint32_t rearranged_twiddle_tab_stride2_arr_16_f32[2];
+extern const uint32_t rearranged_twiddle_tab_stride3_arr_16_f32[2];
+extern const float32_t rearranged_twiddle_stride1_16_f32[8];
+extern const float32_t rearranged_twiddle_stride2_16_f32[8];
+extern const float32_t rearranged_twiddle_stride3_16_f32[8];
+
+
+extern const uint32_t rearranged_twiddle_tab_stride1_arr_64_f32[3];
+extern const uint32_t rearranged_twiddle_tab_stride2_arr_64_f32[3];
+extern const uint32_t rearranged_twiddle_tab_stride3_arr_64_f32[3];
+extern const float32_t rearranged_twiddle_stride1_64_f32[40];
+extern const float32_t rearranged_twiddle_stride2_64_f32[40];
+extern const float32_t rearranged_twiddle_stride3_64_f32[40];
+
+
+extern const uint32_t rearranged_twiddle_tab_stride1_arr_256_f32[4];
+extern const uint32_t rearranged_twiddle_tab_stride2_arr_256_f32[4];
+extern const uint32_t rearranged_twiddle_tab_stride3_arr_256_f32[4];
+extern const float32_t rearranged_twiddle_stride1_256_f32[168];
+extern const float32_t rearranged_twiddle_stride2_256_f32[168];
+extern const float32_t rearranged_twiddle_stride3_256_f32[168];
+
+
+extern const uint32_t rearranged_twiddle_tab_stride1_arr_1024_f32[5];
+extern const uint32_t rearranged_twiddle_tab_stride2_arr_1024_f32[5];
+extern const uint32_t rearranged_twiddle_tab_stride3_arr_1024_f32[5];
+extern const float32_t rearranged_twiddle_stride1_1024_f32[680];
+extern const float32_t rearranged_twiddle_stride2_1024_f32[680];
+extern const float32_t rearranged_twiddle_stride3_1024_f32[680];
+
+
+extern const uint32_t rearranged_twiddle_tab_stride1_arr_4096_f32[6];
+extern const uint32_t rearranged_twiddle_tab_stride2_arr_4096_f32[6];
+extern const uint32_t rearranged_twiddle_tab_stride3_arr_4096_f32[6];
+extern const float32_t rearranged_twiddle_stride1_4096_f32[2728];
+extern const float32_t rearranged_twiddle_stride2_4096_f32[2728];
+extern const float32_t rearranged_twiddle_stride3_4096_f32[2728];
+
+
+#endif /* defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) */
+
+
+
+#if defined(ARM_MATH_MVEI)  && !defined(ARM_MATH_AUTOVECTORIZE)
+/* q31 twiddle tables. For every size suffix (16 ... 4096) there are three coefficient
+ * arrays (stride1..3) and three uint32_t "tab" arrays - presumably offsets into the
+ * coefficient arrays; confirm against the table definitions. */
+extern const uint32_t rearranged_twiddle_tab_stride1_arr_16_q31[2];
+extern const uint32_t rearranged_twiddle_tab_stride2_arr_16_q31[2];
+extern const uint32_t rearranged_twiddle_tab_stride3_arr_16_q31[2];
+extern const q31_t rearranged_twiddle_stride1_16_q31[8];
+extern const q31_t rearranged_twiddle_stride2_16_q31[8];
+extern const q31_t rearranged_twiddle_stride3_16_q31[8];
+
+
+extern const uint32_t rearranged_twiddle_tab_stride1_arr_64_q31[3];
+extern const uint32_t rearranged_twiddle_tab_stride2_arr_64_q31[3];
+extern const uint32_t rearranged_twiddle_tab_stride3_arr_64_q31[3];
+extern const q31_t rearranged_twiddle_stride1_64_q31[40];
+extern const q31_t rearranged_twiddle_stride2_64_q31[40];
+extern const q31_t rearranged_twiddle_stride3_64_q31[40];
+
+
+extern const uint32_t rearranged_twiddle_tab_stride1_arr_256_q31[4];
+extern const uint32_t rearranged_twiddle_tab_stride2_arr_256_q31[4];
+extern const uint32_t rearranged_twiddle_tab_stride3_arr_256_q31[4];
+extern const q31_t rearranged_twiddle_stride1_256_q31[168];
+extern const q31_t rearranged_twiddle_stride2_256_q31[168];
+extern const q31_t rearranged_twiddle_stride3_256_q31[168];
+
+
+extern const uint32_t rearranged_twiddle_tab_stride1_arr_1024_q31[5];
+extern const uint32_t rearranged_twiddle_tab_stride2_arr_1024_q31[5];
+extern const uint32_t rearranged_twiddle_tab_stride3_arr_1024_q31[5];
+extern const q31_t rearranged_twiddle_stride1_1024_q31[680];
+extern const q31_t rearranged_twiddle_stride2_1024_q31[680];
+extern const q31_t rearranged_twiddle_stride3_1024_q31[680];
+
+
+extern const uint32_t rearranged_twiddle_tab_stride1_arr_4096_q31[6];
+extern const uint32_t rearranged_twiddle_tab_stride2_arr_4096_q31[6];
+extern const uint32_t rearranged_twiddle_tab_stride3_arr_4096_q31[6];
+extern const q31_t rearranged_twiddle_stride1_4096_q31[2728];
+extern const q31_t rearranged_twiddle_stride2_4096_q31[2728];
+extern const q31_t rearranged_twiddle_stride3_4096_q31[2728];
+
+
+
+#endif /* defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE) */
+
+
+
+#if defined(ARM_MATH_MVEI)  && !defined(ARM_MATH_AUTOVECTORIZE)
+/* q15 twiddle tables. For every size suffix (16 ... 4096) there are three coefficient
+ * arrays (stride1..3) and three uint32_t "tab" arrays - presumably offsets into the
+ * coefficient arrays; confirm against the table definitions. */
+extern const uint32_t rearranged_twiddle_tab_stride1_arr_16_q15[2];
+extern const uint32_t rearranged_twiddle_tab_stride2_arr_16_q15[2];
+extern const uint32_t rearranged_twiddle_tab_stride3_arr_16_q15[2];
+extern const q15_t rearranged_twiddle_stride1_16_q15[8];
+extern const q15_t rearranged_twiddle_stride2_16_q15[8];
+extern const q15_t rearranged_twiddle_stride3_16_q15[8];
+
+
+extern const uint32_t rearranged_twiddle_tab_stride1_arr_64_q15[3];
+extern const uint32_t rearranged_twiddle_tab_stride2_arr_64_q15[3];
+extern const uint32_t rearranged_twiddle_tab_stride3_arr_64_q15[3];
+extern const q15_t rearranged_twiddle_stride1_64_q15[40];
+extern const q15_t rearranged_twiddle_stride2_64_q15[40];
+extern const q15_t rearranged_twiddle_stride3_64_q15[40];
+
+
+extern const uint32_t rearranged_twiddle_tab_stride1_arr_256_q15[4];
+extern const uint32_t rearranged_twiddle_tab_stride2_arr_256_q15[4];
+extern const uint32_t rearranged_twiddle_tab_stride3_arr_256_q15[4];
+extern const q15_t rearranged_twiddle_stride1_256_q15[168];
+extern const q15_t rearranged_twiddle_stride2_256_q15[168];
+extern const q15_t rearranged_twiddle_stride3_256_q15[168];
+
+
+extern const uint32_t rearranged_twiddle_tab_stride1_arr_1024_q15[5];
+extern const uint32_t rearranged_twiddle_tab_stride2_arr_1024_q15[5];
+extern const uint32_t rearranged_twiddle_tab_stride3_arr_1024_q15[5];
+extern const q15_t rearranged_twiddle_stride1_1024_q15[680];
+extern const q15_t rearranged_twiddle_stride2_1024_q15[680];
+extern const q15_t rearranged_twiddle_stride3_1024_q15[680];
+
+
+extern const uint32_t rearranged_twiddle_tab_stride1_arr_4096_q15[6];
+extern const uint32_t rearranged_twiddle_tab_stride2_arr_4096_q15[6];
+extern const uint32_t rearranged_twiddle_tab_stride3_arr_4096_q15[6];
+extern const q15_t rearranged_twiddle_stride1_4096_q15[2728];
+extern const q15_t rearranged_twiddle_stride2_4096_q15[2728];
+extern const q15_t rearranged_twiddle_stride3_4096_q15[2728];
+
+
+#endif /* defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE) */
+
+
+
+#ifdef   __cplusplus
+}
+#endif
+
+#endif /* ARM_MVE_TABLES_H */
+
diff --git a/Libraries/CMSIS/5.9.0/DSP/1.16.2/Include/arm_mve_tables_f16.h b/Libraries/CMSIS/5.9.0/DSP/1.16.2/Include/arm_mve_tables_f16.h
new file mode 100755
index 00000000000..ae2824529a3
--- /dev/null
+++ b/Libraries/CMSIS/5.9.0/DSP/1.16.2/Include/arm_mve_tables_f16.h
@@ -0,0 +1,97 @@
+/* ----------------------------------------------------------------------
+ * Project:      CMSIS DSP Library
+ * Title:        arm_mve_tables_f16.h
+ * Description:  common tables like fft twiddle factors, Bitreverse, reciprocal etc
+ *               used for MVE implementation only
+ *
+ * @version  V1.10.0
+ * @date     04 October 2021
+ *
+ * Target Processor: Cortex-M and Cortex-A cores
+ * -------------------------------------------------------------------- */
+/*
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+ #ifndef ARM_MVE_TABLES_F16_H
+ #define ARM_MVE_TABLES_F16_H
+
+#include "arm_math_types_f16.h"
+
+#ifdef   __cplusplus
+extern "C"
+{
+#endif
+
+
+ 
+
+#if defined(ARM_MATH_MVE_FLOAT16) && !defined(ARM_MATH_AUTOVECTORIZE)
+/* float16 twiddle tables. For every size suffix (16 ... 4096) there are three coefficient
+ * arrays (stride1..3) and three uint32_t "tab" arrays - presumably offsets into the
+ * coefficient arrays; confirm against the table definitions. */
+extern const uint32_t rearranged_twiddle_tab_stride1_arr_16_f16[2];
+extern const uint32_t rearranged_twiddle_tab_stride2_arr_16_f16[2];
+extern const uint32_t rearranged_twiddle_tab_stride3_arr_16_f16[2];
+extern const float16_t rearranged_twiddle_stride1_16_f16[8];
+extern const float16_t rearranged_twiddle_stride2_16_f16[8];
+extern const float16_t rearranged_twiddle_stride3_16_f16[8];
+
+
+extern const uint32_t rearranged_twiddle_tab_stride1_arr_64_f16[3];
+extern const uint32_t rearranged_twiddle_tab_stride2_arr_64_f16[3];
+extern const uint32_t rearranged_twiddle_tab_stride3_arr_64_f16[3];
+extern const float16_t rearranged_twiddle_stride1_64_f16[40];
+extern const float16_t rearranged_twiddle_stride2_64_f16[40];
+extern const float16_t rearranged_twiddle_stride3_64_f16[40];
+
+
+extern const uint32_t rearranged_twiddle_tab_stride1_arr_256_f16[4];
+extern const uint32_t rearranged_twiddle_tab_stride2_arr_256_f16[4];
+extern const uint32_t rearranged_twiddle_tab_stride3_arr_256_f16[4];
+extern const float16_t rearranged_twiddle_stride1_256_f16[168];
+extern const float16_t rearranged_twiddle_stride2_256_f16[168];
+extern const float16_t rearranged_twiddle_stride3_256_f16[168];
+
+
+extern const uint32_t rearranged_twiddle_tab_stride1_arr_1024_f16[5];
+extern const uint32_t rearranged_twiddle_tab_stride2_arr_1024_f16[5];
+extern const uint32_t rearranged_twiddle_tab_stride3_arr_1024_f16[5];
+extern const float16_t rearranged_twiddle_stride1_1024_f16[680];
+extern const float16_t rearranged_twiddle_stride2_1024_f16[680];
+extern const float16_t rearranged_twiddle_stride3_1024_f16[680];
+
+
+extern const uint32_t rearranged_twiddle_tab_stride1_arr_4096_f16[6];
+extern const uint32_t rearranged_twiddle_tab_stride2_arr_4096_f16[6];
+extern const uint32_t rearranged_twiddle_tab_stride3_arr_4096_f16[6];
+extern const float16_t rearranged_twiddle_stride1_4096_f16[2728];
+extern const float16_t rearranged_twiddle_stride2_4096_f16[2728];
+extern const float16_t rearranged_twiddle_stride3_4096_f16[2728];
+
+
+
+#endif /* defined(ARM_MATH_MVE_FLOAT16) && !defined(ARM_MATH_AUTOVECTORIZE) */
+
+
+
+#ifdef   __cplusplus
+}
+#endif
+
+#endif /* ARM_MVE_TABLES_F16_H */
+
diff --git a/Libraries/CMSIS/5.9.0/DSP/Include/arm_vec_math.h b/Libraries/CMSIS/5.9.0/DSP/1.16.2/Include/arm_vec_math.h
old mode 100644
new mode 100755
similarity index 67%
rename from Libraries/CMSIS/5.9.0/DSP/Include/arm_vec_math.h
rename to Libraries/CMSIS/5.9.0/DSP/1.16.2/Include/arm_vec_math.h
index 0616db74dbc..ec90802e09b
--- a/Libraries/CMSIS/5.9.0/DSP/Include/arm_vec_math.h
+++ b/Libraries/CMSIS/5.9.0/DSP/1.16.2/Include/arm_vec_math.h
@@ -23,30 +23,33 @@
  * limitations under the License.
  */
 
-#ifndef _ARM_VEC_MATH_H
-#define _ARM_VEC_MATH_H
+#ifndef ARM_VEC_MATH_H
+#define ARM_VEC_MATH_H
 
 #include "arm_math_types.h"
 #include "arm_common_tables.h"
 #include "arm_helium_utils.h"
 
-#ifdef __cplusplus
-extern "C" {
+#ifdef   __cplusplus
+extern "C"
+{
 #endif
 
 #if (defined(ARM_MATH_MVEF) || defined(ARM_MATH_HELIUM)) && !defined(ARM_MATH_AUTOVECTORIZE)
 
-#define INV_NEWTON_INIT_F32 0x7EF127EA
+#define INV_NEWTON_INIT_F32         0x7EF127EA
+
+static const float32_t __logf_rng_f32=0.693147180f;
 
-static const float32_t __logf_rng_f32 = 0.693147180f;
 
 /* fast inverse approximation (3x newton) */
-__STATIC_INLINE f32x4_t vrecip_medprec_f32(f32x4_t x)
+__STATIC_INLINE f32x4_t vrecip_medprec_f32(
+    f32x4_t x)
 {
-    q31x4_t m;
-    f32x4_t b;
-    any32x4_t xinv;
-    f32x4_t ax = vabsq(x);
+    q31x4_t         m;
+    f32x4_t         b;
+    any32x4_t       xinv;
+    f32x4_t         ax = vabsq(x);
 
     xinv.f = ax;
     m = 0x3F800000 - (xinv.i & 0x7F800000);
@@ -63,7 +66,7 @@ __STATIC_INLINE f32x4_t vrecip_medprec_f32(f32x4_t x)
     b = 2.0f - xinv.f * ax;
     xinv.f = xinv.f * b;
 
-    xinv.f = vdupq_m(xinv.f, INFINITY, vcmpeqq(x, 0.0f));
+    xinv.f = vdupq_m(xinv.f, F32_MAX, vcmpeqq(x, 0.0f));
     /*
      * restore sign
      */
@@ -73,12 +76,13 @@ __STATIC_INLINE f32x4_t vrecip_medprec_f32(f32x4_t x)
 }
 
 /* fast inverse approximation (4x newton) */
-__STATIC_INLINE f32x4_t vrecip_hiprec_f32(f32x4_t x)
+__STATIC_INLINE f32x4_t vrecip_hiprec_f32(
+    f32x4_t x)
 {
-    q31x4_t m;
-    f32x4_t b;
-    any32x4_t xinv;
-    f32x4_t ax = vabsq(x);
+    q31x4_t         m;
+    f32x4_t         b;
+    any32x4_t       xinv;
+    f32x4_t         ax = vabsq(x);
 
     xinv.f = ax;
 
@@ -99,7 +103,7 @@ __STATIC_INLINE f32x4_t vrecip_hiprec_f32(f32x4_t x)
     b = 2.0f - xinv.f * ax;
     xinv.f = xinv.f * b;
 
-    xinv.f = vdupq_m(xinv.f, INFINITY, vcmpeqq(x, 0.0f));
+    xinv.f = vdupq_m(xinv.f, F32_MAX, vcmpeqq(x, 0.0f));
     /*
      * restore sign
      */
@@ -108,7 +112,8 @@ __STATIC_INLINE f32x4_t vrecip_hiprec_f32(f32x4_t x)
     return xinv.f;
 }
 
-__STATIC_INLINE f32x4_t vdiv_f32(f32x4_t num, f32x4_t den)
+__STATIC_INLINE f32x4_t vdiv_f32(
+    f32x4_t num, f32x4_t den)
 {
     return vmulq(num, vrecip_hiprec_f32(den));
 }
@@ -120,23 +125,27 @@ __STATIC_INLINE f32x4_t vdiv_f32(f32x4_t num, f32x4_t den)
   @return        destination    f32 quad vector
  */
 
-__STATIC_INLINE f32x4_t vtaylor_polyq_f32(f32x4_t x, const float32_t *coeffs)
+__STATIC_INLINE f32x4_t vtaylor_polyq_f32(
+        f32x4_t           x,
+        const float32_t * coeffs)
 {
-    f32x4_t A = vfmasq(vdupq_n_f32(coeffs[4]), x, coeffs[0]);
-    f32x4_t B = vfmasq(vdupq_n_f32(coeffs[6]), x, coeffs[2]);
-    f32x4_t C = vfmasq(vdupq_n_f32(coeffs[5]), x, coeffs[1]);
-    f32x4_t D = vfmasq(vdupq_n_f32(coeffs[7]), x, coeffs[3]);
-    f32x4_t x2 = vmulq(x, x);
-    f32x4_t x4 = vmulq(x2, x2);
-    f32x4_t res = vfmaq(vfmaq_f32(A, B, x2), vfmaq_f32(C, D, x2), x4);
+    f32x4_t         A = vfmasq(vdupq_n_f32(coeffs[4]), x, coeffs[0]);
+    f32x4_t         B = vfmasq(vdupq_n_f32(coeffs[6]), x, coeffs[2]);
+    f32x4_t         C = vfmasq(vdupq_n_f32(coeffs[5]), x, coeffs[1]);
+    f32x4_t         D = vfmasq(vdupq_n_f32(coeffs[7]), x, coeffs[3]);
+    f32x4_t         x2 = vmulq(x, x);
+    f32x4_t         x4 = vmulq(x2, x2);
+    f32x4_t         res = vfmaq(vfmaq_f32(A, B, x2), vfmaq_f32(C, D, x2), x4);
 
     return res;
 }
 
-__STATIC_INLINE f32x4_t vmant_exp_f32(f32x4_t x, int32x4_t *e)
+__STATIC_INLINE f32x4_t vmant_exp_f32(
+    f32x4_t     x,
+    int32x4_t * e)
 {
-    any32x4_t r;
-    int32x4_t n;
+    any32x4_t       r;
+    int32x4_t       n;
 
     r.f = x;
     n = r.i >> 23;
@@ -147,12 +156,13 @@ __STATIC_INLINE f32x4_t vmant_exp_f32(f32x4_t x, int32x4_t *e)
     return r.f;
 }
 
+
 __STATIC_INLINE f32x4_t vlogq_f32(f32x4_t vecIn)
 {
-    q31x4_t vecExpUnBiased;
-    f32x4_t vecTmpFlt0, vecTmpFlt1;
-    f32x4_t vecAcc0, vecAcc1, vecAcc2, vecAcc3;
-    f32x4_t vecExpUnBiasedFlt;
+    q31x4_t         vecExpUnBiased;
+    f32x4_t         vecTmpFlt0, vecTmpFlt1;
+    f32x4_t         vecAcc0, vecAcc1, vecAcc2, vecAcc3;
+    f32x4_t         vecExpUnBiasedFlt;
 
     /*
      * extract exponent
@@ -203,21 +213,22 @@ __STATIC_INLINE f32x4_t vlogq_f32(f32x4_t vecIn)
      */
     vecAcc0 = vfmaq(vecAcc0, vecExpUnBiasedFlt, __logf_rng_f32);
-    // set log0 down to -inf
+    // set log(0) lanes to -F32_MAX (used here in place of -inf)
-    vecAcc0 = vdupq_m(vecAcc0, -INFINITY, vcmpeqq(vecIn, 0.0f));
+    vecAcc0 = vdupq_m(vecAcc0, -F32_MAX, vcmpeqq(vecIn, 0.0f));
     return vecAcc0;
 }
 
-__STATIC_INLINE f32x4_t vexpq_f32(f32x4_t x)
+__STATIC_INLINE f32x4_t vexpq_f32(
+    f32x4_t x)
 {
     // Perform range reduction [-log(2),log(2)]
-    int32x4_t m = vcvtq_s32_f32(vmulq_n_f32(x, 1.4426950408f));
-    f32x4_t val = vfmsq_f32(x, vcvtq_f32_s32(m), vdupq_n_f32(0.6931471805f));
+    int32x4_t       m = vcvtq_s32_f32(vmulq_n_f32(x, 1.4426950408f));
+    f32x4_t         val = vfmsq_f32(x, vcvtq_f32_s32(m), vdupq_n_f32(0.6931471805f));
 
     // Polynomial Approximation
-    f32x4_t poly = vtaylor_polyq_f32(val, exp_tab);
+    f32x4_t         poly = vtaylor_polyq_f32(val, exp_tab);
 
     // Reconstruct
-    poly = (f32x4_t)(vqaddq_s32((q31x4_t)(poly), vqshlq_n_s32(m, 23)));
+    poly = (f32x4_t) (vqaddq_s32((q31x4_t) (poly), vqshlq_n_s32(m, 23)));
 
     poly = vdupq_m(poly, 0.0f, vcmpltq_n_s32(m, -126));
     return poly;
@@ -225,7 +236,7 @@ __STATIC_INLINE f32x4_t vexpq_f32(f32x4_t x)
 
 __STATIC_INLINE f32x4_t arm_vec_exponent_f32(f32x4_t x, int32_t nb)
 {
-    f32x4_t r = x;
+    f32x4_t         r = x;
     nb--;
     while (nb > 0) {
         r = vmulq(r, x);
@@ -236,8 +247,8 @@ __STATIC_INLINE f32x4_t arm_vec_exponent_f32(f32x4_t x, int32_t nb)
 
 __STATIC_INLINE f32x4_t vrecip_f32(f32x4_t vecIn)
 {
-    f32x4_t vecSx, vecW, vecTmp;
-    any32x4_t v;
+    f32x4_t     vecSx, vecW, vecTmp;
+    any32x4_t   v;
 
     vecSx = vabsq(vecIn);
 
@@ -254,9 +265,9 @@ __STATIC_INLINE f32x4_t vrecip_f32(f32x4_t vecIn)
     vecTmp = vfmasq(vecW, vecTmp, 56.0f);
     vecTmp = vfmasq(vecW, vecTmp, -28.0f);
     vecTmp = vfmasq(vecW, vecTmp, 8.0f);
-    v.f = vmulq(v.f, vecTmp);
+    v.f = vmulq(v.f,  vecTmp);
 
-    v.f = vdupq_m(v.f, INFINITY, vcmpeqq(vecIn, 0.0f));
+    v.f = vdupq_m(v.f, F32_MAX, vcmpeqq(vecIn, 0.0f));
     /*
      * restore sign
      */
@@ -264,17 +275,21 @@ __STATIC_INLINE f32x4_t vrecip_f32(f32x4_t vecIn)
     return v.f;
 }
 
-__STATIC_INLINE f32x4_t vtanhq_f32(f32x4_t val)
+__STATIC_INLINE f32x4_t vtanhq_f32(
+    f32x4_t val)
 {
-    f32x4_t x = vminnmq_f32(vmaxnmq_f32(val, vdupq_n_f32(-10.f)), vdupq_n_f32(10.0f));
-    f32x4_t exp2x = vexpq_f32(vmulq_n_f32(x, 2.f));
-    f32x4_t num = vsubq_n_f32(exp2x, 1.f);
-    f32x4_t den = vaddq_n_f32(exp2x, 1.f);
-    f32x4_t tanh = vmulq_f32(num, vrecip_f32(den));
+    f32x4_t         x =
+        vminnmq_f32(vmaxnmq_f32(val, vdupq_n_f32(-10.f)), vdupq_n_f32(10.0f));
+    f32x4_t         exp2x = vexpq_f32(vmulq_n_f32(x, 2.f));
+    f32x4_t         num = vsubq_n_f32(exp2x, 1.f);
+    f32x4_t         den = vaddq_n_f32(exp2x, 1.f);
+    f32x4_t         tanh = vmulq_f32(num, vrecip_f32(den));
     return tanh;
 }
 
-__STATIC_INLINE f32x4_t vpowq_f32(f32x4_t val, f32x4_t n)
+__STATIC_INLINE f32x4_t vpowq_f32(
+    f32x4_t val,
+    f32x4_t n)
 {
     return vexpq_f32(vmulq_f32(n, vlogq_f32(val)));
 }
@@ -284,8 +299,7 @@ __STATIC_INLINE f32x4_t vpowq_f32(f32x4_t val, f32x4_t n)
 #if (defined(ARM_MATH_MVEI) || defined(ARM_MATH_HELIUM)) && !defined(ARM_MATH_AUTOVECTORIZE)
 #endif /* (defined(ARM_MATH_MVEI) || defined(ARM_MATH_HELIUM)) */
 
-#if (defined(ARM_MATH_NEON) || defined(ARM_MATH_NEON_EXPERIMENTAL)) && \
-    !defined(ARM_MATH_AUTOVECTORIZE)
+#if (defined(ARM_MATH_NEON) || defined(ARM_MATH_NEON_EXPERIMENTAL)) && !defined(ARM_MATH_AUTOVECTORIZE)
 
 #include "NEMath.h"
 /**
@@ -295,18 +309,20 @@ __STATIC_INLINE f32x4_t vpowq_f32(f32x4_t val, f32x4_t n)
  * @return x^nb
  *
  */
-__STATIC_INLINE float32x4_t arm_vec_exponent_f32(float32x4_t x, int32_t nb)
+__STATIC_INLINE  float32x4_t arm_vec_exponent_f32(float32x4_t x, int32_t nb)
 {
     float32x4_t r = x;
-    nb--;
-    while (nb > 0) {
-        r = vmulq_f32(r, x);
+    nb --;
+    while(nb > 0)
+    {
+        r = vmulq_f32(r , x);
         nb--;
     }
-    return (r);
+    return(r);
 }
 
-__STATIC_INLINE float32x4_t __arm_vec_sqrt_f32_neon(float32x4_t x)
+
+__STATIC_INLINE float32x4_t __arm_vec_sqrt_f32_neon(float32x4_t  x)
 {
     float32x4_t x1 = vmaxq_f32(x, vdupq_n_f32(FLT_MIN));
     float32x4_t e = vrsqrteq_f32(x1);
@@ -318,36 +334,37 @@ __STATIC_INLINE float32x4_t __arm_vec_sqrt_f32_neon(float32x4_t x)
 __STATIC_INLINE int16x8_t __arm_vec_sqrt_q15_neon(int16x8_t vec)
 {
     float32x4_t tempF;
-    int32x4_t tempHI, tempLO;
+    int32x4_t tempHI,tempLO;
 
     tempLO = vmovl_s16(vget_low_s16(vec));
-    tempF = vcvtq_n_f32_s32(tempLO, 15);
+    tempF = vcvtq_n_f32_s32(tempLO,15);
     tempF = __arm_vec_sqrt_f32_neon(tempF);
-    tempLO = vcvtq_n_s32_f32(tempF, 15);
+    tempLO = vcvtq_n_s32_f32(tempF,15);
 
     tempHI = vmovl_s16(vget_high_s16(vec));
-    tempF = vcvtq_n_f32_s32(tempHI, 15);
+    tempF = vcvtq_n_f32_s32(tempHI,15);
     tempF = __arm_vec_sqrt_f32_neon(tempF);
-    tempHI = vcvtq_n_s32_f32(tempF, 15);
+    tempHI = vcvtq_n_s32_f32(tempF,15);
 
-    return (vcombine_s16(vqmovn_s32(tempLO), vqmovn_s32(tempHI)));
+    return(vcombine_s16(vqmovn_s32(tempLO),vqmovn_s32(tempHI)));
 }
 
 __STATIC_INLINE int32x4_t __arm_vec_sqrt_q31_neon(int32x4_t vec)
 {
-    float32x4_t temp;
+  float32x4_t temp;
 
-    temp = vcvtq_n_f32_s32(vec, 31);
-    temp = __arm_vec_sqrt_f32_neon(temp);
-    return (vcvtq_n_s32_f32(temp, 31));
+  temp = vcvtq_n_f32_s32(vec,31);
+  temp = __arm_vec_sqrt_f32_neon(temp);
+  return(vcvtq_n_s32_f32(temp,31));
 }
 
 #endif /*  (defined(ARM_MATH_NEON) || defined(ARM_MATH_NEON_EXPERIMENTAL)) && !defined(ARM_MATH_AUTOVECTORIZE) */
 
-#ifdef __cplusplus
+#ifdef   __cplusplus
 }
 #endif
 
+
 #endif /* _ARM_VEC_MATH_H */
 
 /**
diff --git a/Libraries/CMSIS/5.9.0/DSP/Include/arm_vec_math_f16.h b/Libraries/CMSIS/5.9.0/DSP/1.16.2/Include/arm_vec_math_f16.h
old mode 100644
new mode 100755
similarity index 68%
rename from Libraries/CMSIS/5.9.0/DSP/Include/arm_vec_math_f16.h
rename to Libraries/CMSIS/5.9.0/DSP/1.16.2/Include/arm_vec_math_f16.h
index 7fe91b07d70..70e503d66e2
--- a/Libraries/CMSIS/5.9.0/DSP/Include/arm_vec_math_f16.h
+++ b/Libraries/CMSIS/5.9.0/DSP/1.16.2/Include/arm_vec_math_f16.h
@@ -23,30 +23,34 @@
  * limitations under the License.
  */
 
-#ifndef _ARM_VEC_MATH_F16_H
-#define _ARM_VEC_MATH_F16_H
+#ifndef ARM_VEC_MATH_F16_H
+#define ARM_VEC_MATH_F16_H
 
 #include "arm_math_types_f16.h"
 #include "arm_common_tables_f16.h"
 #include "arm_helium_utils.h"
 
-#ifdef __cplusplus
-extern "C" {
+#ifdef   __cplusplus
+extern "C"
+{
 #endif
 
 #if defined(ARM_FLOAT16_SUPPORTED)
 
+
 #if defined(ARM_MATH_MVE_FLOAT16) && !defined(ARM_MATH_AUTOVECTORIZE)
 
-static const float16_t __logf_rng_f16 = 0.693147180f16;
+
+static const float16_t __logf_rng_f16=0.693147180f16;
 
 /* fast inverse approximation (3x newton) */
-__STATIC_INLINE f16x8_t vrecip_medprec_f16(f16x8_t x)
+__STATIC_INLINE f16x8_t vrecip_medprec_f16(
+    f16x8_t x)
 {
-    q15x8_t m;
-    f16x8_t b;
-    any16x8_t xinv;
-    f16x8_t ax = vabsq(x);
+    q15x8_t         m;
+    f16x8_t         b;
+    any16x8_t       xinv;
+    f16x8_t         ax = vabsq(x);
 
     xinv.f = ax;
 
@@ -74,12 +78,13 @@ __STATIC_INLINE f16x8_t vrecip_medprec_f16(f16x8_t x)
 }
 
 /* fast inverse approximation (4x newton) */
-__STATIC_INLINE f16x8_t vrecip_hiprec_f16(f16x8_t x)
+__STATIC_INLINE f16x8_t vrecip_hiprec_f16(
+    f16x8_t x)
 {
-    q15x8_t m;
-    f16x8_t b;
-    any16x8_t xinv;
-    f16x8_t ax = vabsq(x);
+    q15x8_t         m;
+    f16x8_t         b;
+    any16x8_t       xinv;
+    f16x8_t         ax = vabsq(x);
 
     xinv.f = ax;
 
@@ -109,11 +114,13 @@ __STATIC_INLINE f16x8_t vrecip_hiprec_f16(f16x8_t x)
     return xinv.f;
 }
 
-__STATIC_INLINE f16x8_t vdiv_f16(f16x8_t num, f16x8_t den)
+__STATIC_INLINE f16x8_t vdiv_f16(
+    f16x8_t num, f16x8_t den)
 {
     return vmulq(num, vrecip_hiprec_f16(den));
 }
 
+
 /**
-  @brief         Single-precision taylor dev.
+  @brief         Half-precision Taylor polynomial evaluation.
   @param[in]     x              f16  vector input
@@ -121,37 +128,39 @@ __STATIC_INLINE f16x8_t vdiv_f16(f16x8_t num, f16x8_t den)
   @return        destination    f16  vector
  */
 
-__STATIC_INLINE float16x8_t vtaylor_polyq_f16(float16x8_t x, const float16_t *coeffs)
+__STATIC_INLINE float16x8_t vtaylor_polyq_f16(
+        float16x8_t           x,
+        const float16_t * coeffs)
 {
-    float16x8_t A = vfmasq(vdupq_n_f16(coeffs[4]), x, coeffs[0]);
-    float16x8_t B = vfmasq(vdupq_n_f16(coeffs[6]), x, coeffs[2]);
-    float16x8_t C = vfmasq(vdupq_n_f16(coeffs[5]), x, coeffs[1]);
-    float16x8_t D = vfmasq(vdupq_n_f16(coeffs[7]), x, coeffs[3]);
-    float16x8_t x2 = vmulq(x, x);
-    float16x8_t x4 = vmulq(x2, x2);
-    float16x8_t res = vfmaq(vfmaq_f16(A, B, x2), vfmaq_f16(C, D, x2), x4);
+    float16x8_t         A = vfmasq(vdupq_n_f16(coeffs[4]), x, coeffs[0]);
+    float16x8_t         B = vfmasq(vdupq_n_f16(coeffs[6]), x, coeffs[2]);
+    float16x8_t         C = vfmasq(vdupq_n_f16(coeffs[5]), x, coeffs[1]);
+    float16x8_t         D = vfmasq(vdupq_n_f16(coeffs[7]), x, coeffs[3]);
+    float16x8_t         x2 = vmulq(x, x);
+    float16x8_t         x4 = vmulq(x2, x2);
+    float16x8_t         res = vfmaq(vfmaq_f16(A, B, x2), vfmaq_f16(C, D, x2), x4);
 
     return res;
 }
 
-#define VMANT_EXP_F16(x)   \
-    any16x8_t r;           \
-    int16x8_t n;           \
-                           \
-    r.f = x;               \
-    n = r.i >> 10;         \
-    n = n - 15;            \
-    r.i = r.i - (n << 10); \
-                           \
-    vecExpUnBiased = n;    \
+#define VMANT_EXP_F16(x)  \
+    any16x8_t       r;    \
+    int16x8_t       n;    \
+                          \
+    r.f = x;              \
+    n = r.i >> 10;        \
+    n = n - 15;           \
+    r.i = r.i - (n << 10);\
+                          \
+    vecExpUnBiased = n;   \
     vecTmpFlt1 = r.f;
 
 __STATIC_INLINE float16x8_t vlogq_f16(float16x8_t vecIn)
 {
-    q15x8_t vecExpUnBiased;
-    float16x8_t vecTmpFlt0, vecTmpFlt1;
-    float16x8_t vecAcc0, vecAcc1, vecAcc2, vecAcc3;
-    float16x8_t vecExpUnBiasedFlt;
+    q15x8_t             vecExpUnBiased;
+    float16x8_t         vecTmpFlt0, vecTmpFlt1;
+    float16x8_t         vecAcc0, vecAcc1, vecAcc2, vecAcc3;
+    float16x8_t         vecExpUnBiasedFlt;
 
     /*
      * extract exponent
@@ -206,17 +215,18 @@ __STATIC_INLINE float16x8_t vlogq_f16(float16x8_t vecIn)
     return vecAcc0;
 }
 
-__STATIC_INLINE float16x8_t vexpq_f16(float16x8_t x)
+__STATIC_INLINE float16x8_t vexpq_f16(
+    float16x8_t x)
 {
     // Perform range reduction [-log(2),log(2)]
-    int16x8_t m = vcvtq_s16_f16(vmulq_n_f16(x, 1.4426950408f16));
-    float16x8_t val = vfmsq_f16(x, vcvtq_f16_s16(m), vdupq_n_f16(0.6931471805f16));
+    int16x8_t       m = vcvtq_s16_f16(vmulq_n_f16(x, 1.4426950408f16));
+    float16x8_t     val = vfmsq_f16(x, vcvtq_f16_s16(m), vdupq_n_f16(0.6931471805f16));
 
     // Polynomial Approximation
-    float16x8_t poly = vtaylor_polyq_f16(val, exp_tab_f16);
+    float16x8_t         poly = vtaylor_polyq_f16(val, exp_tab_f16);
 
     // Reconstruct
-    poly = (float16x8_t)(vqaddq_s16((int16x8_t)(poly), vqshlq_n_s16(m, 10)));
+    poly = (float16x8_t) (vqaddq_s16((int16x8_t) (poly), vqshlq_n_s16(m, 10)));
 
     poly = vdupq_m_n_f16(poly, 0.0f16, vcmpltq_n_s16(m, -14));
     return poly;
@@ -224,7 +234,7 @@ __STATIC_INLINE float16x8_t vexpq_f16(float16x8_t x)
 
 __STATIC_INLINE float16x8_t arm_vec_exponent_f16(float16x8_t x, int16_t nb)
 {
-    float16x8_t r = x;
+    float16x8_t         r = x;
     nb--;
     while (nb > 0) {
         r = vmulq(r, x);
@@ -233,17 +243,19 @@ __STATIC_INLINE float16x8_t arm_vec_exponent_f16(float16x8_t x, int16_t nb)
     return (r);
 }
 
-__STATIC_INLINE f16x8_t vpowq_f16(f16x8_t val, f16x8_t n)
+__STATIC_INLINE f16x8_t vpowq_f16(
+    f16x8_t val,
+    f16x8_t n)
 {
     return vexpq_f16(vmulq_f16(n, vlogq_f16(val)));
 }
 
-#define INV_NEWTON_INIT_F16 0x7773
+#define INV_NEWTON_INIT_F16  0x7773
 
 __STATIC_INLINE f16x8_t vrecip_f16(f16x8_t vecIn)
 {
-    f16x8_t vecSx, vecW, vecTmp;
-    any16x8_t v;
+    f16x8_t     vecSx, vecW, vecTmp;
+    any16x8_t   v;
 
     vecSx = vabsq(vecIn);
 
@@ -260,7 +272,7 @@ __STATIC_INLINE f16x8_t vrecip_f16(f16x8_t vecIn)
     vecTmp = vfmasq_n_f16(vecW, vecTmp, 56.0f16);
     vecTmp = vfmasq_n_f16(vecW, vecTmp, -28.0f16);
     vecTmp = vfmasq_n_f16(vecW, vecTmp, 8.0f16);
-    v.f = vmulq(v.f, vecTmp);
+    v.f = vmulq(v.f,  vecTmp);
 
     v.f = vdupq_m_n_f16(v.f, F16INFINITY, vcmpeqq_n_f16(vecIn, 0.0f));
     /*
@@ -270,19 +282,23 @@ __STATIC_INLINE f16x8_t vrecip_f16(f16x8_t vecIn)
     return v.f;
 }
 
-__STATIC_INLINE f16x8_t vtanhq_f16(f16x8_t val)
+__STATIC_INLINE f16x8_t vtanhq_f16(
+    f16x8_t val)
 {
-    f16x8_t x = vminnmq_f16(vmaxnmq_f16(val, vdupq_n_f16(-10.f16)), vdupq_n_f16(10.0f16));
-    f16x8_t exp2x = vexpq_f16(vmulq_n_f16(x, 2.f16));
-    f16x8_t num = vsubq_n_f16(exp2x, 1.f16);
-    f16x8_t den = vaddq_n_f16(exp2x, 1.f16);
-    f16x8_t tanh = vmulq_f16(num, vrecip_f16(den));
+    f16x8_t         x =
+        vminnmq_f16(vmaxnmq_f16(val, vdupq_n_f16(-10.f16)), vdupq_n_f16(10.0f16));
+    f16x8_t         exp2x = vexpq_f16(vmulq_n_f16(x, 2.f16));
+    f16x8_t         num = vsubq_n_f16(exp2x, 1.f16);
+    f16x8_t         den = vaddq_n_f16(exp2x, 1.f16);
+    f16x8_t         tanh = vmulq_f16(num, vrecip_f16(den));
     return tanh;
 }
 
 #endif /* defined(ARM_MATH_MVE_FLOAT16)  && !defined(ARM_MATH_AUTOVECTORIZE)*/
 
-#ifdef __cplusplus
+
+
+#ifdef   __cplusplus
 }
 #endif
 
diff --git a/Libraries/CMSIS/5.9.0/DSP/Include/dsp/basic_math_functions.h b/Libraries/CMSIS/5.9.0/DSP/1.16.2/Include/dsp/basic_math_functions.h
old mode 100644
new mode 100755
similarity index 71%
rename from Libraries/CMSIS/5.9.0/DSP/Include/dsp/basic_math_functions.h
rename to Libraries/CMSIS/5.9.0/DSP/1.16.2/Include/dsp/basic_math_functions.h
index d6e50493c93..645afdc3b94
--- a/Libraries/CMSIS/5.9.0/DSP/Include/dsp/basic_math_functions.h
+++ b/Libraries/CMSIS/5.9.0/DSP/1.16.2/Include/dsp/basic_math_functions.h
@@ -23,8 +23,9 @@
  * limitations under the License.
  */
 
-#ifndef _BASIC_MATH_FUNCTIONS_H_
-#define _BASIC_MATH_FUNCTIONS_H_
+ 
+#ifndef BASIC_MATH_FUNCTIONS_H_
+#define BASIC_MATH_FUNCTIONS_H_
 
 #include "arm_math_types.h"
 #include "arm_math_memory.h"
@@ -32,50 +33,72 @@
 #include "dsp/none.h"
 #include "dsp/utils.h"
 
-#ifdef __cplusplus
-extern "C" {
+
+#ifdef   __cplusplus
+extern "C"
+{
 #endif
 
 /**
  * @defgroup groupMath Basic Math Functions
  */
 
-/**
+ /**
    * @brief Q7 vector multiplication.
    * @param[in]  pSrcA      points to the first input vector
    * @param[in]  pSrcB      points to the second input vector
    * @param[out] pDst       points to the output vector
    * @param[in]  blockSize  number of samples in each vector
    */
-void arm_mult_q7(const q7_t *pSrcA, const q7_t *pSrcB, q7_t *pDst, uint32_t blockSize);
+  void arm_mult_q7(
+  const q7_t * pSrcA,
+  const q7_t * pSrcB,
+        q7_t * pDst,
+        uint32_t blockSize);
 
-/**
+
+  /**
    * @brief Q15 vector multiplication.
    * @param[in]  pSrcA      points to the first input vector
    * @param[in]  pSrcB      points to the second input vector
    * @param[out] pDst       points to the output vector
    * @param[in]  blockSize  number of samples in each vector
    */
-void arm_mult_q15(const q15_t *pSrcA, const q15_t *pSrcB, q15_t *pDst, uint32_t blockSize);
+  void arm_mult_q15(
+  const q15_t * pSrcA,
+  const q15_t * pSrcB,
+        q15_t * pDst,
+        uint32_t blockSize);
 
-/**
+
+  /**
    * @brief Q31 vector multiplication.
    * @param[in]  pSrcA      points to the first input vector
    * @param[in]  pSrcB      points to the second input vector
    * @param[out] pDst       points to the output vector
    * @param[in]  blockSize  number of samples in each vector
    */
-void arm_mult_q31(const q31_t *pSrcA, const q31_t *pSrcB, q31_t *pDst, uint32_t blockSize);
+  void arm_mult_q31(
+  const q31_t * pSrcA,
+  const q31_t * pSrcB,
+        q31_t * pDst,
+        uint32_t blockSize);
 
-/**
+
+  /**
    * @brief Floating-point vector multiplication.
    * @param[in]  pSrcA      points to the first input vector
    * @param[in]  pSrcB      points to the second input vector
    * @param[out] pDst       points to the output vector
    * @param[in]  blockSize  number of samples in each vector
    */
-void arm_mult_f32(const float32_t *pSrcA, const float32_t *pSrcB, float32_t *pDst,
-                  uint32_t blockSize);
+  void arm_mult_f32(
+  const float32_t * pSrcA,
+  const float32_t * pSrcB,
+        float32_t * pDst,
+        uint32_t blockSize);
+
+
 
 /**
  * @brief Floating-point vector multiplication.
@@ -84,18 +107,28 @@ void arm_mult_f32(const float32_t *pSrcA, const float32_t *pSrcB, float32_t *pDs
  * @param[out] pDst       points to the output vector
  * @param[in]  blockSize  number of samples in each vector
  */
-void arm_mult_f64(const float64_t *pSrcA, const float64_t *pSrcB, float64_t *pDst,
-                  uint32_t blockSize);
+void arm_mult_f64(
+const float64_t * pSrcA,
+const float64_t * pSrcB,
+	  float64_t * pDst,
+	  uint32_t blockSize);
 
-/**
+
+
+ /**
    * @brief Floating-point vector addition.
    * @param[in]  pSrcA      points to the first input vector
    * @param[in]  pSrcB      points to the second input vector
    * @param[out] pDst       points to the output vector
    * @param[in]  blockSize  number of samples in each vector
    */
-void arm_add_f32(const float32_t *pSrcA, const float32_t *pSrcB, float32_t *pDst,
-                 uint32_t blockSize);
+  void arm_add_f32(
+  const float32_t * pSrcA,
+  const float32_t * pSrcB,
+        float32_t * pDst,
+        uint32_t blockSize);
+
+
 
 /**
   * @brief Floating-point vector addition.
@@ -104,102 +137,159 @@ void arm_add_f32(const float32_t *pSrcA, const float32_t *pSrcB, float32_t *pDst
   * @param[out] pDst       points to the output vector
   * @param[in]  blockSize  number of samples in each vector
   */
-void arm_add_f64(const float64_t *pSrcA, const float64_t *pSrcB, float64_t *pDst,
-                 uint32_t blockSize);
+ void arm_add_f64(
+ const float64_t * pSrcA,
+ const float64_t * pSrcB,
+	   float64_t * pDst,
+	   uint32_t blockSize);
 
-/**
+
+
+  /**
    * @brief Q7 vector addition.
    * @param[in]  pSrcA      points to the first input vector
    * @param[in]  pSrcB      points to the second input vector
    * @param[out] pDst       points to the output vector
    * @param[in]  blockSize  number of samples in each vector
    */
-void arm_add_q7(const q7_t *pSrcA, const q7_t *pSrcB, q7_t *pDst, uint32_t blockSize);
+  void arm_add_q7(
+  const q7_t * pSrcA,
+  const q7_t * pSrcB,
+        q7_t * pDst,
+        uint32_t blockSize);
 
-/**
+
+  /**
    * @brief Q15 vector addition.
    * @param[in]  pSrcA      points to the first input vector
    * @param[in]  pSrcB      points to the second input vector
    * @param[out] pDst       points to the output vector
    * @param[in]  blockSize  number of samples in each vector
    */
-void arm_add_q15(const q15_t *pSrcA, const q15_t *pSrcB, q15_t *pDst, uint32_t blockSize);
+  void arm_add_q15(
+  const q15_t * pSrcA,
+  const q15_t * pSrcB,
+        q15_t * pDst,
+        uint32_t blockSize);
 
-/**
+
+  /**
    * @brief Q31 vector addition.
    * @param[in]  pSrcA      points to the first input vector
    * @param[in]  pSrcB      points to the second input vector
    * @param[out] pDst       points to the output vector
    * @param[in]  blockSize  number of samples in each vector
    */
-void arm_add_q31(const q31_t *pSrcA, const q31_t *pSrcB, q31_t *pDst, uint32_t blockSize);
+  void arm_add_q31(
+  const q31_t * pSrcA,
+  const q31_t * pSrcB,
+        q31_t * pDst,
+        uint32_t blockSize);
 
-/**
+
+  /**
    * @brief Floating-point vector subtraction.
    * @param[in]  pSrcA      points to the first input vector
    * @param[in]  pSrcB      points to the second input vector
    * @param[out] pDst       points to the output vector
    * @param[in]  blockSize  number of samples in each vector
    */
-void arm_sub_f32(const float32_t *pSrcA, const float32_t *pSrcB, float32_t *pDst,
-                 uint32_t blockSize);
+  void arm_sub_f32(
+  const float32_t * pSrcA,
+  const float32_t * pSrcB,
+        float32_t * pDst,
+        uint32_t blockSize);
 
-/**
+
+
+  /**
    * @brief Floating-point vector subtraction.
    * @param[in]  pSrcA      points to the first input vector
    * @param[in]  pSrcB      points to the second input vector
    * @param[out] pDst       points to the output vector
    * @param[in]  blockSize  number of samples in each vector
    */
-void arm_sub_f64(const float64_t *pSrcA, const float64_t *pSrcB, float64_t *pDst,
-                 uint32_t blockSize);
+  void arm_sub_f64(
+  const float64_t * pSrcA,
+  const float64_t * pSrcB,
+        float64_t * pDst,
+        uint32_t blockSize);
 
-/**
+
+
+  /**
    * @brief Q7 vector subtraction.
    * @param[in]  pSrcA      points to the first input vector
    * @param[in]  pSrcB      points to the second input vector
    * @param[out] pDst       points to the output vector
    * @param[in]  blockSize  number of samples in each vector
    */
-void arm_sub_q7(const q7_t *pSrcA, const q7_t *pSrcB, q7_t *pDst, uint32_t blockSize);
+  void arm_sub_q7(
+  const q7_t * pSrcA,
+  const q7_t * pSrcB,
+        q7_t * pDst,
+        uint32_t blockSize);
 
-/**
+
+  /**
    * @brief Q15 vector subtraction.
    * @param[in]  pSrcA      points to the first input vector
    * @param[in]  pSrcB      points to the second input vector
    * @param[out] pDst       points to the output vector
    * @param[in]  blockSize  number of samples in each vector
    */
-void arm_sub_q15(const q15_t *pSrcA, const q15_t *pSrcB, q15_t *pDst, uint32_t blockSize);
+  void arm_sub_q15(
+  const q15_t * pSrcA,
+  const q15_t * pSrcB,
+        q15_t * pDst,
+        uint32_t blockSize);
 
-/**
+
+  /**
    * @brief Q31 vector subtraction.
    * @param[in]  pSrcA      points to the first input vector
    * @param[in]  pSrcB      points to the second input vector
    * @param[out] pDst       points to the output vector
    * @param[in]  blockSize  number of samples in each vector
    */
-void arm_sub_q31(const q31_t *pSrcA, const q31_t *pSrcB, q31_t *pDst, uint32_t blockSize);
+  void arm_sub_q31(
+  const q31_t * pSrcA,
+  const q31_t * pSrcB,
+        q31_t * pDst,
+        uint32_t blockSize);
 
-/**
+
+  /**
    * @brief Multiplies a floating-point vector by a scalar.
    * @param[in]  pSrc       points to the input vector
    * @param[in]  scale      scale factor to be applied
    * @param[out] pDst       points to the output vector
    * @param[in]  blockSize  number of samples in the vector
    */
-void arm_scale_f32(const float32_t *pSrc, float32_t scale, float32_t *pDst, uint32_t blockSize);
+  void arm_scale_f32(
+  const float32_t * pSrc,
+        float32_t scale,
+        float32_t * pDst,
+        uint32_t blockSize);
 
-/**
+
+
+  /**
    * @brief Multiplies a floating-point vector by a scalar.
    * @param[in]  pSrc       points to the input vector
    * @param[in]  scale      scale factor to be applied
    * @param[out] pDst       points to the output vector
    * @param[in]  blockSize  number of samples in the vector
    */
-void arm_scale_f64(const float64_t *pSrc, float64_t scale, float64_t *pDst, uint32_t blockSize);
+  void arm_scale_f64(
+  const float64_t * pSrc,
+        float64_t scale,
+        float64_t * pDst,
+        uint32_t blockSize);
 
-/**
+
+
+  /**
    * @brief Multiplies a Q7 vector by a scalar.
    * @param[in]  pSrc        points to the input vector
    * @param[in]  scaleFract  fractional portion of the scale value
@@ -207,9 +297,15 @@ void arm_scale_f64(const float64_t *pSrc, float64_t scale, float64_t *pDst, uint
    * @param[out] pDst        points to the output vector
    * @param[in]  blockSize   number of samples in the vector
    */
-void arm_scale_q7(const q7_t *pSrc, q7_t scaleFract, int8_t shift, q7_t *pDst, uint32_t blockSize);
+  void arm_scale_q7(
+  const q7_t * pSrc,
+        q7_t scaleFract,
+        int8_t shift,
+        q7_t * pDst,
+        uint32_t blockSize);
 
-/**
+
+  /**
    * @brief Multiplies a Q15 vector by a scalar.
    * @param[in]  pSrc        points to the input vector
    * @param[in]  scaleFract  fractional portion of the scale value
@@ -217,10 +313,15 @@ void arm_scale_q7(const q7_t *pSrc, q7_t scaleFract, int8_t shift, q7_t *pDst, u
    * @param[out] pDst        points to the output vector
    * @param[in]  blockSize   number of samples in the vector
    */
-void arm_scale_q15(const q15_t *pSrc, q15_t scaleFract, int8_t shift, q15_t *pDst,
-                   uint32_t blockSize);
+  void arm_scale_q15(
+  const q15_t * pSrc,
+        q15_t scaleFract,
+        int8_t shift,
+        q15_t * pDst,
+        uint32_t blockSize);
 
-/**
+
+  /**
    * @brief Multiplies a Q31 vector by a scalar.
    * @param[in]  pSrc        points to the input vector
    * @param[in]  scaleFract  fractional portion of the scale value
@@ -228,24 +329,38 @@ void arm_scale_q15(const q15_t *pSrc, q15_t scaleFract, int8_t shift, q15_t *pDs
    * @param[out] pDst        points to the output vector
    * @param[in]  blockSize   number of samples in the vector
    */
-void arm_scale_q31(const q31_t *pSrc, q31_t scaleFract, int8_t shift, q31_t *pDst,
-                   uint32_t blockSize);
+  void arm_scale_q31(
+  const q31_t * pSrc,
+        q31_t scaleFract,
+        int8_t shift,
+        q31_t * pDst,
+        uint32_t blockSize);
 
-/**
+
+  /**
    * @brief Q7 vector absolute value.
    * @param[in]  pSrc       points to the input buffer
    * @param[out] pDst       points to the output buffer
    * @param[in]  blockSize  number of samples in each vector
    */
-void arm_abs_q7(const q7_t *pSrc, q7_t *pDst, uint32_t blockSize);
+  void arm_abs_q7(
+  const q7_t * pSrc,
+        q7_t * pDst,
+        uint32_t blockSize);
 
-/**
+
+  /**
    * @brief Floating-point vector absolute value.
    * @param[in]  pSrc       points to the input buffer
    * @param[out] pDst       points to the output buffer
    * @param[in]  blockSize  number of samples in each vector
    */
-void arm_abs_f32(const float32_t *pSrc, float32_t *pDst, uint32_t blockSize);
+  void arm_abs_f32(
+  const float32_t * pSrc,
+        float32_t * pDst,
+        uint32_t blockSize);
+
+
 
 /**
  * @brief Floating-point vector absolute value.
@@ -253,33 +368,51 @@ void arm_abs_f32(const float32_t *pSrc, float32_t *pDst, uint32_t blockSize);
  * @param[out] pDst       points to the output buffer
  * @param[in]  blockSize  number of samples in each vector
  */
-void arm_abs_f64(const float64_t *pSrc, float64_t *pDst, uint32_t blockSize);
+void arm_abs_f64(
+const float64_t * pSrc,
+	  float64_t * pDst,
+	  uint32_t blockSize);
 
-/**
+
+
+  /**
    * @brief Q15 vector absolute value.
    * @param[in]  pSrc       points to the input buffer
    * @param[out] pDst       points to the output buffer
    * @param[in]  blockSize  number of samples in each vector
    */
-void arm_abs_q15(const q15_t *pSrc, q15_t *pDst, uint32_t blockSize);
+  void arm_abs_q15(
+  const q15_t * pSrc,
+        q15_t * pDst,
+        uint32_t blockSize);
 
-/**
+
+  /**
    * @brief Q31 vector absolute value.
    * @param[in]  pSrc       points to the input buffer
    * @param[out] pDst       points to the output buffer
    * @param[in]  blockSize  number of samples in each vector
    */
-void arm_abs_q31(const q31_t *pSrc, q31_t *pDst, uint32_t blockSize);
+  void arm_abs_q31(
+  const q31_t * pSrc,
+        q31_t * pDst,
+        uint32_t blockSize);
 
-/**
+
+  /**
    * @brief Dot product of floating-point vectors.
    * @param[in]  pSrcA      points to the first input vector
    * @param[in]  pSrcB      points to the second input vector
    * @param[in]  blockSize  number of samples in each vector
    * @param[out] result     output result returned here
    */
-void arm_dot_prod_f32(const float32_t *pSrcA, const float32_t *pSrcB, uint32_t blockSize,
-                      float32_t *result);
+  void arm_dot_prod_f32(
+  const float32_t * pSrcA,
+  const float32_t * pSrcB,
+        uint32_t blockSize,
+        float32_t * result);
+
+
 
 /**
  * @brief Dot product of floating-point vectors.
@@ -288,62 +421,97 @@ void arm_dot_prod_f32(const float32_t *pSrcA, const float32_t *pSrcB, uint32_t b
  * @param[in]  blockSize  number of samples in each vector
  * @param[out] result     output result returned here
  */
-void arm_dot_prod_f64(const float64_t *pSrcA, const float64_t *pSrcB, uint32_t blockSize,
-                      float64_t *result);
+void arm_dot_prod_f64(
+const float64_t * pSrcA,
+const float64_t * pSrcB,
+	  uint32_t blockSize,
+	  float64_t * result);
 
-/**
+
+
+  /**
    * @brief Dot product of Q7 vectors.
    * @param[in]  pSrcA      points to the first input vector
    * @param[in]  pSrcB      points to the second input vector
    * @param[in]  blockSize  number of samples in each vector
    * @param[out] result     output result returned here
    */
-void arm_dot_prod_q7(const q7_t *pSrcA, const q7_t *pSrcB, uint32_t blockSize, q31_t *result);
+  void arm_dot_prod_q7(
+  const q7_t * pSrcA,
+  const q7_t * pSrcB,
+        uint32_t blockSize,
+        q31_t * result);
 
-/**
+
+  /**
    * @brief Dot product of Q15 vectors.
    * @param[in]  pSrcA      points to the first input vector
    * @param[in]  pSrcB      points to the second input vector
    * @param[in]  blockSize  number of samples in each vector
    * @param[out] result     output result returned here
    */
-void arm_dot_prod_q15(const q15_t *pSrcA, const q15_t *pSrcB, uint32_t blockSize, q63_t *result);
+  void arm_dot_prod_q15(
+  const q15_t * pSrcA,
+  const q15_t * pSrcB,
+        uint32_t blockSize,
+        q63_t * result);
 
-/**
+
+  /**
    * @brief Dot product of Q31 vectors.
    * @param[in]  pSrcA      points to the first input vector
    * @param[in]  pSrcB      points to the second input vector
    * @param[in]  blockSize  number of samples in each vector
    * @param[out] result     output result returned here
    */
-void arm_dot_prod_q31(const q31_t *pSrcA, const q31_t *pSrcB, uint32_t blockSize, q63_t *result);
+  void arm_dot_prod_q31(
+  const q31_t * pSrcA,
+  const q31_t * pSrcB,
+        uint32_t blockSize,
+        q63_t * result);
 
-/**
+
+  /**
    * @brief  Shifts the elements of a Q7 vector a specified number of bits.
    * @param[in]  pSrc       points to the input vector
    * @param[in]  shiftBits  number of bits to shift.  A positive value shifts left; a negative value shifts right.
    * @param[out] pDst       points to the output vector
    * @param[in]  blockSize  number of samples in the vector
    */
-void arm_shift_q7(const q7_t *pSrc, int8_t shiftBits, q7_t *pDst, uint32_t blockSize);
+  void arm_shift_q7(
+  const q7_t * pSrc,
+        int8_t shiftBits,
+        q7_t * pDst,
+        uint32_t blockSize);
 
-/**
+
+  /**
    * @brief  Shifts the elements of a Q15 vector a specified number of bits.
    * @param[in]  pSrc       points to the input vector
    * @param[in]  shiftBits  number of bits to shift.  A positive value shifts left; a negative value shifts right.
    * @param[out] pDst       points to the output vector
    * @param[in]  blockSize  number of samples in the vector
    */
-void arm_shift_q15(const q15_t *pSrc, int8_t shiftBits, q15_t *pDst, uint32_t blockSize);
+  void arm_shift_q15(
+  const q15_t * pSrc,
+        int8_t shiftBits,
+        q15_t * pDst,
+        uint32_t blockSize);
 
-/**
+
+  /**
    * @brief  Shifts the elements of a Q31 vector a specified number of bits.
    * @param[in]  pSrc       points to the input vector
    * @param[in]  shiftBits  number of bits to shift.  A positive value shifts left; a negative value shifts right.
    * @param[out] pDst       points to the output vector
    * @param[in]  blockSize  number of samples in the vector
    */
-void arm_shift_q31(const q31_t *pSrc, int8_t shiftBits, q31_t *pDst, uint32_t blockSize);
+  void arm_shift_q31(
+  const q31_t * pSrc,
+        int8_t shiftBits,
+        q31_t * pDst,
+        uint32_t blockSize);
+
 
 /**
  * @brief  Adds a constant offset to a floating-point vector.
@@ -352,51 +520,83 @@ void arm_shift_q31(const q31_t *pSrc, int8_t shiftBits, q31_t *pDst, uint32_t bl
  * @param[out] pDst       points to the output vector
  * @param[in]  blockSize  number of samples in the vector
  */
-void arm_offset_f64(const float64_t *pSrc, float64_t offset, float64_t *pDst, uint32_t blockSize);
+void arm_offset_f64(
+const float64_t * pSrc,
+	  float64_t offset,
+	  float64_t * pDst,
+	  uint32_t blockSize);
 
-/**
+
+
+  /**
    * @brief  Adds a constant offset to a floating-point vector.
    * @param[in]  pSrc       points to the input vector
    * @param[in]  offset     is the offset to be added
    * @param[out] pDst       points to the output vector
    * @param[in]  blockSize  number of samples in the vector
    */
-void arm_offset_f32(const float32_t *pSrc, float32_t offset, float32_t *pDst, uint32_t blockSize);
+  void arm_offset_f32(
+  const float32_t * pSrc,
+        float32_t offset,
+        float32_t * pDst,
+        uint32_t blockSize);
 
-/**
+
+
+  /**
    * @brief  Adds a constant offset to a Q7 vector.
    * @param[in]  pSrc       points to the input vector
    * @param[in]  offset     is the offset to be added
    * @param[out] pDst       points to the output vector
    * @param[in]  blockSize  number of samples in the vector
    */
-void arm_offset_q7(const q7_t *pSrc, q7_t offset, q7_t *pDst, uint32_t blockSize);
+  void arm_offset_q7(
+  const q7_t * pSrc,
+        q7_t offset,
+        q7_t * pDst,
+        uint32_t blockSize);
 
-/**
+
+  /**
    * @brief  Adds a constant offset to a Q15 vector.
    * @param[in]  pSrc       points to the input vector
    * @param[in]  offset     is the offset to be added
    * @param[out] pDst       points to the output vector
    * @param[in]  blockSize  number of samples in the vector
    */
-void arm_offset_q15(const q15_t *pSrc, q15_t offset, q15_t *pDst, uint32_t blockSize);
+  void arm_offset_q15(
+  const q15_t * pSrc,
+        q15_t offset,
+        q15_t * pDst,
+        uint32_t blockSize);
 
-/**
+
+  /**
    * @brief  Adds a constant offset to a Q31 vector.
    * @param[in]  pSrc       points to the input vector
    * @param[in]  offset     is the offset to be added
    * @param[out] pDst       points to the output vector
    * @param[in]  blockSize  number of samples in the vector
    */
-void arm_offset_q31(const q31_t *pSrc, q31_t offset, q31_t *pDst, uint32_t blockSize);
+  void arm_offset_q31(
+  const q31_t * pSrc,
+        q31_t offset,
+        q31_t * pDst,
+        uint32_t blockSize);
 
-/**
+
+  /**
    * @brief  Negates the elements of a floating-point vector.
    * @param[in]  pSrc       points to the input vector
    * @param[out] pDst       points to the output vector
    * @param[in]  blockSize  number of samples in the vector
    */
-void arm_negate_f32(const float32_t *pSrc, float32_t *pDst, uint32_t blockSize);
+  void arm_negate_f32(
+  const float32_t * pSrc,
+        float32_t * pDst,
+        uint32_t blockSize);
+
+
 
 /**
  * @brief  Negates the elements of a floating-point vector.
@@ -404,31 +604,47 @@ void arm_negate_f32(const float32_t *pSrc, float32_t *pDst, uint32_t blockSize);
  * @param[out] pDst       points to the output vector
  * @param[in]  blockSize  number of samples in the vector
  */
-void arm_negate_f64(const float64_t *pSrc, float64_t *pDst, uint32_t blockSize);
+void arm_negate_f64(
+const float64_t * pSrc,
+	  float64_t * pDst,
+	  uint32_t blockSize);
 
-/**
+
+
+  /**
    * @brief  Negates the elements of a Q7 vector.
    * @param[in]  pSrc       points to the input vector
    * @param[out] pDst       points to the output vector
    * @param[in]  blockSize  number of samples in the vector
    */
-void arm_negate_q7(const q7_t *pSrc, q7_t *pDst, uint32_t blockSize);
+  void arm_negate_q7(
+  const q7_t * pSrc,
+        q7_t * pDst,
+        uint32_t blockSize);
 
-/**
+
+  /**
    * @brief  Negates the elements of a Q15 vector.
    * @param[in]  pSrc       points to the input vector
    * @param[out] pDst       points to the output vector
    * @param[in]  blockSize  number of samples in the vector
    */
-void arm_negate_q15(const q15_t *pSrc, q15_t *pDst, uint32_t blockSize);
+  void arm_negate_q15(
+  const q15_t * pSrc,
+        q15_t * pDst,
+        uint32_t blockSize);
 
-/**
+
+  /**
    * @brief  Negates the elements of a Q31 vector.
    * @param[in]  pSrc       points to the input vector
    * @param[out] pDst       points to the output vector
    * @param[in]  blockSize  number of samples in the vector
    */
-void arm_negate_q31(const q31_t *pSrc, q31_t *pDst, uint32_t blockSize);
+  void arm_negate_q31(
+  const q31_t * pSrc,
+        q31_t * pDst,
+        uint32_t blockSize);
 
 /**
    * @brief         Compute the logical bitwise AND of two fixed-point vectors.
@@ -436,86 +652,110 @@ void arm_negate_q31(const q31_t *pSrc, q31_t *pDst, uint32_t blockSize);
    * @param[in]     pSrcB      points to input vector B
    * @param[out]    pDst       points to output vector
    * @param[in]     blockSize  number of samples in each vector
-   * @return        none
    */
-void arm_and_u16(const uint16_t *pSrcA, const uint16_t *pSrcB, uint16_t *pDst, uint32_t blockSize);
+  void arm_and_u16(
+    const uint16_t * pSrcA,
+    const uint16_t * pSrcB,
+          uint16_t * pDst,
+          uint32_t blockSize);
 
-/**
+  /**
    * @brief         Compute the logical bitwise AND of two fixed-point vectors.
    * @param[in]     pSrcA      points to input vector A
    * @param[in]     pSrcB      points to input vector B
    * @param[out]    pDst       points to output vector
    * @param[in]     blockSize  number of samples in each vector
-   * @return        none
    */
-void arm_and_u32(const uint32_t *pSrcA, const uint32_t *pSrcB, uint32_t *pDst, uint32_t blockSize);
+  void arm_and_u32(
+    const uint32_t * pSrcA,
+    const uint32_t * pSrcB,
+          uint32_t * pDst,
+          uint32_t blockSize);
 
-/**
+  /**
    * @brief         Compute the logical bitwise AND of two fixed-point vectors.
    * @param[in]     pSrcA      points to input vector A
    * @param[in]     pSrcB      points to input vector B
    * @param[out]    pDst       points to output vector
    * @param[in]     blockSize  number of samples in each vector
-   * @return        none
    */
-void arm_and_u8(const uint8_t *pSrcA, const uint8_t *pSrcB, uint8_t *pDst, uint32_t blockSize);
+  void arm_and_u8(
+    const uint8_t * pSrcA,
+    const uint8_t * pSrcB,
+          uint8_t * pDst,
+          uint32_t blockSize);
 
-/**
+  /**
    * @brief         Compute the logical bitwise OR of two fixed-point vectors.
    * @param[in]     pSrcA      points to input vector A
    * @param[in]     pSrcB      points to input vector B
    * @param[out]    pDst       points to output vector
    * @param[in]     blockSize  number of samples in each vector
-   * @return        none
    */
-void arm_or_u16(const uint16_t *pSrcA, const uint16_t *pSrcB, uint16_t *pDst, uint32_t blockSize);
+  void arm_or_u16(
+    const uint16_t * pSrcA,
+    const uint16_t * pSrcB,
+          uint16_t * pDst,
+          uint32_t blockSize);
 
-/**
+  /**
    * @brief         Compute the logical bitwise OR of two fixed-point vectors.
    * @param[in]     pSrcA      points to input vector A
    * @param[in]     pSrcB      points to input vector B
    * @param[out]    pDst       points to output vector
    * @param[in]     blockSize  number of samples in each vector
-   * @return        none
    */
-void arm_or_u32(const uint32_t *pSrcA, const uint32_t *pSrcB, uint32_t *pDst, uint32_t blockSize);
+  void arm_or_u32(
+    const uint32_t * pSrcA,
+    const uint32_t * pSrcB,
+          uint32_t * pDst,
+          uint32_t blockSize);
 
-/**
+  /**
    * @brief         Compute the logical bitwise OR of two fixed-point vectors.
    * @param[in]     pSrcA      points to input vector A
    * @param[in]     pSrcB      points to input vector B
    * @param[out]    pDst       points to output vector
    * @param[in]     blockSize  number of samples in each vector
-   * @return        none
    */
-void arm_or_u8(const uint8_t *pSrcA, const uint8_t *pSrcB, uint8_t *pDst, uint32_t blockSize);
+  void arm_or_u8(
+    const uint8_t * pSrcA,
+    const uint8_t * pSrcB,
+          uint8_t * pDst,
+          uint32_t blockSize);
 
-/**
+  /**
    * @brief         Compute the logical bitwise NOT of a fixed-point vector.
    * @param[in]     pSrc       points to input vector 
    * @param[out]    pDst       points to output vector
    * @param[in]     blockSize  number of samples in each vector
-   * @return        none
    */
-void arm_not_u16(const uint16_t *pSrc, uint16_t *pDst, uint32_t blockSize);
+  void arm_not_u16(
+    const uint16_t * pSrc,
+          uint16_t * pDst,
+          uint32_t blockSize);
 
-/**
+  /**
    * @brief         Compute the logical bitwise NOT of a fixed-point vector.
    * @param[in]     pSrc       points to input vector 
    * @param[out]    pDst       points to output vector
    * @param[in]     blockSize  number of samples in each vector
-   * @return        none
    */
-void arm_not_u32(const uint32_t *pSrc, uint32_t *pDst, uint32_t blockSize);
+  void arm_not_u32(
+    const uint32_t * pSrc,
+          uint32_t * pDst,
+          uint32_t blockSize);
 
-/**
+  /**
    * @brief         Compute the logical bitwise NOT of a fixed-point vector.
    * @param[in]     pSrc       points to input vector 
    * @param[out]    pDst       points to output vector
    * @param[in]     blockSize  number of samples in each vector
-   * @return        none
    */
-void arm_not_u8(const uint8_t *pSrc, uint8_t *pDst, uint32_t blockSize);
+  void arm_not_u8(
+    const uint8_t * pSrc,
+          uint8_t * pDst,
+          uint32_t blockSize);
 
 /**
    * @brief         Compute the logical bitwise XOR of two fixed-point vectors.
@@ -523,80 +763,101 @@ void arm_not_u8(const uint8_t *pSrc, uint8_t *pDst, uint32_t blockSize);
    * @param[in]     pSrcB      points to input vector B
    * @param[out]    pDst       points to output vector
    * @param[in]     blockSize  number of samples in each vector
-   * @return        none
    */
-void arm_xor_u16(const uint16_t *pSrcA, const uint16_t *pSrcB, uint16_t *pDst, uint32_t blockSize);
+  void arm_xor_u16(
+    const uint16_t * pSrcA,
+    const uint16_t * pSrcB,
+          uint16_t * pDst,
+          uint32_t blockSize);
 
-/**
+  /**
    * @brief         Compute the logical bitwise XOR of two fixed-point vectors.
    * @param[in]     pSrcA      points to input vector A
    * @param[in]     pSrcB      points to input vector B
    * @param[out]    pDst       points to output vector
    * @param[in]     blockSize  number of samples in each vector
-   * @return        none
    */
-void arm_xor_u32(const uint32_t *pSrcA, const uint32_t *pSrcB, uint32_t *pDst, uint32_t blockSize);
+  void arm_xor_u32(
+    const uint32_t * pSrcA,
+    const uint32_t * pSrcB,
+          uint32_t * pDst,
+          uint32_t blockSize);
 
-/**
+  /**
    * @brief         Compute the logical bitwise XOR of two fixed-point vectors.
    * @param[in]     pSrcA      points to input vector A
    * @param[in]     pSrcB      points to input vector B
    * @param[out]    pDst       points to output vector
    * @param[in]     blockSize  number of samples in each vector
-   * @return        none
    */
-void arm_xor_u8(const uint8_t *pSrcA, const uint8_t *pSrcB, uint8_t *pDst, uint32_t blockSize);
+  void arm_xor_u8(
+    const uint8_t * pSrcA,
+    const uint8_t * pSrcB,
+          uint8_t * pDst,
+    uint32_t blockSize);
 
-/**
+  /**
   @brief         Elementwise floating-point clipping
   @param[in]     pSrc          points to input values
   @param[out]    pDst          points to output clipped values
   @param[in]     low           lower bound
   @param[in]     high          higher bound
   @param[in]     numSamples    number of samples to clip
-  @return        none
  */
 
-void arm_clip_f32(const float32_t *pSrc, float32_t *pDst, float32_t low, float32_t high,
-                  uint32_t numSamples);
+void arm_clip_f32(const float32_t * pSrc, 
+  float32_t * pDst, 
+  float32_t low, 
+  float32_t high, 
+  uint32_t numSamples);
 
-/**
+  /**
   @brief         Elementwise fixed-point clipping
   @param[in]     pSrc          points to input values
   @param[out]    pDst          points to output clipped values
   @param[in]     low           lower bound
   @param[in]     high          higher bound
   @param[in]     numSamples    number of samples to clip
-  @return        none
  */
 
-void arm_clip_q31(const q31_t *pSrc, q31_t *pDst, q31_t low, q31_t high, uint32_t numSamples);
+void arm_clip_q31(const q31_t * pSrc, 
+  q31_t * pDst, 
+  q31_t low, 
+  q31_t high, 
+  uint32_t numSamples);
 
-/**
+  /**
   @brief         Elementwise fixed-point clipping
   @param[in]     pSrc          points to input values
   @param[out]    pDst          points to output clipped values
   @param[in]     low           lower bound
   @param[in]     high          higher bound
   @param[in]     numSamples    number of samples to clip
-  @return        none
  */
 
-void arm_clip_q15(const q15_t *pSrc, q15_t *pDst, q15_t low, q15_t high, uint32_t numSamples);
+void arm_clip_q15(const q15_t * pSrc, 
+  q15_t * pDst, 
+  q15_t low, 
+  q15_t high, 
+  uint32_t numSamples);
 
-/**
+  /**
   @brief         Elementwise fixed-point clipping
   @param[in]     pSrc          points to input values
   @param[out]    pDst          points to output clipped values
   @param[in]     low           lower bound
   @param[in]     high          higher bound
   @param[in]     numSamples    number of samples to clip
-  @return        none
  */
 
-void arm_clip_q7(const q7_t *pSrc, q7_t *pDst, q7_t low, q7_t high, uint32_t numSamples);
+void arm_clip_q7(const q7_t * pSrc, 
+  q7_t * pDst, 
+  q7_t low, 
+  q7_t high, 
+  uint32_t numSamples);
+
 
-#ifdef __cplusplus
+#ifdef   __cplusplus
 }
 #endif
 
diff --git a/Libraries/CMSIS/5.9.0/DSP/Include/dsp/basic_math_functions_f16.h b/Libraries/CMSIS/5.9.0/DSP/1.16.2/Include/dsp/basic_math_functions_f16.h
old mode 100644
new mode 100755
similarity index 74%
rename from Libraries/CMSIS/5.9.0/DSP/Include/dsp/basic_math_functions_f16.h
rename to Libraries/CMSIS/5.9.0/DSP/1.16.2/Include/dsp/basic_math_functions_f16.h
index 6b39dd98122..b3d5ecdc95a
--- a/Libraries/CMSIS/5.9.0/DSP/Include/dsp/basic_math_functions_f16.h
+++ b/Libraries/CMSIS/5.9.0/DSP/1.16.2/Include/dsp/basic_math_functions_f16.h
@@ -23,12 +23,9 @@
  * limitations under the License.
  */
 
-#ifndef _BASIC_MATH_FUNCTIONS_F16_H_
-#define _BASIC_MATH_FUNCTIONS_F16_H_
-
-#ifdef __cplusplus
-extern "C" {
-#endif
+ 
+#ifndef BASIC_MATH_FUNCTIONS_F16_H_
+#define BASIC_MATH_FUNCTIONS_F16_H_
 
 #include "arm_math_types_f16.h"
 #include "arm_math_memory.h"
@@ -36,98 +33,132 @@ extern "C" {
 #include "dsp/none.h"
 #include "dsp/utils.h"
 
+#ifdef   __cplusplus
+extern "C"
+{
+#endif
+
 #if defined(ARM_FLOAT16_SUPPORTED)
 
-/**
+
+  /**
    * @brief Floating-point vector addition.
    * @param[in]  pSrcA      points to the first input vector
    * @param[in]  pSrcB      points to the second input vector
    * @param[out] pDst       points to the output vector
    * @param[in]  blockSize  number of samples in each vector
    */
-void arm_add_f16(const float16_t *pSrcA, const float16_t *pSrcB, float16_t *pDst,
-                 uint32_t blockSize);
+  void arm_add_f16(
+  const float16_t * pSrcA,
+  const float16_t * pSrcB,
+        float16_t * pDst,
+        uint32_t blockSize);
 
-/**
+  /**
    * @brief Floating-point vector subtraction.
    * @param[in]  pSrcA      points to the first input vector
    * @param[in]  pSrcB      points to the second input vector
    * @param[out] pDst       points to the output vector
    * @param[in]  blockSize  number of samples in each vector
    */
-void arm_sub_f16(const float16_t *pSrcA, const float16_t *pSrcB, float16_t *pDst,
-                 uint32_t blockSize);
+  void arm_sub_f16(
+  const float16_t * pSrcA,
+  const float16_t * pSrcB,
+        float16_t * pDst,
+        uint32_t blockSize);
 
-/**
+    /**
    * @brief Multiplies a floating-point vector by a scalar.
    * @param[in]  pSrc       points to the input vector
    * @param[in]  scale      scale factor to be applied
    * @param[out] pDst       points to the output vector
    * @param[in]  blockSize  number of samples in the vector
    */
-void arm_scale_f16(const float16_t *pSrc, float16_t scale, float16_t *pDst, uint32_t blockSize);
+  void arm_scale_f16(
+  const float16_t * pSrc,
+        float16_t scale,
+        float16_t * pDst,
+        uint32_t blockSize);
 
-/**
+    /**
    * @brief Floating-point vector absolute value.
    * @param[in]  pSrc       points to the input buffer
    * @param[out] pDst       points to the output buffer
    * @param[in]  blockSize  number of samples in each vector
    */
-void arm_abs_f16(const float16_t *pSrc, float16_t *pDst, uint32_t blockSize);
+  void arm_abs_f16(
+  const float16_t * pSrc,
+        float16_t * pDst,
+        uint32_t blockSize);
+
 
-/**
+  /**
    * @brief  Adds a constant offset to a floating-point vector.
    * @param[in]  pSrc       points to the input vector
    * @param[in]  offset     is the offset to be added
    * @param[out] pDst       points to the output vector
    * @param[in]  blockSize  number of samples in the vector
    */
-void arm_offset_f16(const float16_t *pSrc, float16_t offset, float16_t *pDst, uint32_t blockSize);
+  void arm_offset_f16(
+  const float16_t * pSrc,
+        float16_t offset,
+        float16_t * pDst,
+        uint32_t blockSize);
 
-/**
+  /**
    * @brief Dot product of floating-point vectors.
    * @param[in]  pSrcA      points to the first input vector
    * @param[in]  pSrcB      points to the second input vector
    * @param[in]  blockSize  number of samples in each vector
    * @param[out] result     output result returned here
    */
-void arm_dot_prod_f16(const float16_t *pSrcA, const float16_t *pSrcB, uint32_t blockSize,
-                      float16_t *result);
+  void arm_dot_prod_f16(
+  const float16_t * pSrcA,
+  const float16_t * pSrcB,
+        uint32_t blockSize,
+        float16_t * result);
 
-/**
+  /**
    * @brief Floating-point vector multiplication.
    * @param[in]  pSrcA      points to the first input vector
    * @param[in]  pSrcB      points to the second input vector
    * @param[out] pDst       points to the output vector
    * @param[in]  blockSize  number of samples in each vector
    */
-void arm_mult_f16(const float16_t *pSrcA, const float16_t *pSrcB, float16_t *pDst,
-                  uint32_t blockSize);
+  void arm_mult_f16(
+  const float16_t * pSrcA,
+  const float16_t * pSrcB,
+        float16_t * pDst,
+        uint32_t blockSize);
 
-/**
+  /**
    * @brief  Negates the elements of a floating-point vector.
    * @param[in]  pSrc       points to the input vector
    * @param[out] pDst       points to the output vector
    * @param[in]  blockSize  number of samples in the vector
    */
-void arm_negate_f16(const float16_t *pSrc, float16_t *pDst, uint32_t blockSize);
+  void arm_negate_f16(
+  const float16_t * pSrc,
+        float16_t * pDst,
+        uint32_t blockSize);
 
-/**
+  /**
   @brief         Elementwise floating-point clipping
   @param[in]     pSrc          points to input values
   @param[out]    pDst          points to output clipped values
   @param[in]     low           lower bound
   @param[in]     high          higher bound
   @param[in]     numSamples    number of samples to clip
-  @return        none
  */
-
-void arm_clip_f16(const float16_t *pSrc, float16_t *pDst, float16_t low, float16_t high,
-                  uint32_t numSamples);
+void arm_clip_f16(const float16_t * pSrc, 
+  float16_t * pDst, 
+  float16_t low, 
+  float16_t high, 
+  uint32_t numSamples);
 
 #endif /* defined(ARM_FLOAT16_SUPPORTED)*/
 
-#ifdef __cplusplus
+#ifdef   __cplusplus
 }
 #endif
 
diff --git a/Libraries/CMSIS/5.9.0/DSP/Include/dsp/bayes_functions.h b/Libraries/CMSIS/5.9.0/DSP/1.16.2/Include/dsp/bayes_functions.h
old mode 100644
new mode 100755
similarity index 75%
rename from Libraries/CMSIS/5.9.0/DSP/Include/dsp/bayes_functions.h
rename to Libraries/CMSIS/5.9.0/DSP/1.16.2/Include/dsp/bayes_functions.h
index 91a1d5f3dad..7b3e0efacbb
--- a/Libraries/CMSIS/5.9.0/DSP/Include/dsp/bayes_functions.h
+++ b/Libraries/CMSIS/5.9.0/DSP/1.16.2/Include/dsp/bayes_functions.h
@@ -23,8 +23,9 @@
  * limitations under the License.
  */
 
-#ifndef _BAYES_FUNCTIONS_H_
-#define _BAYES_FUNCTIONS_H_
+ 
+#ifndef BAYES_FUNCTIONS_H_
+#define BAYES_FUNCTIONS_H_
 
 #include "arm_math_types.h"
 #include "arm_math_memory.h"
@@ -45,20 +46,22 @@
  * DSP/Testing/PatternGeneration/Bayes.py
  */
 
-#ifdef __cplusplus
-extern "C" {
+#ifdef   __cplusplus
+extern "C"
+{
 #endif
 
 /**
  * @brief Instance structure for Naive Gaussian Bayesian estimator.
  */
-typedef struct {
-    uint32_t vectorDimension; /**< Dimension of vector space */
-    uint32_t numberOfClasses; /**< Number of different classes  */
-    const float32_t *theta; /**< Mean values for the Gaussians */
-    const float32_t *sigma; /**< Variances for the Gaussians */
-    const float32_t *classPriors; /**< Class prior probabilities */
-    float32_t epsilon; /**< Additive value to variances */
+typedef struct
+{
+  uint32_t vectorDimension;  /**< Dimension of vector space */
+  uint32_t numberOfClasses;  /**< Number of different classes  */
+  const float32_t *theta;          /**< Mean values for the Gaussians */
+  const float32_t *sigma;          /**< Variances for the Gaussians */
+  const float32_t *classPriors;    /**< Class prior probabilities */
+  float32_t epsilon;         /**< Additive value to variances */
 } arm_gaussian_naive_bayes_instance_f32;
 
 /**
@@ -69,14 +72,14 @@ typedef struct {
  * @param[out] *pOutputProbabilities    points to a buffer of length numberOfClasses containing estimated probabilities
  * @param[out] *pBufferB                points to a temporary buffer of length numberOfClasses
  * @return The predicted class
- *
  */
+uint32_t arm_gaussian_naive_bayes_predict_f32(const arm_gaussian_naive_bayes_instance_f32 *S, 
+   const float32_t * in, 
+   float32_t *pOutputProbabilities,
+   float32_t *pBufferB);
 
-uint32_t arm_gaussian_naive_bayes_predict_f32(const arm_gaussian_naive_bayes_instance_f32 *S,
-                                              const float32_t *in, float32_t *pOutputProbabilities,
-                                              float32_t *pBufferB);
 
-#ifdef __cplusplus
+#ifdef   __cplusplus
 }
 #endif
 
diff --git a/Libraries/CMSIS/5.9.0/DSP/Include/dsp/bayes_functions_f16.h b/Libraries/CMSIS/5.9.0/DSP/1.16.2/Include/dsp/bayes_functions_f16.h
old mode 100644
new mode 100755
similarity index 73%
rename from Libraries/CMSIS/5.9.0/DSP/Include/dsp/bayes_functions_f16.h
rename to Libraries/CMSIS/5.9.0/DSP/1.16.2/Include/dsp/bayes_functions_f16.h
index f8131e13d5e..100162ed465
--- a/Libraries/CMSIS/5.9.0/DSP/Include/dsp/bayes_functions_f16.h
+++ b/Libraries/CMSIS/5.9.0/DSP/1.16.2/Include/dsp/bayes_functions_f16.h
@@ -23,8 +23,9 @@
  * limitations under the License.
  */
 
-#ifndef _BAYES_FUNCTIONS_F16_H_
-#define _BAYES_FUNCTIONS_F16_H_
+ 
+#ifndef BAYES_FUNCTIONS_F16_H_
+#define BAYES_FUNCTIONS_F16_H_
 
 #include "arm_math_types_f16.h"
 #include "arm_math_memory.h"
@@ -34,8 +35,9 @@
 
 #include "dsp/statistics_functions_f16.h"
 
-#ifdef __cplusplus
-extern "C" {
+#ifdef   __cplusplus
+extern "C"
+{
 #endif
 
 #if defined(ARM_FLOAT16_SUPPORTED)
@@ -43,13 +45,14 @@ extern "C" {
 /**
  * @brief Instance structure for Naive Gaussian Bayesian estimator.
  */
-typedef struct {
-    uint32_t vectorDimension; /**< Dimension of vector space */
-    uint32_t numberOfClasses; /**< Number of different classes  */
-    const float16_t *theta; /**< Mean values for the Gaussians */
-    const float16_t *sigma; /**< Variances for the Gaussians */
-    const float16_t *classPriors; /**< Class prior probabilities */
-    float16_t epsilon; /**< Additive value to variances */
+typedef struct
+{
+  uint32_t vectorDimension;  /**< Dimension of vector space */
+  uint32_t numberOfClasses;  /**< Number of different classes  */
+  const float16_t *theta;          /**< Mean values for the Gaussians */
+  const float16_t *sigma;          /**< Variances for the Gaussians */
+  const float16_t *classPriors;    /**< Class prior probabilities */
+  float16_t epsilon;         /**< Additive value to variances */
 } arm_gaussian_naive_bayes_instance_f16;
 
 /**
@@ -60,15 +63,14 @@ typedef struct {
  * @param[out] *pOutputProbabilities    points to a buffer of length numberOfClasses containing estimated probabilities
  * @param[out] *pBufferB                points to a temporary buffer of length numberOfClasses
  * @return The predicted class
- *
  */
-
-uint32_t arm_gaussian_naive_bayes_predict_f16(const arm_gaussian_naive_bayes_instance_f16 *S,
-                                              const float16_t *in, float16_t *pOutputProbabilities,
-                                              float16_t *pBufferB);
+uint32_t arm_gaussian_naive_bayes_predict_f16(const arm_gaussian_naive_bayes_instance_f16 *S, 
+   const float16_t * in, 
+   float16_t *pOutputProbabilities,
+   float16_t *pBufferB);
 
 #endif /*defined(ARM_FLOAT16_SUPPORTED)*/
-#ifdef __cplusplus
+#ifdef   __cplusplus
 }
 #endif
 
diff --git a/Libraries/CMSIS/5.9.0/DSP/Include/dsp/complex_math_functions.h b/Libraries/CMSIS/5.9.0/DSP/1.16.2/Include/dsp/complex_math_functions.h
old mode 100644
new mode 100755
similarity index 72%
rename from Libraries/CMSIS/5.9.0/DSP/Include/dsp/complex_math_functions.h
rename to Libraries/CMSIS/5.9.0/DSP/1.16.2/Include/dsp/complex_math_functions.h
index 65f09b0489f..bdbc2a4ed49
--- a/Libraries/CMSIS/5.9.0/DSP/Include/dsp/complex_math_functions.h
+++ b/Libraries/CMSIS/5.9.0/DSP/1.16.2/Include/dsp/complex_math_functions.h
@@ -23,8 +23,9 @@
  * limitations under the License.
  */
 
-#ifndef _COMPLEX_MATH_FUNCTIONS_H_
-#define _COMPLEX_MATH_FUNCTIONS_H_
+ 
+#ifndef COMPLEX_MATH_FUNCTIONS_H_
+#define COMPLEX_MATH_FUNCTIONS_H_
 
 #include "arm_math_types.h"
 #include "arm_math_memory.h"
@@ -33,8 +34,9 @@
 #include "dsp/utils.h"
 #include "dsp/fast_math_functions.h"
 
-#ifdef __cplusplus
-extern "C" {
+#ifdef   __cplusplus
+extern "C"
+{
 #endif
 
 /**
@@ -47,61 +49,88 @@ extern "C" {
  * real values.
  */
 
-/**
+ /**
    * @brief  Floating-point complex conjugate.
    * @param[in]  pSrc        points to the input vector
    * @param[out] pDst        points to the output vector
    * @param[in]  numSamples  number of complex samples in each vector
    */
-void arm_cmplx_conj_f32(const float32_t *pSrc, float32_t *pDst, uint32_t numSamples);
+  void arm_cmplx_conj_f32(
+  const float32_t * pSrc,
+        float32_t * pDst,
+        uint32_t numSamples);
 
-/**
+  /**
    * @brief  Q31 complex conjugate.
    * @param[in]  pSrc        points to the input vector
    * @param[out] pDst        points to the output vector
    * @param[in]  numSamples  number of complex samples in each vector
    */
-void arm_cmplx_conj_q31(const q31_t *pSrc, q31_t *pDst, uint32_t numSamples);
+  void arm_cmplx_conj_q31(
+  const q31_t * pSrc,
+        q31_t * pDst,
+        uint32_t numSamples);
 
-/**
+
+  /**
    * @brief  Q15 complex conjugate.
    * @param[in]  pSrc        points to the input vector
    * @param[out] pDst        points to the output vector
    * @param[in]  numSamples  number of complex samples in each vector
    */
-void arm_cmplx_conj_q15(const q15_t *pSrc, q15_t *pDst, uint32_t numSamples);
+  void arm_cmplx_conj_q15(
+  const q15_t * pSrc,
+        q15_t * pDst,
+        uint32_t numSamples);
 
-/**
+
+  /**
    * @brief  Floating-point complex magnitude squared
    * @param[in]  pSrc        points to the complex input vector
    * @param[out] pDst        points to the real output vector
    * @param[in]  numSamples  number of complex samples in the input vector
    */
-void arm_cmplx_mag_squared_f32(const float32_t *pSrc, float32_t *pDst, uint32_t numSamples);
+  void arm_cmplx_mag_squared_f32(
+  const float32_t * pSrc,
+        float32_t * pDst,
+        uint32_t numSamples);
 
-/**
+
+  /**
    * @brief  Floating-point complex magnitude squared
    * @param[in]  pSrc        points to the complex input vector
    * @param[out] pDst        points to the real output vector
    * @param[in]  numSamples  number of complex samples in the input vector
    */
-void arm_cmplx_mag_squared_f64(const float64_t *pSrc, float64_t *pDst, uint32_t numSamples);
+  void arm_cmplx_mag_squared_f64(
+  const float64_t * pSrc,
+        float64_t * pDst,
+        uint32_t numSamples);
 
-/**
+
+  /**
    * @brief  Q31 complex magnitude squared
    * @param[in]  pSrc        points to the complex input vector
    * @param[out] pDst        points to the real output vector
    * @param[in]  numSamples  number of complex samples in the input vector
    */
-void arm_cmplx_mag_squared_q31(const q31_t *pSrc, q31_t *pDst, uint32_t numSamples);
+  void arm_cmplx_mag_squared_q31(
+  const q31_t * pSrc,
+        q31_t * pDst,
+        uint32_t numSamples);
 
-/**
+
+  /**
    * @brief  Q15 complex magnitude squared
    * @param[in]  pSrc        points to the complex input vector
    * @param[out] pDst        points to the real output vector
    * @param[in]  numSamples  number of complex samples in the input vector
    */
-void arm_cmplx_mag_squared_q15(const q15_t *pSrc, q15_t *pDst, uint32_t numSamples);
+  void arm_cmplx_mag_squared_q15(
+  const q15_t * pSrc,
+        q15_t * pDst,
+        uint32_t numSamples);
+
 
 /**
    * @brief  Floating-point complex magnitude
@@ -109,7 +138,11 @@ void arm_cmplx_mag_squared_q15(const q15_t *pSrc, q15_t *pDst, uint32_t numSampl
    * @param[out] pDst        points to the real output vector
    * @param[in]  numSamples  number of complex samples in the input vector
    */
-void arm_cmplx_mag_f32(const float32_t *pSrc, float32_t *pDst, uint32_t numSamples);
+  void arm_cmplx_mag_f32(
+  const float32_t * pSrc,
+        float32_t * pDst,
+        uint32_t numSamples);
+
 
 /**
    * @brief  Floating-point complex magnitude
@@ -117,33 +150,48 @@ void arm_cmplx_mag_f32(const float32_t *pSrc, float32_t *pDst, uint32_t numSampl
    * @param[out] pDst        points to the real output vector
    * @param[in]  numSamples  number of complex samples in the input vector
    */
-void arm_cmplx_mag_f64(const float64_t *pSrc, float64_t *pDst, uint32_t numSamples);
+  void arm_cmplx_mag_f64(
+  const float64_t * pSrc,
+        float64_t * pDst,
+        uint32_t numSamples);
 
-/**
+
+  /**
    * @brief  Q31 complex magnitude
    * @param[in]  pSrc        points to the complex input vector
    * @param[out] pDst        points to the real output vector
    * @param[in]  numSamples  number of complex samples in the input vector
    */
-void arm_cmplx_mag_q31(const q31_t *pSrc, q31_t *pDst, uint32_t numSamples);
+  void arm_cmplx_mag_q31(
+  const q31_t * pSrc,
+        q31_t * pDst,
+        uint32_t numSamples);
 
-/**
+
+  /**
    * @brief  Q15 complex magnitude
    * @param[in]  pSrc        points to the complex input vector
    * @param[out] pDst        points to the real output vector
    * @param[in]  numSamples  number of complex samples in the input vector
    */
-void arm_cmplx_mag_q15(const q15_t *pSrc, q15_t *pDst, uint32_t numSamples);
+  void arm_cmplx_mag_q15(
+  const q15_t * pSrc,
+        q15_t * pDst,
+        uint32_t numSamples);
 
-/**
+  /**
    * @brief  Q15 complex magnitude
    * @param[in]  pSrc        points to the complex input vector
    * @param[out] pDst        points to the real output vector
    * @param[in]  numSamples  number of complex samples in the input vector
    */
-void arm_cmplx_mag_fast_q15(const q15_t *pSrc, q15_t *pDst, uint32_t numSamples);
+  void arm_cmplx_mag_fast_q15(
+  const q15_t * pSrc,
+        q15_t * pDst,
+        uint32_t numSamples);
 
-/**
+
+  /**
    * @brief  Q15 complex dot product
    * @param[in]  pSrcA       points to the first input vector
    * @param[in]  pSrcB       points to the second input vector
@@ -151,10 +199,15 @@ void arm_cmplx_mag_fast_q15(const q15_t *pSrc, q15_t *pDst, uint32_t numSamples)
    * @param[out] realResult  real part of the result returned here
    * @param[out] imagResult  imaginary part of the result returned here
    */
-void arm_cmplx_dot_prod_q15(const q15_t *pSrcA, const q15_t *pSrcB, uint32_t numSamples,
-                            q31_t *realResult, q31_t *imagResult);
+  void arm_cmplx_dot_prod_q15(
+  const q15_t * pSrcA,
+  const q15_t * pSrcB,
+        uint32_t numSamples,
+        q31_t * realResult,
+        q31_t * imagResult);
 
-/**
+
+  /**
    * @brief  Q31 complex dot product
    * @param[in]  pSrcA       points to the first input vector
    * @param[in]  pSrcB       points to the second input vector
@@ -162,10 +215,15 @@ void arm_cmplx_dot_prod_q15(const q15_t *pSrcA, const q15_t *pSrcB, uint32_t num
    * @param[out] realResult  real part of the result returned here
    * @param[out] imagResult  imaginary part of the result returned here
    */
-void arm_cmplx_dot_prod_q31(const q31_t *pSrcA, const q31_t *pSrcB, uint32_t numSamples,
-                            q63_t *realResult, q63_t *imagResult);
+  void arm_cmplx_dot_prod_q31(
+  const q31_t * pSrcA,
+  const q31_t * pSrcB,
+        uint32_t numSamples,
+        q63_t * realResult,
+        q63_t * imagResult);
 
-/**
+
+  /**
    * @brief  Floating-point complex dot product
    * @param[in]  pSrcA       points to the first input vector
    * @param[in]  pSrcB       points to the second input vector
@@ -173,68 +231,97 @@ void arm_cmplx_dot_prod_q31(const q31_t *pSrcA, const q31_t *pSrcB, uint32_t num
    * @param[out] realResult  real part of the result returned here
    * @param[out] imagResult  imaginary part of the result returned here
    */
-void arm_cmplx_dot_prod_f32(const float32_t *pSrcA, const float32_t *pSrcB, uint32_t numSamples,
-                            float32_t *realResult, float32_t *imagResult);
+  void arm_cmplx_dot_prod_f32(
+  const float32_t * pSrcA,
+  const float32_t * pSrcB,
+        uint32_t numSamples,
+        float32_t * realResult,
+        float32_t * imagResult);
 
-/**
+
+  /**
    * @brief  Q15 complex-by-real multiplication
    * @param[in]  pSrcCmplx   points to the complex input vector
    * @param[in]  pSrcReal    points to the real input vector
    * @param[out] pCmplxDst   points to the complex output vector
    * @param[in]  numSamples  number of samples in each vector
    */
-void arm_cmplx_mult_real_q15(const q15_t *pSrcCmplx, const q15_t *pSrcReal, q15_t *pCmplxDst,
-                             uint32_t numSamples);
+  void arm_cmplx_mult_real_q15(
+  const q15_t * pSrcCmplx,
+  const q15_t * pSrcReal,
+        q15_t * pCmplxDst,
+        uint32_t numSamples);
 
-/**
+
+  /**
    * @brief  Q31 complex-by-real multiplication
    * @param[in]  pSrcCmplx   points to the complex input vector
    * @param[in]  pSrcReal    points to the real input vector
    * @param[out] pCmplxDst   points to the complex output vector
    * @param[in]  numSamples  number of samples in each vector
    */
-void arm_cmplx_mult_real_q31(const q31_t *pSrcCmplx, const q31_t *pSrcReal, q31_t *pCmplxDst,
-                             uint32_t numSamples);
+  void arm_cmplx_mult_real_q31(
+  const q31_t * pSrcCmplx,
+  const q31_t * pSrcReal,
+        q31_t * pCmplxDst,
+        uint32_t numSamples);
 
-/**
+
+  /**
    * @brief  Floating-point complex-by-real multiplication
    * @param[in]  pSrcCmplx   points to the complex input vector
    * @param[in]  pSrcReal    points to the real input vector
    * @param[out] pCmplxDst   points to the complex output vector
    * @param[in]  numSamples  number of samples in each vector
    */
-void arm_cmplx_mult_real_f32(const float32_t *pSrcCmplx, const float32_t *pSrcReal,
-                             float32_t *pCmplxDst, uint32_t numSamples);
+  void arm_cmplx_mult_real_f32(
+  const float32_t * pSrcCmplx,
+  const float32_t * pSrcReal,
+        float32_t * pCmplxDst,
+        uint32_t numSamples);
 
-/**
+  /**
    * @brief  Q15 complex-by-complex multiplication
    * @param[in]  pSrcA       points to the first input vector
    * @param[in]  pSrcB       points to the second input vector
    * @param[out] pDst        points to the output vector
    * @param[in]  numSamples  number of complex samples in each vector
    */
-void arm_cmplx_mult_cmplx_q15(const q15_t *pSrcA, const q15_t *pSrcB, q15_t *pDst,
-                              uint32_t numSamples);
+  void arm_cmplx_mult_cmplx_q15(
+  const q15_t * pSrcA,
+  const q15_t * pSrcB,
+        q15_t * pDst,
+        uint32_t numSamples);
 
-/**
+
+  /**
    * @brief  Q31 complex-by-complex multiplication
    * @param[in]  pSrcA       points to the first input vector
    * @param[in]  pSrcB       points to the second input vector
    * @param[out] pDst        points to the output vector
    * @param[in]  numSamples  number of complex samples in each vector
    */
-void arm_cmplx_mult_cmplx_q31(const q31_t *pSrcA, const q31_t *pSrcB, q31_t *pDst,
-                              uint32_t numSamples);
+  void arm_cmplx_mult_cmplx_q31(
+  const q31_t * pSrcA,
+  const q31_t * pSrcB,
+        q31_t * pDst,
+        uint32_t numSamples);
 
-/**
+
+  /**
    * @brief  Floating-point complex-by-complex multiplication
    * @param[in]  pSrcA       points to the first input vector
    * @param[in]  pSrcB       points to the second input vector
    * @param[out] pDst        points to the output vector
    * @param[in]  numSamples  number of complex samples in each vector
    */
-void arm_cmplx_mult_cmplx_f32(const float32_t *pSrcA, const float32_t *pSrcB, float32_t *pDst,
-                              uint32_t numSamples);
+  void arm_cmplx_mult_cmplx_f32(
+  const float32_t * pSrcA,
+  const float32_t * pSrcB,
+        float32_t * pDst,
+        uint32_t numSamples);
+
+
 
 /**
  * @brief  Floating-point complex-by-complex multiplication
@@ -243,10 +330,15 @@ void arm_cmplx_mult_cmplx_f32(const float32_t *pSrcA, const float32_t *pSrcB, fl
  * @param[out] pDst        points to the output vector
  * @param[in]  numSamples  number of complex samples in each vector
  */
-void arm_cmplx_mult_cmplx_f64(const float64_t *pSrcA, const float64_t *pSrcB, float64_t *pDst,
-                              uint32_t numSamples);
+void arm_cmplx_mult_cmplx_f64(
+const float64_t * pSrcA,
+const float64_t * pSrcB,
+	  float64_t * pDst,
+	  uint32_t numSamples);
+
+
 
-#ifdef __cplusplus
+#ifdef   __cplusplus
 }
 #endif
 
diff --git a/Libraries/CMSIS/5.9.0/DSP/Include/dsp/complex_math_functions_f16.h b/Libraries/CMSIS/5.9.0/DSP/1.16.2/Include/dsp/complex_math_functions_f16.h
old mode 100644
new mode 100755
similarity index 76%
rename from Libraries/CMSIS/5.9.0/DSP/Include/dsp/complex_math_functions_f16.h
rename to Libraries/CMSIS/5.9.0/DSP/1.16.2/Include/dsp/complex_math_functions_f16.h
index 98f512b3a3b..bd147325f97
--- a/Libraries/CMSIS/5.9.0/DSP/Include/dsp/complex_math_functions_f16.h
+++ b/Libraries/CMSIS/5.9.0/DSP/1.16.2/Include/dsp/complex_math_functions_f16.h
@@ -23,8 +23,9 @@
  * limitations under the License.
  */
 
-#ifndef _COMPLEX_MATH_FUNCTIONS_F16_H_
-#define _COMPLEX_MATH_FUNCTIONS_F16_H_
+ 
+#ifndef COMPLEX_MATH_FUNCTIONS_F16_H_
+#define COMPLEX_MATH_FUNCTIONS_F16_H_
 
 #include "arm_math_types_f16.h"
 #include "arm_math_memory.h"
@@ -33,37 +34,47 @@
 #include "dsp/utils.h"
 #include "dsp/fast_math_functions_f16.h"
 
-#ifdef __cplusplus
-extern "C" {
+#ifdef   __cplusplus
+extern "C"
+{
 #endif
 
 #if defined(ARM_FLOAT16_SUPPORTED)
 
-/**
+ /**
    * @brief  Floating-point complex conjugate.
    * @param[in]  pSrc        points to the input vector
    * @param[out] pDst        points to the output vector
    * @param[in]  numSamples  number of complex samples in each vector
    */
-void arm_cmplx_conj_f16(const float16_t *pSrc, float16_t *pDst, uint32_t numSamples);
+  void arm_cmplx_conj_f16(
+  const float16_t * pSrc,
+        float16_t * pDst,
+        uint32_t numSamples);
 
-/**
+ /**
    * @brief  Floating-point complex magnitude squared
    * @param[in]  pSrc        points to the complex input vector
    * @param[out] pDst        points to the real output vector
    * @param[in]  numSamples  number of complex samples in the input vector
    */
-void arm_cmplx_mag_squared_f16(const float16_t *pSrc, float16_t *pDst, uint32_t numSamples);
+  void arm_cmplx_mag_squared_f16(
+  const float16_t * pSrc,
+        float16_t * pDst,
+        uint32_t numSamples);
 
-/**
+  /**
    * @brief  Floating-point complex magnitude
    * @param[in]  pSrc        points to the complex input vector
    * @param[out] pDst        points to the real output vector
    * @param[in]  numSamples  number of complex samples in the input vector
    */
-void arm_cmplx_mag_f16(const float16_t *pSrc, float16_t *pDst, uint32_t numSamples);
+  void arm_cmplx_mag_f16(
+  const float16_t * pSrc,
+        float16_t * pDst,
+        uint32_t numSamples);
 
-/**
+  /**
    * @brief  Floating-point complex dot product
    * @param[in]  pSrcA       points to the first input vector
    * @param[in]  pSrcB       points to the second input vector
@@ -71,31 +82,41 @@ void arm_cmplx_mag_f16(const float16_t *pSrc, float16_t *pDst, uint32_t numSampl
    * @param[out] realResult  real part of the result returned here
    * @param[out] imagResult  imaginary part of the result returned here
    */
-void arm_cmplx_dot_prod_f16(const float16_t *pSrcA, const float16_t *pSrcB, uint32_t numSamples,
-                            float16_t *realResult, float16_t *imagResult);
+  void arm_cmplx_dot_prod_f16(
+  const float16_t * pSrcA,
+  const float16_t * pSrcB,
+        uint32_t numSamples,
+        float16_t * realResult,
+        float16_t * imagResult);
 
-/**
+   /**
    * @brief  Floating-point complex-by-real multiplication
    * @param[in]  pSrcCmplx   points to the complex input vector
    * @param[in]  pSrcReal    points to the real input vector
    * @param[out] pCmplxDst   points to the complex output vector
    * @param[in]  numSamples  number of samples in each vector
    */
-void arm_cmplx_mult_real_f16(const float16_t *pSrcCmplx, const float16_t *pSrcReal,
-                             float16_t *pCmplxDst, uint32_t numSamples);
+  void arm_cmplx_mult_real_f16(
+  const float16_t * pSrcCmplx,
+  const float16_t * pSrcReal,
+        float16_t * pCmplxDst,
+        uint32_t numSamples);
 
-/**
+  /**
    * @brief  Floating-point complex-by-complex multiplication
    * @param[in]  pSrcA       points to the first input vector
    * @param[in]  pSrcB       points to the second input vector
    * @param[out] pDst        points to the output vector
    * @param[in]  numSamples  number of complex samples in each vector
    */
-void arm_cmplx_mult_cmplx_f16(const float16_t *pSrcA, const float16_t *pSrcB, float16_t *pDst,
-                              uint32_t numSamples);
+  void arm_cmplx_mult_cmplx_f16(
+  const float16_t * pSrcA,
+  const float16_t * pSrcB,
+        float16_t * pDst,
+        uint32_t numSamples);
 
 #endif /*defined(ARM_FLOAT16_SUPPORTED)*/
-#ifdef __cplusplus
+#ifdef   __cplusplus
 }
 #endif
 
diff --git a/Libraries/CMSIS/5.9.0/DSP/Include/dsp/controller_functions.h b/Libraries/CMSIS/5.9.0/DSP/1.16.2/Include/dsp/controller_functions.h
old mode 100644
new mode 100755
similarity index 73%
rename from Libraries/CMSIS/5.9.0/DSP/Include/dsp/controller_functions.h
rename to Libraries/CMSIS/5.9.0/DSP/1.16.2/Include/dsp/controller_functions.h
index a892b9ba8f4..5d5de98ea58
--- a/Libraries/CMSIS/5.9.0/DSP/Include/dsp/controller_functions.h
+++ b/Libraries/CMSIS/5.9.0/DSP/1.16.2/Include/dsp/controller_functions.h
@@ -23,8 +23,9 @@
  * limitations under the License.
  */
 
-#ifndef _CONTROLLER_FUNCTIONS_H_
-#define _CONTROLLER_FUNCTIONS_H_
+ 
+#ifndef CONTROLLER_FUNCTIONS_H_
+#define CONTROLLER_FUNCTIONS_H_
 
 #include "arm_math_types.h"
 #include "arm_math_memory.h"
@@ -32,56 +33,82 @@
 #include "dsp/none.h"
 #include "dsp/utils.h"
 
-#ifdef __cplusplus
-extern "C" {
+#ifdef   __cplusplus
+extern "C"
+{
 #endif
 
-/**
+  /**
    * @brief Macros required for SINE and COSINE Controller functions
    */
 
-#define CONTROLLER_Q31_SHIFT (32 - 9)
-/* 1.31(q31) Fixed value of 2/360 */
-/* -1 to +1 is divided into 360 values so total spacing is (2/360) */
-#define INPUT_SPACING 0xB60B61
-
+#define CONTROLLER_Q31_SHIFT  (32 - 9)
+  /* 1.31(q31) Fixed value of 2/360 */
+  /* -1 to +1 is divided into 360 values so total spacing is (2/360) */
+#define INPUT_SPACING         0xB60B61
+  
 /**
  * @defgroup groupController Controller Functions
  */
 
+
 /**
-   * @ingroup groupController
-   */
+  @ingroup groupController
+ */
 
 /**
-   * @addtogroup SinCos
-   * @{
-   */
+  @defgroup SinCos Sine Cosine
+
+  Computes the trigonometric sine and cosine values using a combination of table lookup
+  and linear interpolation.
+  There are separate functions for Q31 and floating-point data types.
+  The input to the floating-point version is in degrees while the
+  fixed-point Q31 version has a scaled input with the range
+  [-1 0.9999] mapping to [-180 +180] degrees.
+
+  The floating point function also allows values that are out of the usual range. When this happens, the function will
+  take extra time to adjust the input value to the range of [-180 180].
+
+  The result is accurate to 5 digits after the decimal point.
 
+  The implementation is based on table lookup using 360 values together with linear interpolation.
+  The steps used are:
+   -# Calculation of the nearest integer table index.
+   -# Compute the fractional portion (fract) of the input.
+   -# Fetch the value corresponding to \c index from sine table to \c y0 and also value from \c index+1 to \c y1.
+   -# Sine value is computed as <code> *psinVal = y0 + (fract * (y1 - y0))</code>.
+   -# Fetch the value corresponding to \c index from cosine table to \c y0 and also value from \c index+1 to \c y1.
+   -# Cosine value is computed as <code> *pcosVal = y0 + (fract * (y1 - y0))</code>.
+ */
+ 
 /**
    * @brief  Floating-point sin_cos function.
    * @param[in]  theta   input value in degrees
    * @param[out] pSinVal  points to the processed sine output.
    * @param[out] pCosVal  points to the processed cos output.
    */
-void arm_sin_cos_f32(float32_t theta, float32_t *pSinVal, float32_t *pCosVal);
+  void arm_sin_cos_f32(
+        float32_t theta,
+        float32_t * pSinVal,
+        float32_t * pCosVal);
 
-/**
+
+  /**
    * @brief  Q31 sin_cos function.
    * @param[in]  theta    scaled input value in degrees
    * @param[out] pSinVal  points to the processed sine output.
    * @param[out] pCosVal  points to the processed cosine output.
    */
-void arm_sin_cos_q31(q31_t theta, q31_t *pSinVal, q31_t *pCosVal);
+  void arm_sin_cos_q31(
+        q31_t theta,
+        q31_t * pSinVal,
+        q31_t * pCosVal);
 
-/**
-   * @} end of SinCos group
-   */
 
 /**
-   * @ingroup groupController
-   */
-
+  @ingroup groupController
+ */
+  
 /**
    * @defgroup PID PID Motor Control
    *
@@ -140,106 +167,132 @@ void arm_sin_cos_q31(q31_t theta, q31_t *pSinVal, q31_t *pCosVal);
    * Refer to the function specific documentation below for usage guidelines.
    */
 
-/**
+
+  /**
+   * @ingroup PID
    * @brief Instance structure for the Q15 PID Control.
    */
-typedef struct {
-    q15_t A0; /**< The derived gain, A0 = Kp + Ki + Kd . */
-#if !defined(ARM_MATH_DSP)
-    q15_t A1; /**< The derived gain A1 = -Kp - 2Kd */
-    q15_t A2; /**< The derived gain A1 = Kd. */
+  typedef struct
+  {
+          q15_t A0;           /**< The derived gain, A0 = Kp + Ki + Kd . */
+#if !defined (ARM_MATH_DSP)
+          q15_t A1;           /**< The derived gain A1 = -Kp - 2Kd */
+          q15_t A2;           /**< The derived gain A2 = Kd. */
 #else
-    q31_t A1; /**< The derived gain A1 = -Kp - 2Kd | Kd.*/
+          q31_t A1;           /**< The derived gain A1 = -Kp - 2Kd | Kd.*/
 #endif
-    q15_t state[3]; /**< The state array of length 3. */
-    q15_t Kp; /**< The proportional gain. */
-    q15_t Ki; /**< The integral gain. */
-    q15_t Kd; /**< The derivative gain. */
-} arm_pid_instance_q15;
-
-/**
+          q15_t state[3];     /**< The state array of length 3. */
+          q15_t Kp;           /**< The proportional gain. */
+          q15_t Ki;           /**< The integral gain. */
+          q15_t Kd;           /**< The derivative gain. */
+  } arm_pid_instance_q15;
+
+  /**
+   * @ingroup PID
    * @brief Instance structure for the Q31 PID Control.
    */
-typedef struct {
-    q31_t A0; /**< The derived gain, A0 = Kp + Ki + Kd . */
-    q31_t A1; /**< The derived gain, A1 = -Kp - 2Kd. */
-    q31_t A2; /**< The derived gain, A2 = Kd . */
-    q31_t state[3]; /**< The state array of length 3. */
-    q31_t Kp; /**< The proportional gain. */
-    q31_t Ki; /**< The integral gain. */
-    q31_t Kd; /**< The derivative gain. */
-} arm_pid_instance_q31;
-
-/**
+  typedef struct
+  {
+          q31_t A0;            /**< The derived gain, A0 = Kp + Ki + Kd . */
+          q31_t A1;            /**< The derived gain, A1 = -Kp - 2Kd. */
+          q31_t A2;            /**< The derived gain, A2 = Kd . */
+          q31_t state[3];      /**< The state array of length 3. */
+          q31_t Kp;            /**< The proportional gain. */
+          q31_t Ki;            /**< The integral gain. */
+          q31_t Kd;            /**< The derivative gain. */
+  } arm_pid_instance_q31;
+
+  /**
+   * @ingroup PID
    * @brief Instance structure for the floating-point PID Control.
    */
-typedef struct {
-    float32_t A0; /**< The derived gain, A0 = Kp + Ki + Kd . */
-    float32_t A1; /**< The derived gain, A1 = -Kp - 2Kd. */
-    float32_t A2; /**< The derived gain, A2 = Kd . */
-    float32_t state[3]; /**< The state array of length 3. */
-    float32_t Kp; /**< The proportional gain. */
-    float32_t Ki; /**< The integral gain. */
-    float32_t Kd; /**< The derivative gain. */
-} arm_pid_instance_f32;
+  typedef struct
+  {
+          float32_t A0;          /**< The derived gain, A0 = Kp + Ki + Kd . */
+          float32_t A1;          /**< The derived gain, A1 = -Kp - 2Kd. */
+          float32_t A2;          /**< The derived gain, A2 = Kd . */
+          float32_t state[3];    /**< The state array of length 3. */
+          float32_t Kp;          /**< The proportional gain. */
+          float32_t Ki;          /**< The integral gain. */
+          float32_t Kd;          /**< The derivative gain. */
+  } arm_pid_instance_f32;
 
-/**
+
+
+  /**
    * @brief  Initialization function for the floating-point PID Control.
    * @param[in,out] S               points to an instance of the PID structure.
    * @param[in]     resetStateFlag  flag to reset the state. 0 = no change in state 1 = reset the state.
    */
-void arm_pid_init_f32(arm_pid_instance_f32 *S, int32_t resetStateFlag);
+  void arm_pid_init_f32(
+        arm_pid_instance_f32 * S,
+        int32_t resetStateFlag);
 
-/**
+
+  /**
    * @brief  Reset function for the floating-point PID Control.
    * @param[in,out] S  is an instance of the floating-point PID Control structure
    */
-void arm_pid_reset_f32(arm_pid_instance_f32 *S);
+  void arm_pid_reset_f32(
+        arm_pid_instance_f32 * S);
 
-/**
+
+  /**
    * @brief  Initialization function for the Q31 PID Control.
    * @param[in,out] S               points to an instance of the Q15 PID structure.
    * @param[in]     resetStateFlag  flag to reset the state. 0 = no change in state 1 = reset the state.
    */
-void arm_pid_init_q31(arm_pid_instance_q31 *S, int32_t resetStateFlag);
+  void arm_pid_init_q31(
+        arm_pid_instance_q31 * S,
+        int32_t resetStateFlag);
 
-/**
+
+  /**
    * @brief  Reset function for the Q31 PID Control.
    * @param[in,out] S   points to an instance of the Q31 PID Control structure
    */
 
-void arm_pid_reset_q31(arm_pid_instance_q31 *S);
+  void arm_pid_reset_q31(
+        arm_pid_instance_q31 * S);
 
-/**
+
+  /**
    * @brief  Initialization function for the Q15 PID Control.
    * @param[in,out] S               points to an instance of the Q15 PID structure.
    * @param[in]     resetStateFlag  flag to reset the state. 0 = no change in state 1 = reset the state.
    */
-void arm_pid_init_q15(arm_pid_instance_q15 *S, int32_t resetStateFlag);
+  void arm_pid_init_q15(
+        arm_pid_instance_q15 * S,
+        int32_t resetStateFlag);
 
-/**
+
+  /**
    * @brief  Reset function for the Q15 PID Control.
    * @param[in,out] S  points to an instance of the q15 PID Control structure
    */
-void arm_pid_reset_q15(arm_pid_instance_q15 *S);
+  void arm_pid_reset_q15(
+        arm_pid_instance_q15 * S);
 
-/**
-   * @addtogroup PID
-   * @{
-   */
 
-/**
+
+
+
+  /**
+   * @ingroup PID
    * @brief         Process function for the floating-point PID Control.
    * @param[in,out] S   is an instance of the floating-point PID Control structure
    * @param[in]     in  input sample to process
    * @return        processed output sample.
    */
-__STATIC_FORCEINLINE float32_t arm_pid_f32(arm_pid_instance_f32 *S, float32_t in)
-{
+  __STATIC_FORCEINLINE float32_t arm_pid_f32(
+  arm_pid_instance_f32 * S,
+  float32_t in)
+  {
     float32_t out;
 
     /* y[n] = y[n-1] + A0 * x[n] + A1 * x[n-1] + A2 * x[n-2]  */
-    out = (S->A0 * in) + (S->A1 * S->state[0]) + (S->A2 * S->state[1]) + (S->state[2]);
+    out = (S->A0 * in) +
+      (S->A1 * S->state[0]) + (S->A2 * S->state[1]) + (S->state[2]);
 
     /* Update state */
     S->state[1] = S->state[0];
@@ -248,9 +301,11 @@ __STATIC_FORCEINLINE float32_t arm_pid_f32(arm_pid_instance_f32 *S, float32_t in
 
     /* return to application */
     return (out);
-}
+
+  }
 
 /**
+  @ingroup PID
   @brief         Process function for the Q31 PID Control.
   @param[in,out] S  points to an instance of the Q31 PID Control structure
   @param[in]     in  input sample to process
@@ -263,22 +318,24 @@ __STATIC_FORCEINLINE float32_t arm_pid_f32(arm_pid_instance_f32 *S, float32_t in
          In order to avoid overflows completely the input signal must be scaled down by 2 bits as there are four additions.
          After all multiply-accumulates are performed, the 2.62 accumulator is truncated to 1.32 format and then saturated to 1.31 format.
  */
-__STATIC_FORCEINLINE q31_t arm_pid_q31(arm_pid_instance_q31 *S, q31_t in)
-{
+__STATIC_FORCEINLINE q31_t arm_pid_q31(
+  arm_pid_instance_q31 * S,
+  q31_t in)
+  {
     q63_t acc;
     q31_t out;
 
     /* acc = A0 * x[n]  */
-    acc = (q63_t)S->A0 * in;
+    acc = (q63_t) S->A0 * in;
 
     /* acc += A1 * x[n-1] */
-    acc += (q63_t)S->A1 * S->state[0];
+    acc += (q63_t) S->A1 * S->state[0];
 
     /* acc += A2 * x[n-2]  */
-    acc += (q63_t)S->A2 * S->state[1];
+    acc += (q63_t) S->A2 * S->state[1];
 
     /* convert output to 1.31 format to add y[n-1] */
-    out = (q31_t)(acc >> 31U);
+    out = (q31_t) (acc >> 31U);
 
     /* out += y[n-1] */
     out += S->state[2];
@@ -290,9 +347,11 @@ __STATIC_FORCEINLINE q31_t arm_pid_q31(arm_pid_instance_q31 *S, q31_t in)
 
     /* return to application */
     return (out);
-}
+  }
+
 
 /**
+  @ingroup PID
   @brief         Process function for the Q15 PID Control.
   @param[in,out] S   points to an instance of the Q15 PID Control structure
   @param[in]     in  input sample to process
@@ -306,33 +365,35 @@ __STATIC_FORCEINLINE q31_t arm_pid_q31(arm_pid_instance_q31 *S, q31_t in)
          After all additions have been performed, the accumulator is truncated to 34.15 format by discarding low 15 bits.
          Lastly, the accumulator is saturated to yield a result in 1.15 format.
  */
-__STATIC_FORCEINLINE q15_t arm_pid_q15(arm_pid_instance_q15 *S, q15_t in)
-{
+__STATIC_FORCEINLINE q15_t arm_pid_q15(
+  arm_pid_instance_q15 * S,
+  q15_t in)
+  {
     q63_t acc;
     q15_t out;
 
-#if defined(ARM_MATH_DSP)
+#if defined (ARM_MATH_DSP)
     /* Implementation of PID controller */
 
     /* acc = A0 * x[n]  */
-    acc = (q31_t)__SMUAD((uint32_t)S->A0, (uint32_t)in);
+    acc = (q31_t) __SMUAD((uint32_t)S->A0, (uint32_t)in);
 
     /* acc += A1 * x[n-1] + A2 * x[n-2]  */
-    acc = (q63_t)__SMLALD((uint32_t)S->A1, (uint32_t)read_q15x2(S->state), (uint64_t)acc);
+    acc = (q63_t)__SMLALD((uint32_t)S->A1, (uint32_t)read_q15x2 (S->state), (uint64_t)acc);
 #else
     /* acc = A0 * x[n]  */
-    acc = ((q31_t)S->A0) * in;
+    acc = ((q31_t) S->A0) * in;
 
     /* acc += A1 * x[n-1] + A2 * x[n-2]  */
-    acc += (q31_t)S->A1 * S->state[0];
-    acc += (q31_t)S->A2 * S->state[1];
+    acc += (q31_t) S->A1 * S->state[0];
+    acc += (q31_t) S->A2 * S->state[1];
 #endif
 
     /* acc += y[n-1] */
-    acc += (q31_t)S->state[2] << 15;
+    acc += (q31_t) S->state[2] << 15;
 
     /* saturate the output */
-    out = (q15_t)(__SSAT((q31_t)(acc >> 15), 16));
+    out = (q15_t) (__SSAT((q31_t)(acc >> 15), 16));
 
     /* Update state */
     S->state[1] = S->state[0];
@@ -341,17 +402,15 @@ __STATIC_FORCEINLINE q15_t arm_pid_q15(arm_pid_instance_q15 *S, q15_t in)
 
     /* return to application */
     return (out);
-}
+  }
 
-/**
-   * @} end of PID group
-   */
 
-/**
+
+  /**
    * @ingroup groupController
    */
 
-/**
+  /**
    * @defgroup park Vector Park Transform
    *
    * Forward Park transform converts the input two-coordinate vector to flux and torque components.
@@ -375,12 +434,10 @@ __STATIC_FORCEINLINE q15_t arm_pid_q15(arm_pid_instance_q15 *S, q15_t in)
    * Refer to the function specific documentation below for usage guidelines.
    */
 
-/**
-   * @addtogroup park
-   * @{
-   */
+ 
 
-/**
+  /**
+   * @ingroup park
    * @brief Floating-point Park transform
    * @param[in]  Ialpha  input two-phase vector coordinate alpha
    * @param[in]  Ibeta   input two-phase vector coordinate beta
@@ -388,22 +445,28 @@ __STATIC_FORCEINLINE q15_t arm_pid_q15(arm_pid_instance_q15 *S, q15_t in)
    * @param[out] pIq     points to output   rotor reference frame q
    * @param[in]  sinVal  sine value of rotation angle theta
    * @param[in]  cosVal  cosine value of rotation angle theta
-   * @return     none
    *
    * The function implements the forward Park transform.
    *
    */
-__STATIC_FORCEINLINE void arm_park_f32(float32_t Ialpha, float32_t Ibeta, float32_t *pId,
-                                       float32_t *pIq, float32_t sinVal, float32_t cosVal)
-{
+  __STATIC_FORCEINLINE void arm_park_f32(
+  float32_t Ialpha,
+  float32_t Ibeta,
+  float32_t * pId,
+  float32_t * pIq,
+  float32_t sinVal,
+  float32_t cosVal)
+  {
     /* Calculate pId using the equation, pId = Ialpha * cosVal + Ibeta * sinVal */
     *pId = Ialpha * cosVal + Ibeta * sinVal;
 
     /* Calculate pIq using the equation, pIq = - Ialpha * sinVal + Ibeta * cosVal */
     *pIq = -Ialpha * sinVal + Ibeta * cosVal;
-}
+  }
+
 
 /**
+  @ingroup park
   @brief  Park transform for Q31 version
   @param[in]  Ialpha  input two-phase vector coordinate alpha
   @param[in]  Ibeta   input two-phase vector coordinate beta
@@ -411,47 +474,50 @@ __STATIC_FORCEINLINE void arm_park_f32(float32_t Ialpha, float32_t Ibeta, float3
   @param[out] pIq     points to output rotor reference frame q
   @param[in]  sinVal  sine value of rotation angle theta
   @param[in]  cosVal  cosine value of rotation angle theta
-  @return     none
 
   \par Scaling and Overflow Behavior
          The function is implemented using an internal 32-bit accumulator.
          The accumulator maintains 1.31 format by truncating lower 31 bits of the intermediate multiplication in 2.62 format.
          There is saturation on the addition and subtraction, hence there is no risk of overflow.
  */
-__STATIC_FORCEINLINE void arm_park_q31(q31_t Ialpha, q31_t Ibeta, q31_t *pId, q31_t *pIq,
-                                       q31_t sinVal, q31_t cosVal)
-{
-    q31_t product1, product2; /* Temporary variables used to store intermediate results */
-    q31_t product3, product4; /* Temporary variables used to store intermediate results */
+__STATIC_FORCEINLINE void arm_park_q31(
+  q31_t Ialpha,
+  q31_t Ibeta,
+  q31_t * pId,
+  q31_t * pIq,
+  q31_t sinVal,
+  q31_t cosVal)
+  {
+    q31_t product1, product2;                    /* Temporary variables used to store intermediate results */
+    q31_t product3, product4;                    /* Temporary variables used to store intermediate results */
 
     /* Intermediate product is calculated by (Ialpha * cosVal) */
-    product1 = (q31_t)(((q63_t)(Ialpha) * (cosVal)) >> 31);
+    product1 = (q31_t) (((q63_t) (Ialpha) * (cosVal)) >> 31);
 
     /* Intermediate product is calculated by (Ibeta * sinVal) */
-    product2 = (q31_t)(((q63_t)(Ibeta) * (sinVal)) >> 31);
+    product2 = (q31_t) (((q63_t) (Ibeta) * (sinVal)) >> 31);
+
 
     /* Intermediate product is calculated by (Ialpha * sinVal) */
-    product3 = (q31_t)(((q63_t)(Ialpha) * (sinVal)) >> 31);
+    product3 = (q31_t) (((q63_t) (Ialpha) * (sinVal)) >> 31);
 
     /* Intermediate product is calculated by (Ibeta * cosVal) */
-    product4 = (q31_t)(((q63_t)(Ibeta) * (cosVal)) >> 31);
+    product4 = (q31_t) (((q63_t) (Ibeta) * (cosVal)) >> 31);
 
     /* Calculate pId by adding the two intermediate products 1 and 2 */
     *pId = __QADD(product1, product2);
 
     /* Calculate pIq by subtracting the two intermediate products 3 from 4 */
     *pIq = __QSUB(product4, product3);
-}
+  }
 
-/**
-   * @} end of park group
-   */
 
-/**
+
+  /**
    * @ingroup groupController
    */
 
-/**
+  /**
    * @defgroup inv_park Vector Inverse Park transform
    * Inverse Park transform converts the input flux and torque components to two-coordinate vector.
    *
@@ -468,12 +534,10 @@ __STATIC_FORCEINLINE void arm_park_q31(q31_t Ialpha, q31_t Ibeta, q31_t *pId, q3
    * Refer to the function specific documentation below for usage guidelines.
    */
 
-/**
-   * @addtogroup inv_park
-   * @{
-   */
+  
 
-/**
+   /**
+   * @ingroup inv_park
    * @brief  Floating-point Inverse Park transform
    * @param[in]  Id       input coordinate of rotor reference frame d
    * @param[in]  Iq       input coordinate of rotor reference frame q
@@ -481,19 +545,25 @@ __STATIC_FORCEINLINE void arm_park_q31(q31_t Ialpha, q31_t Ibeta, q31_t *pId, q3
    * @param[out] pIbeta   points to output two-phase orthogonal vector axis beta
    * @param[in]  sinVal   sine value of rotation angle theta
    * @param[in]  cosVal   cosine value of rotation angle theta
-   * @return     none
    */
-__STATIC_FORCEINLINE void arm_inv_park_f32(float32_t Id, float32_t Iq, float32_t *pIalpha,
-                                           float32_t *pIbeta, float32_t sinVal, float32_t cosVal)
-{
+  __STATIC_FORCEINLINE void arm_inv_park_f32(
+  float32_t Id,
+  float32_t Iq,
+  float32_t * pIalpha,
+  float32_t * pIbeta,
+  float32_t sinVal,
+  float32_t cosVal)
+  {
     /* Calculate pIalpha using the equation, pIalpha = Id * cosVal - Iq * sinVal */
     *pIalpha = Id * cosVal - Iq * sinVal;
 
     /* Calculate pIbeta using the equation, pIbeta = Id * sinVal + Iq * cosVal */
     *pIbeta = Id * sinVal + Iq * cosVal;
-}
+  }
+
 
 /**
+  @ingroup inv_park
   @brief  Inverse Park transform for   Q31 version
   @param[in]  Id       input coordinate of rotor reference frame d
   @param[in]  Iq       input coordinate of rotor reference frame q
@@ -501,47 +571,49 @@ __STATIC_FORCEINLINE void arm_inv_park_f32(float32_t Id, float32_t Iq, float32_t
   @param[out] pIbeta   points to output two-phase orthogonal vector axis beta
   @param[in]  sinVal   sine value of rotation angle theta
   @param[in]  cosVal   cosine value of rotation angle theta
-  @return     none
 
   @par Scaling and Overflow Behavior
          The function is implemented using an internal 32-bit accumulator.
          The accumulator maintains 1.31 format by truncating lower 31 bits of the intermediate multiplication in 2.62 format.
          There is saturation on the addition, hence there is no risk of overflow.
  */
-__STATIC_FORCEINLINE void arm_inv_park_q31(q31_t Id, q31_t Iq, q31_t *pIalpha, q31_t *pIbeta,
-                                           q31_t sinVal, q31_t cosVal)
-{
-    q31_t product1, product2; /* Temporary variables used to store intermediate results */
-    q31_t product3, product4; /* Temporary variables used to store intermediate results */
+__STATIC_FORCEINLINE void arm_inv_park_q31(
+  q31_t Id,
+  q31_t Iq,
+  q31_t * pIalpha,
+  q31_t * pIbeta,
+  q31_t sinVal,
+  q31_t cosVal)
+  {
+    q31_t product1, product2;                    /* Temporary variables used to store intermediate results */
+    q31_t product3, product4;                    /* Temporary variables used to store intermediate results */
 
     /* Intermediate product is calculated by (Id * cosVal) */
-    product1 = (q31_t)(((q63_t)(Id) * (cosVal)) >> 31);
+    product1 = (q31_t) (((q63_t) (Id) * (cosVal)) >> 31);
 
     /* Intermediate product is calculated by (Iq * sinVal) */
-    product2 = (q31_t)(((q63_t)(Iq) * (sinVal)) >> 31);
+    product2 = (q31_t) (((q63_t) (Iq) * (sinVal)) >> 31);
+
 
     /* Intermediate product is calculated by (Id * sinVal) */
-    product3 = (q31_t)(((q63_t)(Id) * (sinVal)) >> 31);
+    product3 = (q31_t) (((q63_t) (Id) * (sinVal)) >> 31);
 
     /* Intermediate product is calculated by (Iq * cosVal) */
-    product4 = (q31_t)(((q63_t)(Iq) * (cosVal)) >> 31);
+    product4 = (q31_t) (((q63_t) (Iq) * (cosVal)) >> 31);
 
     /* Calculate pIalpha by using the two intermediate products 1 and 2 */
     *pIalpha = __QSUB(product1, product2);
 
     /* Calculate pIbeta by using the two intermediate products 3 and 4 */
     *pIbeta = __QADD(product4, product3);
-}
+  }
 
-/**
-   * @} end of Inverse park group
-   */
 
 /**
    * @ingroup groupController
    */
 
-/**
+  /**
    * @defgroup clarke Vector Clarke Transform
    * Forward Clarke transform converts the instantaneous stator phases into a two-coordinate time invariant vector.
    * Generally the Clarke transform uses three-phase currents <code>Ia, Ib and Ic</code> to calculate currents
@@ -563,69 +635,71 @@ __STATIC_FORCEINLINE void arm_inv_park_q31(q31_t Id, q31_t Iq, q31_t *pIalpha, q
    * Refer to the function specific documentation below for usage guidelines.
    */
 
-/**
-   * @addtogroup clarke
-   * @{
-   */
 
-/**
+  /**
    *
+   * @ingroup clarke
    * @brief  Floating-point Clarke transform
    * @param[in]  Ia       input three-phase coordinate <code>a</code>
    * @param[in]  Ib       input three-phase coordinate <code>b</code>
    * @param[out] pIalpha  points to output two-phase orthogonal vector axis alpha
    * @param[out] pIbeta   points to output two-phase orthogonal vector axis beta
-   * @return        none
    */
-__STATIC_FORCEINLINE void arm_clarke_f32(float32_t Ia, float32_t Ib, float32_t *pIalpha,
-                                         float32_t *pIbeta)
-{
+  __STATIC_FORCEINLINE void arm_clarke_f32(
+  float32_t Ia,
+  float32_t Ib,
+  float32_t * pIalpha,
+  float32_t * pIbeta)
+  {
     /* Calculate pIalpha using the equation, pIalpha = Ia */
     *pIalpha = Ia;
 
     /* Calculate pIbeta using the equation, pIbeta = (1/sqrt(3)) * Ia + (2/sqrt(3)) * Ib */
     *pIbeta = (0.57735026919f * Ia + 1.15470053838f * Ib);
-}
+  }
+
 
 /**
+  @ingroup clarke
   @brief  Clarke transform for Q31 version
   @param[in]  Ia       input three-phase coordinate <code>a</code>
   @param[in]  Ib       input three-phase coordinate <code>b</code>
   @param[out] pIalpha  points to output two-phase orthogonal vector axis alpha
   @param[out] pIbeta   points to output two-phase orthogonal vector axis beta
-  @return     none
 
   \par Scaling and Overflow Behavior
          The function is implemented using an internal 32-bit accumulator.
          The accumulator maintains 1.31 format by truncating lower 31 bits of the intermediate multiplication in 2.62 format.
          There is saturation on the addition, hence there is no risk of overflow.
  */
-__STATIC_FORCEINLINE void arm_clarke_q31(q31_t Ia, q31_t Ib, q31_t *pIalpha, q31_t *pIbeta)
-{
-    q31_t product1, product2; /* Temporary variables used to store intermediate results */
+__STATIC_FORCEINLINE void arm_clarke_q31(
+  q31_t Ia,
+  q31_t Ib,
+  q31_t * pIalpha,
+  q31_t * pIbeta)
+  {
+    q31_t product1, product2;                    /* Temporary variables used to store intermediate results */
 
     /* Calculating pIalpha from Ia by equation pIalpha = Ia */
     *pIalpha = Ia;
 
     /* Intermediate product is calculated by (1/(sqrt(3)) * Ia) */
-    product1 = (q31_t)(((q63_t)Ia * 0x24F34E8B) >> 30);
+    product1 = (q31_t) (((q63_t) Ia * 0x24F34E8B) >> 30);
 
     /* Intermediate product is calculated by (2/sqrt(3) * Ib) */
-    product2 = (q31_t)(((q63_t)Ib * 0x49E69D16) >> 30);
+    product2 = (q31_t) (((q63_t) Ib * 0x49E69D16) >> 30);
 
     /* pIbeta is calculated by adding the intermediate products */
     *pIbeta = __QADD(product1, product2);
-}
+  }
 
-/**
-   * @} end of clarke group
-   */
 
-/**
+
+  /**
    * @ingroup groupController
    */
 
-/**
+  /**
    * @defgroup inv_clarke Vector Inverse Clarke Transform
    * Inverse Clarke transform converts the two-coordinate time invariant vector into instantaneous stator phases.
    *
@@ -641,64 +715,70 @@ __STATIC_FORCEINLINE void arm_clarke_q31(q31_t Ia, q31_t Ib, q31_t *pIalpha, q31
    * Refer to the function specific documentation below for usage guidelines.
    */
 
-/**
-   * @addtogroup inv_clarke
-   * @{
-   */
+ 
 
-/**
+   /**
+   * @ingroup inv_clarke
    * @brief  Floating-point Inverse Clarke transform
    * @param[in]  Ialpha  input two-phase orthogonal vector axis alpha
    * @param[in]  Ibeta   input two-phase orthogonal vector axis beta
    * @param[out] pIa     points to output three-phase coordinate <code>a</code>
    * @param[out] pIb     points to output three-phase coordinate <code>b</code>
-   * @return     none
    */
-__STATIC_FORCEINLINE void arm_inv_clarke_f32(float32_t Ialpha, float32_t Ibeta, float32_t *pIa,
-                                             float32_t *pIb)
-{
+  __STATIC_FORCEINLINE void arm_inv_clarke_f32(
+  float32_t Ialpha,
+  float32_t Ibeta,
+  float32_t * pIa,
+  float32_t * pIb)
+  {
     /* Calculating pIa from Ialpha by equation pIa = Ialpha */
     *pIa = Ialpha;
 
     /* Calculating pIb from Ialpha and Ibeta by equation pIb = -(1/2) * Ialpha + (sqrt(3)/2) * Ibeta */
     *pIb = -0.5f * Ialpha + 0.8660254039f * Ibeta;
-}
+  }
+
 
 /**
+  @ingroup inv_clarke
   @brief  Inverse Clarke transform for Q31 version
   @param[in]  Ialpha  input two-phase orthogonal vector axis alpha
   @param[in]  Ibeta   input two-phase orthogonal vector axis beta
   @param[out] pIa     points to output three-phase coordinate <code>a</code>
   @param[out] pIb     points to output three-phase coordinate <code>b</code>
-  @return     none
 
   \par Scaling and Overflow Behavior
          The function is implemented using an internal 32-bit accumulator.
          The accumulator maintains 1.31 format by truncating lower 31 bits of the intermediate multiplication in 2.62 format.
          There is saturation on the subtraction, hence there is no risk of overflow.
  */
-__STATIC_FORCEINLINE void arm_inv_clarke_q31(q31_t Ialpha, q31_t Ibeta, q31_t *pIa, q31_t *pIb)
-{
-    q31_t product1, product2; /* Temporary variables used to store intermediate results */
+__STATIC_FORCEINLINE void arm_inv_clarke_q31(
+  q31_t Ialpha,
+  q31_t Ibeta,
+  q31_t * pIa,
+  q31_t * pIb)
+  {
+    q31_t product1, product2;                    /* Temporary variables used to store intermediate results */
 
     /* Calculating pIa from Ialpha by equation pIa = Ialpha */
     *pIa = Ialpha;
 
     /* Intermediate product is calculated by (1/(2*sqrt(3)) * Ia) */
-    product1 = (q31_t)(((q63_t)(Ialpha) * (0x40000000)) >> 31);
+    product1 = (q31_t) (((q63_t) (Ialpha) * (0x40000000)) >> 31);
 
     /* Intermediate product is calculated by (1/sqrt(3) * pIb) */
-    product2 = (q31_t)(((q63_t)(Ibeta) * (0x6ED9EBA1)) >> 31);
+    product2 = (q31_t) (((q63_t) (Ibeta) * (0x6ED9EBA1)) >> 31);
 
     /* pIb is calculated by subtracting the products */
     *pIb = __QSUB(product2, product1);
-}
+  }
+
+
+
 
-/**
-   * @} end of inv_clarke group
-   */
 
-#ifdef __cplusplus
+  
+#ifdef   __cplusplus
 }
 #endif
 
diff --git a/Libraries/CMSIS/5.9.0/DSP/Include/dsp/controller_functions_f16.h b/Libraries/CMSIS/5.9.0/DSP/1.16.2/Include/dsp/controller_functions_f16.h
old mode 100644
new mode 100755
similarity index 90%
rename from Libraries/CMSIS/5.9.0/DSP/Include/dsp/controller_functions_f16.h
rename to Libraries/CMSIS/5.9.0/DSP/1.16.2/Include/dsp/controller_functions_f16.h
index d2e03156351..a4622ec3d5d
--- a/Libraries/CMSIS/5.9.0/DSP/Include/dsp/controller_functions_f16.h
+++ b/Libraries/CMSIS/5.9.0/DSP/1.16.2/Include/dsp/controller_functions_f16.h
@@ -23,16 +23,18 @@
  * limitations under the License.
  */
 
-#ifndef _CONTROLLER_FUNCTIONS_F16_H_
-#define _CONTROLLER_FUNCTIONS_F16_H_
+ 
+#ifndef CONTROLLER_FUNCTIONS_F16_H_
+#define CONTROLLER_FUNCTIONS_F16_H_
 
-#ifdef __cplusplus
-extern "C" {
+#ifdef   __cplusplus
+extern "C"
+{
 #endif
 
 #if defined(ARM_FLOAT16_SUPPORTED)
 #endif /*defined(ARM_FLOAT16_SUPPORTED)*/
-#ifdef __cplusplus
+#ifdef   __cplusplus
 }
 #endif
 
diff --git a/Libraries/CMSIS/5.9.0/DSP/1.16.2/Include/dsp/debug.h b/Libraries/CMSIS/5.9.0/DSP/1.16.2/Include/dsp/debug.h
new file mode 100644
index 00000000000..b98e038931b
--- /dev/null
+++ b/Libraries/CMSIS/5.9.0/DSP/1.16.2/Include/dsp/debug.h
@@ -0,0 +1,146 @@
+/******************************************************************************
+ * @file     debug.h
+ * @brief    Public header file for CMSIS DSP Library
+ * @version  V1.10.0
+ * @date     08 July 2021
+ * Target Processor: Cortex-M and Cortex-A cores
+ ******************************************************************************/
+/*
+ * Copyright (c) 2010-2020 Arm Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+ 
+#ifndef DEBUG_FUNCTIONS_H_
+#define DEBUG_FUNCTIONS_H_
+
+#include "arm_math_types.h"
+#include "arm_math_memory.h"
+
+#include "dsp/none.h"
+#include "dsp/utils.h"
+
+#include "dsp/matrix_functions.h"
+#include "dsp/matrix_functions_f16.h"
+
+#include <stdio.h>
+
+#ifdef   __cplusplus
+extern "C"
+{
+#endif
+
+#if defined(ARM_FLOAT16_SUPPORTED)
+#define PROW_f16(S,NB) /* Print (S)[0]..(S)[(NB)-1] as "{a,b,...}" with %f; (S)[0] is always read */ \
+{                                 \
+    printf("{%f",(double)(S)[0]);   \
+    for(unsigned int i=1;i<(NB) ;i++)       \
+    {                             \
+       printf(",%f",(double)(S)[i]);\
+    }                             \
+    printf("}");                  \
+}; /* NOTE: this ';' is part of the expansion, so "PROW_f16(...);" leaves an extra empty statement */
+
+#define PV_f16(S,V,NB) /* Print string S, '=', the NB elements of V via PROW_f16, then ';' and a newline */ \
+{                     \
+    printf("%s=",(S));  \
+    PROW_f16((V),(NB));   \
+    printf(";\n");    \
+}; /* NOTE: this ';' is part of the expansion (same caveat as PROW_f16) */
+
+#define PM_f16(S,M) /* Print string S, "={", one PROW_f16 per row of *M (rows joined by newline + ','), then "};" */ \
+{                                                         \
+    printf("%s={",(S));                                     \
+    for(unsigned int row=0;row<(M)->numRows;row++)                   \
+    {                                                     \
+        if (row != 0)                                     \
+        {                                                 \
+            printf("\n,");                                \
+        }                                                 \
+        PROW_f16((M)->pData + row * (M)->numCols, (M)->numCols);\
+    }                                                     \
+    printf("};\n");                                       \
+}
+
+#endif /* defined(ARM_FLOAT16_SUPPORTED) */
+
+#define PROW_f32(S,NB) /* Print (S)[0]..(S)[(NB)-1] as "{a,b,...}" with %f; (S)[0] is always read */ \
+{                                 \
+    printf("{%f",(double)(S)[0]);   \
+    for(unsigned int i=1;i<(NB) ;i++)       \
+    {                             \
+       printf(",%f",(double)(S)[i]);\
+    }                             \
+    printf("}");                  \
+}; /* NOTE: this ';' is part of the expansion, so "PROW_f32(...);" leaves an extra empty statement */
+
+#define PV_f32(S,V,NB) /* Print string S, '=', the NB elements of V via PROW_f32, then ';' and a newline */ \
+{                     \
+    printf("%s=",(S));  \
+    PROW_f32((V),(NB));   \
+    printf(";\n");    \
+}; /* NOTE: this ';' is part of the expansion (same caveat as PROW_f32) */
+
+#define PM_f32(S,M) /* Print string S, "={", one PROW_f32 per row of *M (rows joined by newline + ','), then "};" */ \
+{                                                         \
+    printf("%s={",(S));                                     \
+    for(unsigned int row=0;row<(M)->numRows;row++)                   \
+    {                                                     \
+        if (row != 0)                                     \
+        {                                                 \
+            printf("\n,");                                \
+        }                                                 \
+        PROW_f32((M)->pData + row * (M)->numCols, (M)->numCols);\
+    }                                                     \
+    printf("};\n");                                       \
+}
+
+#define PROW_f64(S,NB) /* Print (S)[0]..(S)[(NB)-1] as "{a,b,...}" with %.20g; (S)[0] is always read */ \
+{                                 \
+    printf("{%.20g",(double)(S)[0]);   \
+    for(unsigned int i=1;i<(NB) ;i++)       \
+    {                             \
+       printf(",%.20g",(double)(S)[i]);\
+    }                             \
+    printf("}");                  \
+}; /* NOTE: this ';' is part of the expansion, so "PROW_f64(...);" leaves an extra empty statement */
+
+#define PV_f64(S,V,NB) /* Print string S, '=', the NB elements of V via PROW_f64, then ';' and a newline */ \
+{                      \
+    printf("%s=",(S)); \
+    PROW_f64((V),(NB));\
+    printf(";\n");     \
+}; /* NOTE: this ';' is part of the expansion (same caveat as PROW_f64) */
+
+#define PM_f64(S,M) /* Print string S, "={", one PROW_f64 per row of *M (rows joined by newline + ','), then "};" */ \
+{                                                         \
+    printf("%s={",(S));                                     \
+    for(unsigned int row=0;row<(M)->numRows;row++)                   \
+    {                                                     \
+        if (row != 0)                                     \
+        {                                                 \
+            printf("\n,");                                \
+        }                                                 \
+        PROW_f64((M)->pData + row * (M)->numCols, (M)->numCols);\
+    }                                                     \
+    printf("};\n");                                       \
+}
+
+#ifdef   __cplusplus
+}
+#endif
+
+#endif /* ifndef DEBUG_FUNCTIONS_H_ */
diff --git a/Libraries/CMSIS/5.9.0/DSP/Include/dsp/distance_functions.h b/Libraries/CMSIS/5.9.0/DSP/1.16.2/Include/dsp/distance_functions.h
old mode 100644
new mode 100755
similarity index 72%
rename from Libraries/CMSIS/5.9.0/DSP/Include/dsp/distance_functions.h
rename to Libraries/CMSIS/5.9.0/DSP/1.16.2/Include/dsp/distance_functions.h
index 2273b645261..995efab8b63
--- a/Libraries/CMSIS/5.9.0/DSP/Include/dsp/distance_functions.h
+++ b/Libraries/CMSIS/5.9.0/DSP/1.16.2/Include/dsp/distance_functions.h
@@ -23,8 +23,9 @@
  * limitations under the License.
  */
 
-#ifndef _DISTANCE_FUNCTIONS_H_
-#define _DISTANCE_FUNCTIONS_H_
+ 
+#ifndef DISTANCE_FUNCTIONS_H_
+#define DISTANCE_FUNCTIONS_H_
 
 #include "arm_math_types.h"
 #include "arm_math_memory.h"
@@ -35,13 +36,16 @@
 #include "dsp/statistics_functions.h"
 #include "dsp/basic_math_functions.h"
 #include "dsp/fast_math_functions.h"
+#include "dsp/matrix_functions.h"
 
-#ifdef __cplusplus
-extern "C" {
+#ifdef   __cplusplus
+extern "C"
+{
 #endif
 
+
 /**
- * @defgroup groupDistance Distance functions
+ * @defgroup groupDistance Distance Functions
  *
  * Distance functions for use with clustering algorithms.
  * There are distance functions for float vectors and boolean vectors.
@@ -49,11 +53,11 @@ extern "C" {
  */
 
 /* 6.14 bug */
-#if defined(__ARMCC_VERSION) && (__ARMCC_VERSION >= 6100100) && (__ARMCC_VERSION < 6150001)
-
+#if defined (__ARMCC_VERSION) && (__ARMCC_VERSION >= 6100100) && (__ARMCC_VERSION < 6150001)
+ 
 __attribute__((weak)) float __powisf2(float a, int b);
 
-#endif
+#endif 
 
 /**
  * @brief        Euclidean distance between two vectors
@@ -64,7 +68,7 @@ __attribute__((weak)) float __powisf2(float a, int b);
  *
  */
 
-float32_t arm_euclidean_distance_f32(const float32_t *pA, const float32_t *pB, uint32_t blockSize);
+float32_t arm_euclidean_distance_f32(const float32_t *pA,const float32_t *pB, uint32_t blockSize);
 
 /**
  * @brief        Euclidean distance between two vectors
@@ -75,7 +79,7 @@ float32_t arm_euclidean_distance_f32(const float32_t *pA, const float32_t *pB, u
  *
  */
 
-float64_t arm_euclidean_distance_f64(const float64_t *pA, const float64_t *pB, uint32_t blockSize);
+float64_t arm_euclidean_distance_f64(const float64_t *pA,const float64_t *pB, uint32_t blockSize);
 
 /**
  * @brief        Bray-Curtis distance between two vectors
@@ -85,7 +89,7 @@ float64_t arm_euclidean_distance_f64(const float64_t *pA, const float64_t *pB, u
  * @return distance
  *
  */
-float32_t arm_braycurtis_distance_f32(const float32_t *pA, const float32_t *pB, uint32_t blockSize);
+float32_t arm_braycurtis_distance_f32(const float32_t *pA,const float32_t *pB, uint32_t blockSize);
 
 /**
  * @brief        Canberra distance between two vectors
@@ -100,7 +104,8 @@ float32_t arm_braycurtis_distance_f32(const float32_t *pA, const float32_t *pB,
  * @return distance
  *
  */
-float32_t arm_canberra_distance_f32(const float32_t *pA, const float32_t *pB, uint32_t blockSize);
+float32_t arm_canberra_distance_f32(const float32_t *pA,const float32_t *pB, uint32_t blockSize);
+
 
 /**
  * @brief        Chebyshev distance between two vectors
@@ -110,7 +115,8 @@ float32_t arm_canberra_distance_f32(const float32_t *pA, const float32_t *pB, ui
  * @return distance
  *
  */
-float32_t arm_chebyshev_distance_f32(const float32_t *pA, const float32_t *pB, uint32_t blockSize);
+float32_t arm_chebyshev_distance_f32(const float32_t *pA,const float32_t *pB, uint32_t blockSize);
+
 
 /**
  * @brief        Chebyshev distance between two vectors
@@ -120,7 +126,8 @@ float32_t arm_chebyshev_distance_f32(const float32_t *pA, const float32_t *pB, u
  * @return distance
  *
  */
-float64_t arm_chebyshev_distance_f64(const float64_t *pA, const float64_t *pB, uint32_t blockSize);
+float64_t arm_chebyshev_distance_f64(const float64_t *pA,const float64_t *pB, uint32_t blockSize);
+
 
 /**
  * @brief        Cityblock (Manhattan) distance between two vectors
@@ -130,7 +137,7 @@ float64_t arm_chebyshev_distance_f64(const float64_t *pA, const float64_t *pB, u
  * @return distance
  *
  */
-float32_t arm_cityblock_distance_f32(const float32_t *pA, const float32_t *pB, uint32_t blockSize);
+float32_t arm_cityblock_distance_f32(const float32_t *pA,const float32_t *pB, uint32_t blockSize);
 
 /**
  * @brief        Cityblock (Manhattan) distance between two vectors
@@ -140,7 +147,7 @@ float32_t arm_cityblock_distance_f32(const float32_t *pA, const float32_t *pB, u
  * @return distance
  *
  */
-float64_t arm_cityblock_distance_f64(const float64_t *pA, const float64_t *pB, uint32_t blockSize);
+float64_t arm_cityblock_distance_f64(const float64_t *pA,const float64_t *pB, uint32_t blockSize);
 
 /**
  * @brief        Correlation distance between two vectors
@@ -153,7 +160,7 @@ float64_t arm_cityblock_distance_f64(const float64_t *pA, const float64_t *pB, u
  * @return distance
  *
  */
-float32_t arm_correlation_distance_f32(float32_t *pA, float32_t *pB, uint32_t blockSize);
+float32_t arm_correlation_distance_f32(float32_t *pA,float32_t *pB, uint32_t blockSize);
 
 /**
  * @brief        Cosine distance between two vectors
@@ -165,7 +172,7 @@ float32_t arm_correlation_distance_f32(float32_t *pA, float32_t *pB, uint32_t bl
  *
  */
 
-float32_t arm_cosine_distance_f32(const float32_t *pA, const float32_t *pB, uint32_t blockSize);
+float32_t arm_cosine_distance_f32(const float32_t *pA,const float32_t *pB, uint32_t blockSize);
 
 /**
  * @brief        Cosine distance between two vectors
@@ -177,7 +184,7 @@ float32_t arm_cosine_distance_f32(const float32_t *pA, const float32_t *pB, uint
  *
  */
 
-float64_t arm_cosine_distance_f64(const float64_t *pA, const float64_t *pB, uint32_t blockSize);
+float64_t arm_cosine_distance_f64(const float64_t *pA,const float64_t *pB, uint32_t blockSize);
 
 /**
  * @brief        Jensen-Shannon distance between two vectors
@@ -199,8 +206,7 @@ float64_t arm_cosine_distance_f64(const float64_t *pA, const float64_t *pB, uint
  *
  */
 
-float32_t arm_jensenshannon_distance_f32(const float32_t *pA, const float32_t *pB,
-                                         uint32_t blockSize);
+float32_t arm_jensenshannon_distance_f32(const float32_t *pA,const float32_t *pB,uint32_t blockSize);
 
 /**
  * @brief        Minkowski distance between two vectors
@@ -213,8 +219,9 @@ float32_t arm_jensenshannon_distance_f32(const float32_t *pA, const float32_t *p
  *
  */
 
-float32_t arm_minkowski_distance_f32(const float32_t *pA, const float32_t *pB, int32_t order,
-                                     uint32_t blockSize);
+
+
+float32_t arm_minkowski_distance_f32(const float32_t *pA,const float32_t *pB, int32_t order, uint32_t blockSize);
 
 /**
  * @brief        Dice distance between two vectors
@@ -227,6 +234,7 @@ float32_t arm_minkowski_distance_f32(const float32_t *pA, const float32_t *pB, i
  *
  */
 
+
 float32_t arm_dice_distance(const uint32_t *pA, const uint32_t *pB, uint32_t numberOfBools);
 
 /**
@@ -275,8 +283,7 @@ float32_t arm_kulsinski_distance(const uint32_t *pA, const uint32_t *pB, uint32_
  *
  */
 
-float32_t arm_rogerstanimoto_distance(const uint32_t *pA, const uint32_t *pB,
-                                      uint32_t numberOfBools);
+float32_t arm_rogerstanimoto_distance(const uint32_t *pA, const uint32_t *pB, uint32_t numberOfBools);
 
 /**
  * @brief        Russell-Rao distance between two vectors
@@ -300,8 +307,7 @@ float32_t arm_russellrao_distance(const uint32_t *pA, const uint32_t *pB, uint32
  *
  */
 
-float32_t arm_sokalmichener_distance(const uint32_t *pA, const uint32_t *pB,
-                                     uint32_t numberOfBools);
+float32_t arm_sokalmichener_distance(const uint32_t *pA, const uint32_t *pB, uint32_t numberOfBools);
 
 /**
  * @brief        Sokal-Sneath distance between two vectors
@@ -327,7 +333,53 @@ float32_t arm_sokalsneath_distance(const uint32_t *pA, const uint32_t *pB, uint3
 
 float32_t arm_yule_distance(const uint32_t *pA, const uint32_t *pB, uint32_t numberOfBools);
 
-#ifdef __cplusplus
+typedef enum
+  {
+    ARM_DTW_SAKOE_CHIBA_WINDOW = 1,
+    /*ARM_DTW_ITAKURA_WINDOW = 2,*/
+    ARM_DTW_SLANTED_BAND_WINDOW = 3
+  } arm_dtw_window;
+
+/**
+ * @brief        Window for dynamic time warping computation
+ * @param[in]    windowType  Type of window
+ * @param[in]    windowSize  Window size 
+ * @param[in,out] pWindow Window
+ * @return Error if window type not recognized
+ *
+ */
+arm_status arm_dtw_init_window_q7(const arm_dtw_window windowType,
+                                  const int32_t windowSize,
+                                  arm_matrix_instance_q7 *pWindow);
+
+/**
+ * @brief         Dynamic Time Warping distance
+ * @param[in]     pDistance  Distance matrix (Query rows * Template columns)
+ * @param[in]     pWindow  Windowing (can be NULL if no windowing used)
+ * @param[out]    pDTW Temporary cost buffer (same size as pDistance)
+ * @param[out]    distance Distance
+ * @return Error in case no path can be found with window constraint
+ *
+ */
+
+arm_status arm_dtw_distance_f32(const arm_matrix_instance_f32 *pDistance,
+                               const arm_matrix_instance_q7 *pWindow,
+                               arm_matrix_instance_f32 *pDTW,
+                               float32_t *distance);
+
+
+/**
+ * @brief        Mapping between query and template
+ * @param[in]    pDTW  Cost matrix (Query rows * Template columns)
+ * @param[out]   pPath Warping path in cost matrix 2*(nb rows + nb columns)
+ * @param[out]   pathLength Length of path in number of points
+ * 
+ */
+
+void arm_dtw_path_f32(const arm_matrix_instance_f32 *pDTW,
+                      int16_t *pPath,
+                      uint32_t *pathLength);
+#ifdef   __cplusplus
 }
 #endif
 
diff --git a/Libraries/CMSIS/5.9.0/DSP/Include/dsp/distance_functions_f16.h b/Libraries/CMSIS/5.9.0/DSP/1.16.2/Include/dsp/distance_functions_f16.h
old mode 100644
new mode 100755
similarity index 78%
rename from Libraries/CMSIS/5.9.0/DSP/Include/dsp/distance_functions_f16.h
rename to Libraries/CMSIS/5.9.0/DSP/1.16.2/Include/dsp/distance_functions_f16.h
index 089633c7e09..224d8149974
--- a/Libraries/CMSIS/5.9.0/DSP/Include/dsp/distance_functions_f16.h
+++ b/Libraries/CMSIS/5.9.0/DSP/1.16.2/Include/dsp/distance_functions_f16.h
@@ -23,8 +23,9 @@
  * limitations under the License.
  */
 
-#ifndef _DISTANCE_FUNCTIONS_F16_H_
-#define _DISTANCE_FUNCTIONS_F16_H_
+ 
+#ifndef DISTANCE_FUNCTIONS_F16_H_
+#define DISTANCE_FUNCTIONS_F16_H_
 
 #include "arm_math_types_f16.h"
 #include "arm_math_memory.h"
@@ -33,18 +34,19 @@
 #include "dsp/utils.h"
 
 /* 6.14 bug */
-#if defined(__ARMCC_VERSION) && (__ARMCC_VERSION >= 6100100) && (__ARMCC_VERSION < 6150001)
+#if defined (__ARMCC_VERSION) && (__ARMCC_VERSION >= 6100100) && (__ARMCC_VERSION < 6150001)
 /* Defined in minkowski_f32 */
 __attribute__((weak)) float __powisf2(float a, int b);
-#endif
+#endif 
 
 #include "dsp/statistics_functions_f16.h"
 #include "dsp/basic_math_functions_f16.h"
 
 #include "dsp/fast_math_functions_f16.h"
 
-#ifdef __cplusplus
-extern "C" {
+#ifdef   __cplusplus
+extern "C"
+{
 #endif
 
 #if defined(ARM_FLOAT16_SUPPORTED)
@@ -55,10 +57,9 @@ extern "C" {
  * @param[in]    pB         Second vector
  * @param[in]    blockSize  vector length
  * @return distance
- *
  */
+float16_t arm_euclidean_distance_f16(const float16_t *pA,const float16_t *pB, uint32_t blockSize);
 
-float16_t arm_euclidean_distance_f16(const float16_t *pA, const float16_t *pB, uint32_t blockSize);
 
 /**
  * @brief        Bray-Curtis distance between two vectors
@@ -66,9 +67,8 @@ float16_t arm_euclidean_distance_f16(const float16_t *pA, const float16_t *pB, u
  * @param[in]    pB         Second vector
  * @param[in]    blockSize  vector length
  * @return distance
- *
  */
-float16_t arm_braycurtis_distance_f16(const float16_t *pA, const float16_t *pB, uint32_t blockSize);
+float16_t arm_braycurtis_distance_f16(const float16_t *pA,const float16_t *pB, uint32_t blockSize);
 
 /**
  * @brief        Canberra distance between two vectors
@@ -81,9 +81,9 @@ float16_t arm_braycurtis_distance_f16(const float16_t *pA, const float16_t *pB,
  * @param[in]    pB         Second vector
  * @param[in]    blockSize  vector length
  * @return distance
- *
  */
-float16_t arm_canberra_distance_f16(const float16_t *pA, const float16_t *pB, uint32_t blockSize);
+float16_t arm_canberra_distance_f16(const float16_t *pA,const float16_t *pB, uint32_t blockSize);
+
 
 /**
  * @brief        Chebyshev distance between two vectors
@@ -91,9 +91,9 @@ float16_t arm_canberra_distance_f16(const float16_t *pA, const float16_t *pB, ui
  * @param[in]    pB         Second vector
  * @param[in]    blockSize  vector length
  * @return distance
- *
  */
-float16_t arm_chebyshev_distance_f16(const float16_t *pA, const float16_t *pB, uint32_t blockSize);
+float16_t arm_chebyshev_distance_f16(const float16_t *pA,const float16_t *pB, uint32_t blockSize);
+
 
 /**
  * @brief        Cityblock (Manhattan) distance between two vectors
@@ -101,9 +101,9 @@ float16_t arm_chebyshev_distance_f16(const float16_t *pA, const float16_t *pB, u
  * @param[in]    pB         Second vector
  * @param[in]    blockSize  vector length
  * @return distance
- *
  */
-float16_t arm_cityblock_distance_f16(const float16_t *pA, const float16_t *pB, uint32_t blockSize);
+float16_t arm_cityblock_distance_f16(const float16_t *pA,const float16_t *pB, uint32_t blockSize);
+
 
 /**
  * @brief        Correlation distance between two vectors
@@ -114,9 +114,9 @@ float16_t arm_cityblock_distance_f16(const float16_t *pA, const float16_t *pB, u
  * @param[in]    pB         Second vector
  * @param[in]    blockSize  vector length
  * @return distance
- *
  */
-float16_t arm_correlation_distance_f16(float16_t *pA, float16_t *pB, uint32_t blockSize);
+float16_t arm_correlation_distance_f16(float16_t *pA,float16_t *pB, uint32_t blockSize);
+
 
 /**
  * @brief        Cosine distance between two vectors
@@ -125,10 +125,9 @@ float16_t arm_correlation_distance_f16(float16_t *pA, float16_t *pB, uint32_t bl
  * @param[in]    pB         Second vector
  * @param[in]    blockSize  vector length
  * @return distance
- *
  */
+float16_t arm_cosine_distance_f16(const float16_t *pA,const float16_t *pB, uint32_t blockSize);
 
-float16_t arm_cosine_distance_f16(const float16_t *pA, const float16_t *pB, uint32_t blockSize);
 
 /**
  * @brief        Jensen-Shannon distance between two vectors
@@ -147,11 +146,9 @@ float16_t arm_cosine_distance_f16(const float16_t *pA, const float16_t *pB, uint
  * @param[in]    pB         Second vector
  * @param[in]    blockSize  vector length
  * @return distance
- *
  */
+float16_t arm_jensenshannon_distance_f16(const float16_t *pA,const float16_t *pB,uint32_t blockSize);
 
-float16_t arm_jensenshannon_distance_f16(const float16_t *pA, const float16_t *pB,
-                                         uint32_t blockSize);
 
 /**
  * @brief        Minkowski distance between two vectors
@@ -161,14 +158,12 @@ float16_t arm_jensenshannon_distance_f16(const float16_t *pA, const float16_t *p
  * @param[in]    n          Norm order (>= 2)
  * @param[in]    blockSize  vector length
  * @return distance
- *
  */
+float16_t arm_minkowski_distance_f16(const float16_t *pA,const float16_t *pB, int32_t order, uint32_t blockSize);
 
-float16_t arm_minkowski_distance_f16(const float16_t *pA, const float16_t *pB, int32_t order,
-                                     uint32_t blockSize);
 
 #endif /*defined(ARM_FLOAT16_SUPPORTED)*/
-#ifdef __cplusplus
+#ifdef   __cplusplus
 }
 #endif
 
diff --git a/Libraries/CMSIS/5.9.0/DSP/Include/dsp/fast_math_functions.h b/Libraries/CMSIS/5.9.0/DSP/1.16.2/Include/dsp/fast_math_functions.h
old mode 100644
new mode 100755
similarity index 74%
rename from Libraries/CMSIS/5.9.0/DSP/Include/dsp/fast_math_functions.h
rename to Libraries/CMSIS/5.9.0/DSP/1.16.2/Include/dsp/fast_math_functions.h
index a0bf53c3120..8e600ccff30
--- a/Libraries/CMSIS/5.9.0/DSP/Include/dsp/fast_math_functions.h
+++ b/Libraries/CMSIS/5.9.0/DSP/1.16.2/Include/dsp/fast_math_functions.h
@@ -23,8 +23,9 @@
  * limitations under the License.
  */
 
-#ifndef _FAST_MATH_FUNCTIONS_H_
-#define _FAST_MATH_FUNCTIONS_H_
+ 
+#ifndef FAST_MATH_FUNCTIONS_H_
+#define FAST_MATH_FUNCTIONS_H_
 
 #include "arm_math_types.h"
 #include "arm_math_memory.h"
@@ -34,22 +35,31 @@
 
 #include "dsp/basic_math_functions.h"
 
-#ifdef __cplusplus
-extern "C" {
+#include <math.h>
+
+#ifdef   __cplusplus
+extern "C"
+{
 #endif
 
-/**
+  /**
    * @brief Macros required for SINE and COSINE Fast math approximations
    */
 
-#define FAST_MATH_TABLE_SIZE 512
-#define FAST_MATH_Q31_SHIFT (32 - 10)
-#define FAST_MATH_Q15_SHIFT (16 - 10)
-
+#define FAST_MATH_TABLE_SIZE  512
+#define FAST_MATH_Q31_SHIFT   (32 - 10)
+#define FAST_MATH_Q15_SHIFT   (16 - 10)
+  
 #ifndef PI
-#define PI 3.14159265358979f
+  #define PI               3.14159265358979f
 #endif
 
+#ifndef PI_F64 
+  #define PI_F64 3.14159265358979323846 /* unsuffixed literal: double-typed counterpart of the float PI above */
+#endif
+
+
+
 /**
  * @defgroup groupFastMath Fast Math Functions
  * This set of functions provides a fast approximation to sine, cosine, and square root.
@@ -59,125 +69,135 @@ extern "C" {
  *
  */
 
-/**
-   * @ingroup groupFastMath
-   */
-
-/**
-  @addtogroup sin
-  @{
- */
 
-/**
+   /**
    * @brief  Fast approximation to the trigonometric sine function for floating-point data.
    * @param[in] x  input value in radians.
    * @return  sin(x).
    */
-float32_t arm_sin_f32(float32_t x);
+  float32_t arm_sin_f32(
+  float32_t x);
 
-/**
+
+  /**
    * @brief  Fast approximation to the trigonometric sine function for Q31 data.
    * @param[in] x  Scaled input value in radians.
    * @return  sin(x).
    */
-q31_t arm_sin_q31(q31_t x);
+  q31_t arm_sin_q31(
+  q31_t x);
 
-/**
+  /**
    * @brief  Fast approximation to the trigonometric sine function for Q15 data.
    * @param[in] x  Scaled input value in radians.
    * @return  sin(x).
    */
-q15_t arm_sin_q15(q15_t x);
+  q15_t arm_sin_q15(
+  q15_t x);
 
-/**
-  @} end of sin group
- */
-
-/**
-  @addtogroup cos
-  @{
- */
 
-/**
+  /**
    * @brief  Fast approximation to the trigonometric cosine function for floating-point data.
    * @param[in] x  input value in radians.
    * @return  cos(x).
    */
-float32_t arm_cos_f32(float32_t x);
+  float32_t arm_cos_f32(
+  float32_t x);
 
-/**
+
+  /**
    * @brief Fast approximation to the trigonometric cosine function for Q31 data.
    * @param[in] x  Scaled input value in radians.
    * @return  cos(x).
    */
-q31_t arm_cos_q31(q31_t x);
+  q31_t arm_cos_q31(
+  q31_t x);
 
-/**
+
+  /**
    * @brief  Fast approximation to the trigonometric cosine function for Q15 data.
    * @param[in] x  Scaled input value in radians.
    * @return  cos(x).
    */
-q15_t arm_cos_q15(q15_t x);
+  q15_t arm_cos_q15(
+  q15_t x);
 
-/**
-  @} end of cos group
- */
 
 /**
   @brief         Floating-point vector of log values.
   @param[in]     pSrc       points to the input vector
   @param[out]    pDst       points to the output vector
   @param[in]     blockSize  number of samples in each vector
-  @return        none
  */
-void arm_vlog_f32(const float32_t *pSrc, float32_t *pDst, uint32_t blockSize);
+  void arm_vlog_f32(
+  const float32_t * pSrc,
+        float32_t * pDst,
+        uint32_t blockSize);
+
+
 
 /**
   @brief         Floating-point vector of log values.
   @param[in]     pSrc       points to the input vector
   @param[out]    pDst       points to the output vector
   @param[in]     blockSize  number of samples in each vector
-  @return        none
  */
-void arm_vlog_f64(const float64_t *pSrc, float64_t *pDst, uint32_t blockSize);
+  void arm_vlog_f64(
+  const float64_t * pSrc,
+        float64_t * pDst,
+        uint32_t blockSize);
 
-/**
+
+
+  /**
    * @brief  q31 vector of log values.
    * @param[in]     pSrc       points to the input vector in q31
    * @param[out]    pDst       points to the output vector in q5.26
    * @param[in]     blockSize  number of samples in each vector
-   * @return        none
    */
-void arm_vlog_q31(const q31_t *pSrc, q31_t *pDst, uint32_t blockSize);
+  void arm_vlog_q31(const q31_t * pSrc,
+        q31_t * pDst,
+        uint32_t blockSize);
 
-/**
+  /**
    * @brief  q15 vector of log values.
    * @param[in]     pSrc       points to the input vector in q15
    * @param[out]    pDst       points to the output vector in q4.11
    * @param[in]     blockSize  number of samples in each vector
-   * @return        none
    */
-void arm_vlog_q15(const q15_t *pSrc, q15_t *pDst, uint32_t blockSize);
+  void arm_vlog_q15(const q15_t * pSrc,
+        q15_t * pDst,
+        uint32_t blockSize);
+
+
 
 /**
   @brief         Floating-point vector of exp values.
   @param[in]     pSrc       points to the input vector
   @param[out]    pDst       points to the output vector
   @param[in]     blockSize  number of samples in each vector
-  @return        none
  */
-void arm_vexp_f32(const float32_t *pSrc, float32_t *pDst, uint32_t blockSize);
+  void arm_vexp_f32(
+  const float32_t * pSrc,
+        float32_t * pDst,
+        uint32_t blockSize);
+
+
 
 /**
   @brief         Floating-point vector of exp values.
   @param[in]     pSrc       points to the input vector
   @param[out]    pDst       points to the output vector
   @param[in]     blockSize  number of samples in each vector
-  @return        none
  */
-void arm_vexp_f64(const float64_t *pSrc, float64_t *pDst, uint32_t blockSize);
+  void arm_vexp_f64(
+  const float64_t * pSrc,
+        float64_t * pDst,
+        uint32_t blockSize);
 
-/**
+
+
+ /**
    * @defgroup SQRT Square Root
    *
    * Computes the square root of a number.
@@ -197,7 +217,8 @@ void arm_vexp_f64(const float64_t *pSrc, float64_t *pDst, uint32_t blockSize);
    * </pre>
    */
 
-/**
+
+  /**
    * @addtogroup SQRT
    * @{
    */
@@ -210,33 +231,49 @@ void arm_vexp_f64(const float64_t *pSrc, float64_t *pDst, uint32_t blockSize);
                    - \ref ARM_MATH_SUCCESS        : input value is positive
                    - \ref ARM_MATH_ARGUMENT_ERROR : input value is negative; *pOut is set to 0
  */
-__STATIC_FORCEINLINE arm_status arm_sqrt_f32(const float32_t in, float32_t *pOut)
-{
-    if (in >= 0.0f) {
-#if defined(__CC_ARM)
-#if defined __TARGET_FPU_VFP
-        *pOut = __sqrtf(in);
-#else
-        *pOut = sqrtf(in);
-#endif
-
-#elif defined(__ICCARM__)
-#if defined __ARMVFP__
-        __ASM("VSQRT.F32 %0,%1" : "=t"(*pOut) : "t"(in));
+__STATIC_FORCEINLINE arm_status arm_sqrt_f32(
+  const float32_t in,
+  float32_t * pOut)
+  {
+    if (in >= 0.0f) /* negative input (and NaN, which fails this compare) goes to the error branch */
+    {
+#if defined ( __CC_ARM ) /* toolchain dispatch: every branch below stores the root in *pOut */
+  #if defined __TARGET_FPU_VFP
+      *pOut = __sqrtf(in);
+  #else
+      *pOut = sqrtf(in);
+  #endif
+
+#elif defined ( __ICCARM__ )
+  #if defined __ARMVFP__
+      __ASM("VSQRT.F32 %0,%1" : "=t"(*pOut) : "t"(in));
+  #else
+      *pOut = sqrtf(in);
+  #endif
+
+#elif defined ( __ARMCC_VERSION ) && ( __ARMCC_VERSION >= 6010050 )
+      *pOut = _sqrtf(in);
+#elif defined(__GNUC_PYTHON__) /* tested before __GNUC__, so this configuration always uses plain sqrtf() */
+      *pOut = sqrtf(in);
+#elif defined ( __GNUC__ )
+  #if defined (__VFP_FP__) && !defined(__SOFTFP__) /* inline VSQRT.F32 only under this FP-macro test */
+      __ASM("VSQRT.F32 %0,%1" : "=t"(*pOut) : "t"(in));
+  #else
+      *pOut = sqrtf(in);
+  #endif
 #else
-        *pOut = sqrtf(in);
+      *pOut = sqrtf(in); /* fallback for any toolchain not matched above */
 #endif
 
-#else
-        *pOut = sqrtf(in);
-#endif
-
-        return (ARM_MATH_SUCCESS);
-    } else {
-        *pOut = 0.0f;
-        return (ARM_MATH_ARGUMENT_ERROR);
+      return (ARM_MATH_SUCCESS);
     }
-}
+    else
+    {
+      *pOut = 0.0f; /* documented contract: *pOut is set to 0 on error */
+      return (ARM_MATH_ARGUMENT_ERROR);
+    }
+  }
+
 
 /**
   @brief         Q31 square root function.
@@ -246,7 +283,10 @@ __STATIC_FORCEINLINE arm_status arm_sqrt_f32(const float32_t in, float32_t *pOut
                    - \ref ARM_MATH_SUCCESS        : input value is positive
                    - \ref ARM_MATH_ARGUMENT_ERROR : input value is negative; *pOut is set to 0
  */
-arm_status arm_sqrt_q31(q31_t in, q31_t *pOut);
+arm_status arm_sqrt_q31(
+  q31_t in,
+  q31_t * pOut);
+
 
 /**
   @brief         Q15 square root function.
@@ -256,13 +296,17 @@ arm_status arm_sqrt_q31(q31_t in, q31_t *pOut);
                    - \ref ARM_MATH_SUCCESS        : input value is positive
                    - \ref ARM_MATH_ARGUMENT_ERROR : input value is negative; *pOut is set to 0
  */
-arm_status arm_sqrt_q15(q15_t in, q15_t *pOut);
+arm_status arm_sqrt_q15(
+  q15_t in,
+  q15_t * pOut);
 
-/**
+
+
+  /**
    * @} end of SQRT group
    */
 
-/**
+  /**
   @brief         Fixed point division
   @param[in]     numerator    Numerator
   @param[in]     denominator  Denominator
@@ -274,9 +318,12 @@ arm_status arm_sqrt_q15(q15_t in, q15_t *pOut);
   to the saturated negative or positive value.
  */
 
-arm_status arm_divide_q15(q15_t numerator, q15_t denominator, q15_t *quotient, int16_t *shift);
+arm_status arm_divide_q15(q15_t numerator,
+  q15_t denominator,
+  q15_t *quotient,
+  int16_t *shift);
 
-/**
+  /**
   @brief         Fixed point division
   @param[in]     numerator    Numerator
   @param[in]     denominator  Denominator
@@ -288,36 +335,42 @@ arm_status arm_divide_q15(q15_t numerator, q15_t denominator, q15_t *quotient, i
   to the saturated negative or positive value.
  */
 
-arm_status arm_divide_q31(q31_t numerator, q31_t denominator, q31_t *quotient, int16_t *shift);
+arm_status arm_divide_q31(q31_t numerator,
+  q31_t denominator,
+  q31_t *quotient,
+  int16_t *shift);
 
-/**
+
+
+  /**
      @brief  Arc tangent in radian of y/x using sign of x and y to determine right quadrant.
      @param[in]   y  y coordinate
      @param[in]   x  x coordinate
      @param[out]  result  Result
      @return  error status.
    */
-arm_status arm_atan2_f32(float32_t y, float32_t x, float32_t *result);
+  arm_status arm_atan2_f32(float32_t y,float32_t x,float32_t *result);
 
-/**
+
+  /**
      @brief  Arc tangent in radian of y/x using sign of x and y to determine right quadrant.
      @param[in]   y  y coordinate
      @param[in]   x  x coordinate
      @param[out]  result  Result in Q2.29
      @return  error status.
    */
-arm_status arm_atan2_q31(q31_t y, q31_t x, q31_t *result);
+  arm_status arm_atan2_q31(q31_t y,q31_t x,q31_t *result);
 
-/**
+  /**
      @brief  Arc tangent in radian of y/x using sign of x and y to determine right quadrant.
      @param[in]   y  y coordinate
      @param[in]   x  x coordinate
      @param[out]  result  Result in Q2.13
      @return  error status.
    */
-arm_status arm_atan2_q15(q15_t y, q15_t x, q15_t *result);
+  arm_status arm_atan2_q15(q15_t y,q15_t x,q15_t *result);
 
-#ifdef __cplusplus
+#ifdef   __cplusplus
 }
 #endif
 
diff --git a/Libraries/CMSIS/5.9.0/DSP/Include/dsp/fast_math_functions_f16.h b/Libraries/CMSIS/5.9.0/DSP/1.16.2/Include/dsp/fast_math_functions_f16.h
old mode 100644
new mode 100755
similarity index 79%
rename from Libraries/CMSIS/5.9.0/DSP/Include/dsp/fast_math_functions_f16.h
rename to Libraries/CMSIS/5.9.0/DSP/1.16.2/Include/dsp/fast_math_functions_f16.h
index 7aca2c59b68..1fa45a86e1b
--- a/Libraries/CMSIS/5.9.0/DSP/Include/dsp/fast_math_functions_f16.h
+++ b/Libraries/CMSIS/5.9.0/DSP/1.16.2/Include/dsp/fast_math_functions_f16.h
@@ -23,8 +23,9 @@
  * limitations under the License.
  */
 
-#ifndef _FAST_MATH_FUNCTIONS_F16_H_
-#define _FAST_MATH_FUNCTIONS_F16_H_
+ 
+#ifndef FAST_MATH_FUNCTIONS_F16_H_
+#define FAST_MATH_FUNCTIONS_F16_H_
 
 #include "arm_math_types_f16.h"
 #include "arm_math_memory.h"
@@ -35,13 +36,14 @@
 /* For sqrt_f32 */
 #include "dsp/fast_math_functions.h"
 
-#ifdef __cplusplus
-extern "C" {
+#ifdef   __cplusplus
+extern "C"
+{
 #endif
 
 #if defined(ARM_FLOAT16_SUPPORTED)
 
-/**
+ /**
    * @addtogroup SQRT
    * @{
    */
@@ -54,57 +56,66 @@ extern "C" {
                    - \ref ARM_MATH_SUCCESS        : input value is positive
                    - \ref ARM_MATH_ARGUMENT_ERROR : input value is negative; *pOut is set to 0
  */
-__STATIC_FORCEINLINE arm_status arm_sqrt_f16(float16_t in, float16_t *pOut)
-{
+__STATIC_FORCEINLINE arm_status arm_sqrt_f16(
+  float16_t in,
+  float16_t * pOut)
+  {
     float32_t r;
     arm_status status;
-    status = arm_sqrt_f32((float32_t)in, &r);
-    *pOut = (float16_t)r;
-    return (status);
-}
+    status=arm_sqrt_f32((float32_t)in,&r); /* widen to f32 and reuse the f32 square root */
+    *pOut=(float16_t)r;                    /* narrow the f32 result back to f16 */
+    return(status);                        /* status is passed through from arm_sqrt_f32 */
+  }
+
 
 /**
   @} end of SQRT group
  */
-
+  
 /**
   @brief         Floating-point vector of log values.
   @param[in]     pSrc       points to the input vector
   @param[out]    pDst       points to the output vector
   @param[in]     blockSize  number of samples in each vector
-  @return        none
  */
-void arm_vlog_f16(const float16_t *pSrc, float16_t *pDst, uint32_t blockSize);
+  void arm_vlog_f16(
+  const float16_t * pSrc,
+        float16_t * pDst,
+        uint32_t blockSize);
 
 /**
   @brief         Floating-point vector of exp values.
   @param[in]     pSrc       points to the input vector
   @param[out]    pDst       points to the output vector
   @param[in]     blockSize  number of samples in each vector
-  @return        none
  */
-void arm_vexp_f16(const float16_t *pSrc, float16_t *pDst, uint32_t blockSize);
+  void arm_vexp_f16(
+  const float16_t * pSrc,
+        float16_t * pDst,
+        uint32_t blockSize);
 
-/**
+  /**
   @brief         Floating-point vector of inverse values.
   @param[in]     pSrc       points to the input vector
   @param[out]    pDst       points to the output vector
   @param[in]     blockSize  number of samples in each vector
-  @return        none
  */
-void arm_vinverse_f16(const float16_t *pSrc, float16_t *pDst, uint32_t blockSize);
+  void arm_vinverse_f16(
+  const float16_t * pSrc,
+        float16_t * pDst,
+        uint32_t blockSize);
 
-/**
+  /**
      @brief  Arc tangent in radian of y/x using sign of x and y to determine right quadrant.
      @param[in]   y  y coordinate
      @param[in]   x  x coordinate
      @param[out]  result  Result
      @return  error status.
    */
-arm_status arm_atan2_f16(float16_t y, float16_t x, float16_t *result);
+  arm_status arm_atan2_f16(float16_t y,float16_t x,float16_t *result);
 
 #endif /*defined(ARM_FLOAT16_SUPPORTED)*/
-#ifdef __cplusplus
+#ifdef   __cplusplus
 }
 #endif
 
diff --git a/Libraries/CMSIS/5.9.0/DSP/Include/dsp/filtering_functions.h b/Libraries/CMSIS/5.9.0/DSP/1.16.2/Include/dsp/filtering_functions.h
old mode 100644
new mode 100755
similarity index 57%
rename from Libraries/CMSIS/5.9.0/DSP/Include/dsp/filtering_functions.h
rename to Libraries/CMSIS/5.9.0/DSP/1.16.2/Include/dsp/filtering_functions.h
index e290890d600..fa149595bbe
--- a/Libraries/CMSIS/5.9.0/DSP/Include/dsp/filtering_functions.h
+++ b/Libraries/CMSIS/5.9.0/DSP/1.16.2/Include/dsp/filtering_functions.h
@@ -23,8 +23,9 @@
  * limitations under the License.
  */
 
-#ifndef _FILTERING_FUNCTIONS_H_
-#define _FILTERING_FUNCTIONS_H_
+ 
+#ifndef FILTERING_FUNCTIONS_H_
+#define FILTERING_FUNCTIONS_H_
 
 #include "arm_math_types.h"
 #include "arm_math_memory.h"
@@ -35,76 +36,84 @@
 #include "dsp/support_functions.h"
 #include "dsp/fast_math_functions.h"
 
-#ifdef __cplusplus
-extern "C" {
+#ifdef   __cplusplus
+extern "C"
+{
 #endif
 
-#define DELTA_Q31 ((q31_t)(0x100))
-#define DELTA_Q15 ((q15_t)0x5)
+
+
+#define DELTA_Q31          ((q31_t)(0x100))
+#define DELTA_Q15          ((q15_t)0x5)
 
 /**
  * @defgroup groupFilters Filtering Functions
  */
-
-/**
+    
+  /**
    * @brief Instance structure for the Q7 FIR filter.
    */
-typedef struct {
-    uint16_t numTaps; /**< number of filter coefficients in the filter. */
-    q7_t *pState; /**< points to the state variable array. The array is of length numTaps+blockSize-1. */
-    const q7_t *pCoeffs; /**< points to the coefficient array. The array is of length numTaps.*/
-} arm_fir_instance_q7;
+  typedef struct
+  {
+          uint16_t numTaps;        /**< number of filter coefficients in the filter. */
+          q7_t *pState;            /**< points to the state variable array. The array is of length numTaps+blockSize-1. */
+    const q7_t *pCoeffs;           /**< points to the coefficient array. The array is of length numTaps.*/
+  } arm_fir_instance_q7;
 
-/**
+  /**
    * @brief Instance structure for the Q15 FIR filter.
    */
-typedef struct {
-    uint16_t numTaps; /**< number of filter coefficients in the filter. */
-    q15_t *
-        pState; /**< points to the state variable array. The array is of length numTaps+blockSize-1. */
-    const q15_t *pCoeffs; /**< points to the coefficient array. The array is of length numTaps.*/
-} arm_fir_instance_q15;
+  typedef struct
+  {
+          uint16_t numTaps;         /**< number of filter coefficients in the filter. */
+          q15_t *pState;            /**< points to the state variable array. The array is of length numTaps+blockSize-1. */
+    const q15_t *pCoeffs;           /**< points to the coefficient array. The array is of length numTaps.*/
+  } arm_fir_instance_q15;
 
-/**
+  /**
    * @brief Instance structure for the Q31 FIR filter.
    */
-typedef struct {
-    uint16_t numTaps; /**< number of filter coefficients in the filter. */
-    q31_t *
-        pState; /**< points to the state variable array. The array is of length numTaps+blockSize-1. */
-    const q31_t *pCoeffs; /**< points to the coefficient array. The array is of length numTaps. */
-} arm_fir_instance_q31;
+  typedef struct
+  {
+          uint16_t numTaps;         /**< number of filter coefficients in the filter. */
+          q31_t *pState;            /**< points to the state variable array. The array is of length numTaps+blockSize-1. */
+    const q31_t *pCoeffs;           /**< points to the coefficient array. The array is of length numTaps. */
+  } arm_fir_instance_q31;
 
-/**
+  /**
    * @brief Instance structure for the floating-point FIR filter.
    */
-typedef struct {
-    uint16_t numTaps; /**< number of filter coefficients in the filter. */
-    float32_t *
-        pState; /**< points to the state variable array. The array is of length numTaps+blockSize-1. */
-    const float32_t *pCoeffs; /**< points to the coefficient array. The array is of length numTaps. */
-} arm_fir_instance_f32;
+  typedef struct
+  {
+          uint16_t numTaps;     /**< number of filter coefficients in the filter. */
+          float32_t *pState;    /**< points to the state variable array. The array is of length numTaps+blockSize-1. */
+    const float32_t *pCoeffs;   /**< points to the coefficient array. The array is of length numTaps. */
+  } arm_fir_instance_f32;
 
-/**
+  /**
    * @brief Instance structure for the floating-point FIR filter.
    */
-typedef struct {
-    uint16_t numTaps; /**< number of filter coefficients in the filter. */
-    float64_t *
-        pState; /**< points to the state variable array. The array is of length numTaps+blockSize-1. */
-    const float64_t *pCoeffs; /**< points to the coefficient array. The array is of length numTaps. */
-} arm_fir_instance_f64;
+  typedef struct
+  {
+          uint16_t numTaps;     /**< number of filter coefficients in the filter. */
+          float64_t *pState;    /**< points to the state variable array. The array is of length numTaps+blockSize-1. */
+    const float64_t *pCoeffs;   /**< points to the coefficient array. The array is of length numTaps. */
+  } arm_fir_instance_f64;
 
-/**
+  /**
    * @brief Processing function for the Q7 FIR filter.
    * @param[in]  S          points to an instance of the Q7 FIR filter structure.
    * @param[in]  pSrc       points to the block of input data.
    * @param[out] pDst       points to the block of output data.
    * @param[in]  blockSize  number of samples to process.
    */
-void arm_fir_q7(const arm_fir_instance_q7 *S, const q7_t *pSrc, q7_t *pDst, uint32_t blockSize);
+  void arm_fir_q7(
+  const arm_fir_instance_q7 * S,
+  const q7_t * pSrc,
+        q7_t * pDst,
+        uint32_t blockSize);
 
-/**
+  /**
    * @brief  Initialization function for the Q7 FIR filter.
    * @param[in,out] S          points to an instance of the Q7 FIR structure.
    * @param[in]     numTaps    Number of filter coefficients in the filter.
@@ -115,29 +124,40 @@ void arm_fir_q7(const arm_fir_instance_q7 *S, const q7_t *pSrc, q7_t *pDst, uint
    * For the MVE version, the coefficient length must be a multiple of 16.
    * You can pad with zeros if you have less coefficients.
    */
-void arm_fir_init_q7(arm_fir_instance_q7 *S, uint16_t numTaps, const q7_t *pCoeffs, q7_t *pState,
-                     uint32_t blockSize);
+  void arm_fir_init_q7(
+        arm_fir_instance_q7 * S,
+        uint16_t numTaps,
+  const q7_t * pCoeffs,
+        q7_t * pState,
+        uint32_t blockSize);
 
-/**
+  /**
    * @brief Processing function for the Q15 FIR filter.
    * @param[in]  S          points to an instance of the Q15 FIR structure.
    * @param[in]  pSrc       points to the block of input data.
    * @param[out] pDst       points to the block of output data.
    * @param[in]  blockSize  number of samples to process.
    */
-void arm_fir_q15(const arm_fir_instance_q15 *S, const q15_t *pSrc, q15_t *pDst, uint32_t blockSize);
+  void arm_fir_q15(
+  const arm_fir_instance_q15 * S,
+  const q15_t * pSrc,
+        q15_t * pDst,
+        uint32_t blockSize);
 
-/**
+  /**
    * @brief Processing function for the fast Q15 FIR filter (fast version).
    * @param[in]  S          points to an instance of the Q15 FIR filter structure.
    * @param[in]  pSrc       points to the block of input data.
    * @param[out] pDst       points to the block of output data.
    * @param[in]  blockSize  number of samples to process.
    */
-void arm_fir_fast_q15(const arm_fir_instance_q15 *S, const q15_t *pSrc, q15_t *pDst,
-                      uint32_t blockSize);
+  void arm_fir_fast_q15(
+  const arm_fir_instance_q15 * S,
+  const q15_t * pSrc,
+        q15_t * pDst,
+        uint32_t blockSize);
 
-/**
+  /**
    * @brief  Initialization function for the Q15 FIR filter.
    * @param[in,out] S          points to an instance of the Q15 FIR filter structure.
    * @param[in]     numTaps    Number of filter coefficients in the filter. Must be even and greater than or equal to 4.
@@ -152,29 +172,40 @@ void arm_fir_fast_q15(const arm_fir_instance_q15 *S, const q15_t *pSrc, q15_t *p
    * You can pad with zeros if you have less coefficients.
    *
    */
-arm_status arm_fir_init_q15(arm_fir_instance_q15 *S, uint16_t numTaps, const q15_t *pCoeffs,
-                            q15_t *pState, uint32_t blockSize);
+  arm_status arm_fir_init_q15(
+        arm_fir_instance_q15 * S,
+        uint16_t numTaps,
+  const q15_t * pCoeffs,
+        q15_t * pState,
+        uint32_t blockSize);
 
-/**
+  /**
    * @brief Processing function for the Q31 FIR filter.
    * @param[in]  S          points to an instance of the Q31 FIR filter structure.
    * @param[in]  pSrc       points to the block of input data.
    * @param[out] pDst       points to the block of output data.
    * @param[in]  blockSize  number of samples to process.
    */
-void arm_fir_q31(const arm_fir_instance_q31 *S, const q31_t *pSrc, q31_t *pDst, uint32_t blockSize);
+  void arm_fir_q31(
+  const arm_fir_instance_q31 * S,
+  const q31_t * pSrc,
+        q31_t * pDst,
+        uint32_t blockSize);
 
-/**
+  /**
    * @brief Processing function for the fast Q31 FIR filter (fast version).
    * @param[in]  S          points to an instance of the Q31 FIR filter structure.
    * @param[in]  pSrc       points to the block of input data.
    * @param[out] pDst       points to the block of output data.
    * @param[in]  blockSize  number of samples to process.
    */
-void arm_fir_fast_q31(const arm_fir_instance_q31 *S, const q31_t *pSrc, q31_t *pDst,
-                      uint32_t blockSize);
+  void arm_fir_fast_q31(
+  const arm_fir_instance_q31 * S,
+  const q31_t * pSrc,
+        q31_t * pDst,
+        uint32_t blockSize);
 
-/**
+  /**
    * @brief  Initialization function for the Q31 FIR filter.
    * @param[in,out] S          points to an instance of the Q31 FIR structure.
    * @param[in]     numTaps    Number of filter coefficients in the filter.
@@ -185,30 +216,40 @@ void arm_fir_fast_q31(const arm_fir_instance_q31 *S, const q31_t *pSrc, q31_t *p
    * For the MVE version, the coefficient length must be a multiple of 4.
    * You can pad with zeros if you have less coefficients.
    */
-void arm_fir_init_q31(arm_fir_instance_q31 *S, uint16_t numTaps, const q31_t *pCoeffs,
-                      q31_t *pState, uint32_t blockSize);
+  void arm_fir_init_q31(
+        arm_fir_instance_q31 * S,
+        uint16_t numTaps,
+  const q31_t * pCoeffs,
+        q31_t * pState,
+        uint32_t blockSize);
 
-/**
+  /**
    * @brief Processing function for the floating-point FIR filter.
    * @param[in]  S          points to an instance of the floating-point FIR structure.
    * @param[in]  pSrc       points to the block of input data.
    * @param[out] pDst       points to the block of output data.
    * @param[in]  blockSize  number of samples to process.
    */
-void arm_fir_f32(const arm_fir_instance_f32 *S, const float32_t *pSrc, float32_t *pDst,
-                 uint32_t blockSize);
+  void arm_fir_f32(
+  const arm_fir_instance_f32 * S,
+  const float32_t * pSrc,
+        float32_t * pDst,
+        uint32_t blockSize);
 
-/**
+  /**
    * @brief Processing function for the floating-point FIR filter.
    * @param[in]  S          points to an instance of the floating-point FIR structure.
    * @param[in]  pSrc       points to the block of input data.
    * @param[out] pDst       points to the block of output data.
    * @param[in]  blockSize  number of samples to process.
    */
-void arm_fir_f64(const arm_fir_instance_f64 *S, const float64_t *pSrc, float64_t *pDst,
-                 uint32_t blockSize);
+  void arm_fir_f64(
+  const arm_fir_instance_f64 * S,
+  const float64_t * pSrc,
+        float64_t * pDst,
+        uint32_t blockSize);
 
-/**
+  /**
    * @brief  Initialization function for the floating-point FIR filter.
    * @param[in,out] S          points to an instance of the floating-point FIR filter structure.
    * @param[in]     numTaps    Number of filter coefficients in the filter.
@@ -216,10 +257,14 @@ void arm_fir_f64(const arm_fir_instance_f64 *S, const float64_t *pSrc, float64_t
    * @param[in]     pState     points to the state buffer.
    * @param[in]     blockSize  number of samples that are processed at a time.
    */
-void arm_fir_init_f32(arm_fir_instance_f32 *S, uint16_t numTaps, const float32_t *pCoeffs,
-                      float32_t *pState, uint32_t blockSize);
+  void arm_fir_init_f32(
+        arm_fir_instance_f32 * S,
+        uint16_t numTaps,
+  const float32_t * pCoeffs,
+        float32_t * pState,
+        uint32_t blockSize);
 
-/**
+  /**
    * @brief  Initialization function for the floating-point FIR filter.
    * @param[in,out] S          points to an instance of the floating-point FIR filter structure.
    * @param[in]     numTaps    Number of filter coefficients in the filter.
@@ -227,68 +272,69 @@ void arm_fir_init_f32(arm_fir_instance_f32 *S, uint16_t numTaps, const float32_t
    * @param[in]     pState     points to the state buffer.
    * @param[in]     blockSize  number of samples that are processed at a time.
    */
-void arm_fir_init_f64(arm_fir_instance_f64 *S, uint16_t numTaps, const float64_t *pCoeffs,
-                      float64_t *pState, uint32_t blockSize);
+  void arm_fir_init_f64(
+        arm_fir_instance_f64 * S,
+        uint16_t numTaps,
+  const float64_t * pCoeffs,
+        float64_t * pState,
+        uint32_t blockSize);
 
-/**
+  /**
    * @brief Instance structure for the Q15 Biquad cascade filter.
    */
-typedef struct {
-    int8_t numStages; /**< number of 2nd order stages in the filter.  Overall order is 2*numStages. */
-    q15_t *
-        pState; /**< Points to the array of state coefficients.  The array is of length 4*numStages. */
-    const q15_t
-        *pCoeffs; /**< Points to the array of coefficients.  The array is of length 5*numStages. */
-    int8_t postShift; /**< Additional shift, in bits, applied to each output sample. */
-} arm_biquad_casd_df1_inst_q15;
+  typedef struct
+  {
+          int8_t numStages;        /**< number of 2nd order stages in the filter.  Overall order is 2*numStages. */
+          q15_t *pState;           /**< Points to the array of state coefficients.  The array is of length 4*numStages. */
+    const q15_t *pCoeffs;          /**< Points to the array of coefficients.  The array is of length 5*numStages. */
+          int8_t postShift;        /**< Additional shift, in bits, applied to each output sample. */
+  } arm_biquad_casd_df1_inst_q15;
 
-/**
+  /**
    * @brief Instance structure for the Q31 Biquad cascade filter.
    */
-typedef struct {
-    uint32_t
-        numStages; /**< number of 2nd order stages in the filter.  Overall order is 2*numStages. */
-    q31_t *
-        pState; /**< Points to the array of state coefficients.  The array is of length 4*numStages. */
-    const q31_t
-        *pCoeffs; /**< Points to the array of coefficients.  The array is of length 5*numStages. */
-    uint8_t postShift; /**< Additional shift, in bits, applied to each output sample. */
-} arm_biquad_casd_df1_inst_q31;
+  typedef struct
+  {
+          uint32_t numStages;      /**< number of 2nd order stages in the filter.  Overall order is 2*numStages. */
+          q31_t *pState;           /**< Points to the array of state coefficients.  The array is of length 4*numStages. */
+    const q31_t *pCoeffs;          /**< Points to the array of coefficients.  The array is of length 5*numStages. */
+          uint8_t postShift;       /**< Additional shift, in bits, applied to each output sample. */
+  } arm_biquad_casd_df1_inst_q31;
 
-/**
+  /**
    * @brief Instance structure for the floating-point Biquad cascade filter.
    */
-typedef struct {
-    uint32_t
-        numStages; /**< number of 2nd order stages in the filter.  Overall order is 2*numStages. */
-    float32_t *
-        pState; /**< Points to the array of state coefficients.  The array is of length 4*numStages. */
-    const float32_t
-        *pCoeffs; /**< Points to the array of coefficients.  The array is of length 5*numStages. */
-} arm_biquad_casd_df1_inst_f32;
+  typedef struct
+  {
+          uint32_t numStages;      /**< number of 2nd order stages in the filter.  Overall order is 2*numStages. */
+          float32_t *pState;       /**< Points to the array of state coefficients.  The array is of length 4*numStages. */
+    const float32_t *pCoeffs;      /**< Points to the array of coefficients.  The array is of length 5*numStages. */
+  } arm_biquad_casd_df1_inst_f32;
 
 #if defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE)
-/**
+  /**
    * @brief Instance structure for the modified Biquad coefs required by vectorized code.
    */
-typedef struct {
-    float32_t coeffs
-        [8]
-        [4]; /**< Points to the array of modified coefficients.  The array is of length 32. There is one per stage */
-} arm_biquad_mod_coef_f32;
-#endif
+  typedef struct
+  {
+      float32_t coeffs[8][4]; /**< Array of modified coefficients, stored inline in the structure (8 x 4 = 32 values). There is one per stage */
+  } arm_biquad_mod_coef_f32;
+#endif 
 
-/**
+  /**
    * @brief Processing function for the Q15 Biquad cascade filter.
    * @param[in]  S          points to an instance of the Q15 Biquad cascade structure.
    * @param[in]  pSrc       points to the block of input data.
    * @param[out] pDst       points to the block of output data.
    * @param[in]  blockSize  number of samples to process.
    */
-void arm_biquad_cascade_df1_q15(const arm_biquad_casd_df1_inst_q15 *S, const q15_t *pSrc,
-                                q15_t *pDst, uint32_t blockSize);
+  void arm_biquad_cascade_df1_q15(
+  const arm_biquad_casd_df1_inst_q15 * S,
+  const q15_t * pSrc,
+        q15_t * pDst,
+        uint32_t blockSize);
 
-/**
+  /**
    * @brief  Initialization function for the Q15 Biquad cascade filter.
    * @param[in,out] S          points to an instance of the Q15 Biquad cascade structure.
    * @param[in]     numStages  number of 2nd order stages in the filter.
@@ -296,40 +342,53 @@ void arm_biquad_cascade_df1_q15(const arm_biquad_casd_df1_inst_q15 *S, const q15
    * @param[in]     pState     points to the state buffer.
    * @param[in]     postShift  Shift to be applied to the output. Varies according to the coefficients format
    */
-void arm_biquad_cascade_df1_init_q15(arm_biquad_casd_df1_inst_q15 *S, uint8_t numStages,
-                                     const q15_t *pCoeffs, q15_t *pState, int8_t postShift);
+  void arm_biquad_cascade_df1_init_q15(
+        arm_biquad_casd_df1_inst_q15 * S,
+        uint8_t numStages,
+  const q15_t * pCoeffs,
+        q15_t * pState,
+        int8_t postShift);
 
-/**
+  /**
    * @brief Fast but less precise processing function for the Q15 Biquad cascade filter for Cortex-M3 and Cortex-M4.
    * @param[in]  S          points to an instance of the Q15 Biquad cascade structure.
    * @param[in]  pSrc       points to the block of input data.
    * @param[out] pDst       points to the block of output data.
    * @param[in]  blockSize  number of samples to process.
    */
-void arm_biquad_cascade_df1_fast_q15(const arm_biquad_casd_df1_inst_q15 *S, const q15_t *pSrc,
-                                     q15_t *pDst, uint32_t blockSize);
+  void arm_biquad_cascade_df1_fast_q15(
+  const arm_biquad_casd_df1_inst_q15 * S,
+  const q15_t * pSrc,
+        q15_t * pDst,
+        uint32_t blockSize);
 
-/**
+  /**
    * @brief Processing function for the Q31 Biquad cascade filter
    * @param[in]  S          points to an instance of the Q31 Biquad cascade structure.
    * @param[in]  pSrc       points to the block of input data.
    * @param[out] pDst       points to the block of output data.
    * @param[in]  blockSize  number of samples to process.
    */
-void arm_biquad_cascade_df1_q31(const arm_biquad_casd_df1_inst_q31 *S, const q31_t *pSrc,
-                                q31_t *pDst, uint32_t blockSize);
+  void arm_biquad_cascade_df1_q31(
+  const arm_biquad_casd_df1_inst_q31 * S,
+  const q31_t * pSrc,
+        q31_t * pDst,
+        uint32_t blockSize);
 
-/**
+  /**
    * @brief Fast but less precise processing function for the Q31 Biquad cascade filter for Cortex-M3 and Cortex-M4.
    * @param[in]  S          points to an instance of the Q31 Biquad cascade structure.
    * @param[in]  pSrc       points to the block of input data.
    * @param[out] pDst       points to the block of output data.
    * @param[in]  blockSize  number of samples to process.
    */
-void arm_biquad_cascade_df1_fast_q31(const arm_biquad_casd_df1_inst_q31 *S, const q31_t *pSrc,
-                                     q31_t *pDst, uint32_t blockSize);
+  void arm_biquad_cascade_df1_fast_q31(
+  const arm_biquad_casd_df1_inst_q31 * S,
+  const q31_t * pSrc,
+        q31_t * pDst,
+        uint32_t blockSize);
 
-/**
+  /**
    * @brief  Initialization function for the Q31 Biquad cascade filter.
    * @param[in,out] S          points to an instance of the Q31 Biquad cascade structure.
    * @param[in]     numStages  number of 2nd order stages in the filter.
@@ -337,20 +396,27 @@ void arm_biquad_cascade_df1_fast_q31(const arm_biquad_casd_df1_inst_q31 *S, cons
    * @param[in]     pState     points to the state buffer.
    * @param[in]     postShift  Shift to be applied to the output. Varies according to the coefficients format
    */
-void arm_biquad_cascade_df1_init_q31(arm_biquad_casd_df1_inst_q31 *S, uint8_t numStages,
-                                     const q31_t *pCoeffs, q31_t *pState, int8_t postShift);
+  void arm_biquad_cascade_df1_init_q31(
+        arm_biquad_casd_df1_inst_q31 * S,
+        uint8_t numStages,
+  const q31_t * pCoeffs,
+        q31_t * pState,
+        int8_t postShift);
 
-/**
+  /**
    * @brief Processing function for the floating-point Biquad cascade filter.
    * @param[in]  S          points to an instance of the floating-point Biquad cascade structure.
    * @param[in]  pSrc       points to the block of input data.
    * @param[out] pDst       points to the block of output data.
    * @param[in]  blockSize  number of samples to process.
    */
-void arm_biquad_cascade_df1_f32(const arm_biquad_casd_df1_inst_f32 *S, const float32_t *pSrc,
-                                float32_t *pDst, uint32_t blockSize);
+  void arm_biquad_cascade_df1_f32(
+  const arm_biquad_casd_df1_inst_f32 * S,
+  const float32_t * pSrc,
+        float32_t * pDst,
+        uint32_t blockSize);
 
-/**
+  /**
    * @brief  Initialization function for the floating-point Biquad cascade filter.
    * @param[in,out] S          points to an instance of the floating-point Biquad cascade structure.
    * @param[in]     numStages  number of 2nd order stages in the filter.
@@ -359,13 +425,20 @@ void arm_biquad_cascade_df1_f32(const arm_biquad_casd_df1_inst_f32 *S, const flo
    * @param[in]     pState     points to the state buffer.
    */
 #if defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE)
-void arm_biquad_cascade_df1_mve_init_f32(arm_biquad_casd_df1_inst_f32 *S, uint8_t numStages,
-                                         const float32_t *pCoeffs,
-                                         arm_biquad_mod_coef_f32 *pCoeffsMod, float32_t *pState);
+  void arm_biquad_cascade_df1_mve_init_f32(
+      arm_biquad_casd_df1_inst_f32 * S,
+      uint8_t numStages,
+      const float32_t * pCoeffs, 
+      arm_biquad_mod_coef_f32 * pCoeffsMod, 
+      float32_t * pState);
 #endif
+  
+  void arm_biquad_cascade_df1_init_f32(
+        arm_biquad_casd_df1_inst_f32 * S,
+        uint8_t numStages,
+  const float32_t * pCoeffs,
+        float32_t * pState);
 
-void arm_biquad_cascade_df1_init_f32(arm_biquad_casd_df1_inst_f32 *S, uint8_t numStages,
-                                     const float32_t *pCoeffs, float32_t *pState);
 
 /**
  * @brief Convolution of floating-point sequences.
@@ -375,10 +448,15 @@ void arm_biquad_cascade_df1_init_f32(arm_biquad_casd_df1_inst_f32 *S, uint8_t nu
  * @param[in]  srcBLen  length of the second input sequence.
  * @param[out] pDst     points to the location where the output result is written.  Length srcALen+srcBLen-1.
  */
-void arm_conv_f32(const float32_t *pSrcA, uint32_t srcALen, const float32_t *pSrcB,
-                  uint32_t srcBLen, float32_t *pDst);
+  void arm_conv_f32(
+  const float32_t * pSrcA,
+        uint32_t srcALen,
+  const float32_t * pSrcB,
+        uint32_t srcBLen,
+        float32_t * pDst);
 
-/**
+
+  /**
    * @brief Convolution of Q15 sequences.
    * @param[in]  pSrcA      points to the first input sequence.
    * @param[in]  srcALen    length of the first input sequence.
@@ -388,8 +466,15 @@ void arm_conv_f32(const float32_t *pSrcA, uint32_t srcALen, const float32_t *pSr
    * @param[in]  pScratch1  points to scratch buffer of size max(srcALen, srcBLen) + 2*min(srcALen, srcBLen) - 2.
    * @param[in]  pScratch2  points to scratch buffer of size min(srcALen, srcBLen).
    */
-void arm_conv_opt_q15(const q15_t *pSrcA, uint32_t srcALen, const q15_t *pSrcB, uint32_t srcBLen,
-                      q15_t *pDst, q15_t *pScratch1, q15_t *pScratch2);
+  void arm_conv_opt_q15(
+  const q15_t * pSrcA,
+        uint32_t srcALen,
+  const q15_t * pSrcB,
+        uint32_t srcBLen,
+        q15_t * pDst,
+        q15_t * pScratch1,
+        q15_t * pScratch2);
+
 
 /**
  * @brief Convolution of Q15 sequences.
@@ -399,10 +484,15 @@ void arm_conv_opt_q15(const q15_t *pSrcA, uint32_t srcALen, const q15_t *pSrcB,
  * @param[in]  srcBLen  length of the second input sequence.
  * @param[out] pDst     points to the location where the output result is written.  Length srcALen+srcBLen-1.
  */
-void arm_conv_q15(const q15_t *pSrcA, uint32_t srcALen, const q15_t *pSrcB, uint32_t srcBLen,
-                  q15_t *pDst);
+  void arm_conv_q15(
+  const q15_t * pSrcA,
+        uint32_t srcALen,
+  const q15_t * pSrcB,
+        uint32_t srcBLen,
+        q15_t * pDst);
 
-/**
+
+  /**
    * @brief Convolution of Q15 sequences (fast version) for Cortex-M3 and Cortex-M4
    * @param[in]  pSrcA    points to the first input sequence.
    * @param[in]  srcALen  length of the first input sequence.
@@ -410,10 +500,15 @@ void arm_conv_q15(const q15_t *pSrcA, uint32_t srcALen, const q15_t *pSrcB, uint
    * @param[in]  srcBLen  length of the second input sequence.
    * @param[out] pDst     points to the block of output data  Length srcALen+srcBLen-1.
    */
-void arm_conv_fast_q15(const q15_t *pSrcA, uint32_t srcALen, const q15_t *pSrcB, uint32_t srcBLen,
-                       q15_t *pDst);
+  void arm_conv_fast_q15(
+  const q15_t * pSrcA,
+        uint32_t srcALen,
+  const q15_t * pSrcB,
+        uint32_t srcBLen,
+        q15_t * pDst);
 
-/**
+
+  /**
    * @brief Convolution of Q15 sequences (fast version) for Cortex-M3 and Cortex-M4
    * @param[in]  pSrcA      points to the first input sequence.
    * @param[in]  srcALen    length of the first input sequence.
@@ -423,10 +518,17 @@ void arm_conv_fast_q15(const q15_t *pSrcA, uint32_t srcALen, const q15_t *pSrcB,
    * @param[in]  pScratch1  points to scratch buffer of size max(srcALen, srcBLen) + 2*min(srcALen, srcBLen) - 2.
    * @param[in]  pScratch2  points to scratch buffer of size min(srcALen, srcBLen).
    */
-void arm_conv_fast_opt_q15(const q15_t *pSrcA, uint32_t srcALen, const q15_t *pSrcB,
-                           uint32_t srcBLen, q15_t *pDst, q15_t *pScratch1, q15_t *pScratch2);
+  void arm_conv_fast_opt_q15(
+  const q15_t * pSrcA,
+        uint32_t srcALen,
+  const q15_t * pSrcB,
+        uint32_t srcBLen,
+        q15_t * pDst,
+        q15_t * pScratch1,
+        q15_t * pScratch2);
 
-/**
+
+  /**
    * @brief Convolution of Q31 sequences.
    * @param[in]  pSrcA    points to the first input sequence.
    * @param[in]  srcALen  length of the first input sequence.
@@ -434,10 +536,15 @@ void arm_conv_fast_opt_q15(const q15_t *pSrcA, uint32_t srcALen, const q15_t *pS
    * @param[in]  srcBLen  length of the second input sequence.
    * @param[out] pDst     points to the block of output data  Length srcALen+srcBLen-1.
    */
-void arm_conv_q31(const q31_t *pSrcA, uint32_t srcALen, const q31_t *pSrcB, uint32_t srcBLen,
-                  q31_t *pDst);
+  void arm_conv_q31(
+  const q31_t * pSrcA,
+        uint32_t srcALen,
+  const q31_t * pSrcB,
+        uint32_t srcBLen,
+        q31_t * pDst);
 
-/**
+
+  /**
    * @brief Convolution of Q31 sequences (fast version) for Cortex-M3 and Cortex-M4
    * @param[in]  pSrcA    points to the first input sequence.
    * @param[in]  srcALen  length of the first input sequence.
@@ -445,10 +552,15 @@ void arm_conv_q31(const q31_t *pSrcA, uint32_t srcALen, const q31_t *pSrcB, uint
    * @param[in]  srcBLen  length of the second input sequence.
    * @param[out] pDst     points to the block of output data  Length srcALen+srcBLen-1.
    */
-void arm_conv_fast_q31(const q31_t *pSrcA, uint32_t srcALen, const q31_t *pSrcB, uint32_t srcBLen,
-                       q31_t *pDst);
+  void arm_conv_fast_q31(
+  const q31_t * pSrcA,
+        uint32_t srcALen,
+  const q31_t * pSrcB,
+        uint32_t srcBLen,
+        q31_t * pDst);
 
-/**
+
+    /**
    * @brief Convolution of Q7 sequences.
    * @param[in]  pSrcA      points to the first input sequence.
    * @param[in]  srcALen    length of the first input sequence.
@@ -458,10 +570,17 @@ void arm_conv_fast_q31(const q31_t *pSrcA, uint32_t srcALen, const q31_t *pSrcB,
    * @param[in]  pScratch1  points to scratch buffer(of type q15_t) of size max(srcALen, srcBLen) + 2*min(srcALen, srcBLen) - 2.
    * @param[in]  pScratch2  points to scratch buffer (of type q15_t) of size min(srcALen, srcBLen).
    */
-void arm_conv_opt_q7(const q7_t *pSrcA, uint32_t srcALen, const q7_t *pSrcB, uint32_t srcBLen,
-                     q7_t *pDst, q15_t *pScratch1, q15_t *pScratch2);
+  void arm_conv_opt_q7(
+  const q7_t * pSrcA,
+        uint32_t srcALen,
+  const q7_t * pSrcB,
+        uint32_t srcBLen,
+        q7_t * pDst,
+        q15_t * pScratch1,
+        q15_t * pScratch2);
 
-/**
+
+  /**
    * @brief Convolution of Q7 sequences.
    * @param[in]  pSrcA    points to the first input sequence.
    * @param[in]  srcALen  length of the first input sequence.
@@ -469,10 +588,15 @@ void arm_conv_opt_q7(const q7_t *pSrcA, uint32_t srcALen, const q7_t *pSrcB, uin
    * @param[in]  srcBLen  length of the second input sequence.
    * @param[out] pDst     points to the block of output data  Length srcALen+srcBLen-1.
    */
-void arm_conv_q7(const q7_t *pSrcA, uint32_t srcALen, const q7_t *pSrcB, uint32_t srcBLen,
-                 q7_t *pDst);
+  void arm_conv_q7(
+  const q7_t * pSrcA,
+        uint32_t srcALen,
+  const q7_t * pSrcB,
+        uint32_t srcBLen,
+        q7_t * pDst);
 
-/**
+
+  /**
    * @brief Partial convolution of floating-point sequences.
    * @param[in]  pSrcA       points to the first input sequence.
    * @param[in]  srcALen     length of the first input sequence.
@@ -483,11 +607,17 @@ void arm_conv_q7(const q7_t *pSrcA, uint32_t srcALen, const q7_t *pSrcB, uint32_
    * @param[in]  numPoints   is the number of output points to be computed.
    * @return  Returns either ARM_MATH_SUCCESS if the function completed correctly or ARM_MATH_ARGUMENT_ERROR if the requested subset is not in the range [0 srcALen+srcBLen-2].
    */
-arm_status arm_conv_partial_f32(const float32_t *pSrcA, uint32_t srcALen, const float32_t *pSrcB,
-                                uint32_t srcBLen, float32_t *pDst, uint32_t firstIndex,
-                                uint32_t numPoints);
+  arm_status arm_conv_partial_f32(
+  const float32_t * pSrcA,
+        uint32_t srcALen,
+  const float32_t * pSrcB,
+        uint32_t srcBLen,
+        float32_t * pDst,
+        uint32_t firstIndex,
+        uint32_t numPoints);
 
-/**
+
+  /**
    * @brief Partial convolution of Q15 sequences.
    * @param[in]  pSrcA       points to the first input sequence.
    * @param[in]  srcALen     length of the first input sequence.
@@ -500,11 +630,19 @@ arm_status arm_conv_partial_f32(const float32_t *pSrcA, uint32_t srcALen, const
    * @param[in]  pScratch2   points to scratch buffer of size min(srcALen, srcBLen).
    * @return  Returns either ARM_MATH_SUCCESS if the function completed correctly or ARM_MATH_ARGUMENT_ERROR if the requested subset is not in the range [0 srcALen+srcBLen-2].
    */
-arm_status arm_conv_partial_opt_q15(const q15_t *pSrcA, uint32_t srcALen, const q15_t *pSrcB,
-                                    uint32_t srcBLen, q15_t *pDst, uint32_t firstIndex,
-                                    uint32_t numPoints, q15_t *pScratch1, q15_t *pScratch2);
+  arm_status arm_conv_partial_opt_q15(
+  const q15_t * pSrcA,
+        uint32_t srcALen,
+  const q15_t * pSrcB,
+        uint32_t srcBLen,
+        q15_t * pDst,
+        uint32_t firstIndex,
+        uint32_t numPoints,
+        q15_t * pScratch1,
+        q15_t * pScratch2);
 
-/**
+
+  /**
    * @brief Partial convolution of Q15 sequences.
    * @param[in]  pSrcA       points to the first input sequence.
    * @param[in]  srcALen     length of the first input sequence.
@@ -515,11 +653,17 @@ arm_status arm_conv_partial_opt_q15(const q15_t *pSrcA, uint32_t srcALen, const
    * @param[in]  numPoints   is the number of output points to be computed.
    * @return  Returns either ARM_MATH_SUCCESS if the function completed correctly or ARM_MATH_ARGUMENT_ERROR if the requested subset is not in the range [0 srcALen+srcBLen-2].
    */
-arm_status arm_conv_partial_q15(const q15_t *pSrcA, uint32_t srcALen, const q15_t *pSrcB,
-                                uint32_t srcBLen, q15_t *pDst, uint32_t firstIndex,
-                                uint32_t numPoints);
+  arm_status arm_conv_partial_q15(
+  const q15_t * pSrcA,
+        uint32_t srcALen,
+  const q15_t * pSrcB,
+        uint32_t srcBLen,
+        q15_t * pDst,
+        uint32_t firstIndex,
+        uint32_t numPoints);
 
-/**
+
+  /**
    * @brief Partial convolution of Q15 sequences (fast version) for Cortex-M3 and Cortex-M4
    * @param[in]  pSrcA       points to the first input sequence.
    * @param[in]  srcALen     length of the first input sequence.
@@ -530,11 +674,17 @@ arm_status arm_conv_partial_q15(const q15_t *pSrcA, uint32_t srcALen, const q15_
    * @param[in]  numPoints   is the number of output points to be computed.
    * @return  Returns either ARM_MATH_SUCCESS if the function completed correctly or ARM_MATH_ARGUMENT_ERROR if the requested subset is not in the range [0 srcALen+srcBLen-2].
    */
-arm_status arm_conv_partial_fast_q15(const q15_t *pSrcA, uint32_t srcALen, const q15_t *pSrcB,
-                                     uint32_t srcBLen, q15_t *pDst, uint32_t firstIndex,
-                                     uint32_t numPoints);
+  arm_status arm_conv_partial_fast_q15(
+  const q15_t * pSrcA,
+        uint32_t srcALen,
+  const q15_t * pSrcB,
+        uint32_t srcBLen,
+        q15_t * pDst,
+        uint32_t firstIndex,
+        uint32_t numPoints);
 
-/**
+
+  /**
    * @brief Partial convolution of Q15 sequences (fast version) for Cortex-M3 and Cortex-M4
    * @param[in]  pSrcA       points to the first input sequence.
    * @param[in]  srcALen     length of the first input sequence.
@@ -547,11 +697,19 @@ arm_status arm_conv_partial_fast_q15(const q15_t *pSrcA, uint32_t srcALen, const
    * @param[in]  pScratch2   points to scratch buffer of size min(srcALen, srcBLen).
    * @return  Returns either ARM_MATH_SUCCESS if the function completed correctly or ARM_MATH_ARGUMENT_ERROR if the requested subset is not in the range [0 srcALen+srcBLen-2].
    */
-arm_status arm_conv_partial_fast_opt_q15(const q15_t *pSrcA, uint32_t srcALen, const q15_t *pSrcB,
-                                         uint32_t srcBLen, q15_t *pDst, uint32_t firstIndex,
-                                         uint32_t numPoints, q15_t *pScratch1, q15_t *pScratch2);
+  arm_status arm_conv_partial_fast_opt_q15(
+  const q15_t * pSrcA,
+        uint32_t srcALen,
+  const q15_t * pSrcB,
+        uint32_t srcBLen,
+        q15_t * pDst,
+        uint32_t firstIndex,
+        uint32_t numPoints,
+        q15_t * pScratch1,
+        q15_t * pScratch2);
 
-/**
+
+  /**
    * @brief Partial convolution of Q31 sequences.
    * @param[in]  pSrcA       points to the first input sequence.
    * @param[in]  srcALen     length of the first input sequence.
@@ -562,11 +720,17 @@ arm_status arm_conv_partial_fast_opt_q15(const q15_t *pSrcA, uint32_t srcALen, c
    * @param[in]  numPoints   is the number of output points to be computed.
    * @return  Returns either ARM_MATH_SUCCESS if the function completed correctly or ARM_MATH_ARGUMENT_ERROR if the requested subset is not in the range [0 srcALen+srcBLen-2].
    */
-arm_status arm_conv_partial_q31(const q31_t *pSrcA, uint32_t srcALen, const q31_t *pSrcB,
-                                uint32_t srcBLen, q31_t *pDst, uint32_t firstIndex,
-                                uint32_t numPoints);
+  arm_status arm_conv_partial_q31(
+  const q31_t * pSrcA,
+        uint32_t srcALen,
+  const q31_t * pSrcB,
+        uint32_t srcBLen,
+        q31_t * pDst,
+        uint32_t firstIndex,
+        uint32_t numPoints);
 
-/**
+
+  /**
    * @brief Partial convolution of Q31 sequences (fast version) for Cortex-M3 and Cortex-M4
    * @param[in]  pSrcA       points to the first input sequence.
    * @param[in]  srcALen     length of the first input sequence.
@@ -577,11 +741,17 @@ arm_status arm_conv_partial_q31(const q31_t *pSrcA, uint32_t srcALen, const q31_
    * @param[in]  numPoints   is the number of output points to be computed.
    * @return  Returns either ARM_MATH_SUCCESS if the function completed correctly or ARM_MATH_ARGUMENT_ERROR if the requested subset is not in the range [0 srcALen+srcBLen-2].
    */
-arm_status arm_conv_partial_fast_q31(const q31_t *pSrcA, uint32_t srcALen, const q31_t *pSrcB,
-                                     uint32_t srcBLen, q31_t *pDst, uint32_t firstIndex,
-                                     uint32_t numPoints);
+  arm_status arm_conv_partial_fast_q31(
+  const q31_t * pSrcA,
+        uint32_t srcALen,
+  const q31_t * pSrcB,
+        uint32_t srcBLen,
+        q31_t * pDst,
+        uint32_t firstIndex,
+        uint32_t numPoints);
 
-/**
+
+  /**
    * @brief Partial convolution of Q7 sequences
    * @param[in]  pSrcA       points to the first input sequence.
    * @param[in]  srcALen     length of the first input sequence.
@@ -594,9 +764,17 @@ arm_status arm_conv_partial_fast_q31(const q31_t *pSrcA, uint32_t srcALen, const
    * @param[in]  pScratch2   points to scratch buffer (of type q15_t) of size min(srcALen, srcBLen).
    * @return  Returns either ARM_MATH_SUCCESS if the function completed correctly or ARM_MATH_ARGUMENT_ERROR if the requested subset is not in the range [0 srcALen+srcBLen-2].
    */
-arm_status arm_conv_partial_opt_q7(const q7_t *pSrcA, uint32_t srcALen, const q7_t *pSrcB,
-                                   uint32_t srcBLen, q7_t *pDst, uint32_t firstIndex,
-                                   uint32_t numPoints, q15_t *pScratch1, q15_t *pScratch2);
+  arm_status arm_conv_partial_opt_q7(
+  const q7_t * pSrcA,
+        uint32_t srcALen,
+  const q7_t * pSrcB,
+        uint32_t srcBLen,
+        q7_t * pDst,
+        uint32_t firstIndex,
+        uint32_t numPoints,
+        q15_t * pScratch1,
+        q15_t * pScratch2);
+
 
 /**
    * @brief Partial convolution of Q7 sequences.
@@ -609,52 +787,108 @@ arm_status arm_conv_partial_opt_q7(const q7_t *pSrcA, uint32_t srcALen, const q7
    * @param[in]  numPoints   is the number of output points to be computed.
    * @return  Returns either ARM_MATH_SUCCESS if the function completed correctly or ARM_MATH_ARGUMENT_ERROR if the requested subset is not in the range [0 srcALen+srcBLen-2].
    */
-arm_status arm_conv_partial_q7(const q7_t *pSrcA, uint32_t srcALen, const q7_t *pSrcB,
-                               uint32_t srcBLen, q7_t *pDst, uint32_t firstIndex,
-                               uint32_t numPoints);
+  arm_status arm_conv_partial_q7(
+  const q7_t * pSrcA,
+        uint32_t srcALen,
+  const q7_t * pSrcB,
+        uint32_t srcBLen,
+        q7_t * pDst,
+        uint32_t firstIndex,
+        uint32_t numPoints);
 
-/**
+
+  /**
    * @brief Instance structure for the Q15 FIR decimator.
    */
-typedef struct {
-    uint8_t M; /**< decimation factor. */
-    uint16_t numTaps; /**< number of coefficients in the filter. */
-    const q15_t *pCoeffs; /**< points to the coefficient array. The array is of length numTaps.*/
-    q15_t *
-        pState; /**< points to the state variable array. The array is of length numTaps+blockSize-1. */
-} arm_fir_decimate_instance_q15;
+  typedef struct
+  {
+          uint8_t M;                  /**< decimation factor. */
+          uint16_t numTaps;           /**< number of coefficients in the filter. */
+    const q15_t *pCoeffs;             /**< points to the coefficient array. The array is of length numTaps.*/
+          q15_t *pState;              /**< points to the state variable array. The array is of length numTaps+blockSize-1. */
+  } arm_fir_decimate_instance_q15;
 
-/**
+  /**
    * @brief Instance structure for the Q31 FIR decimator.
    */
-typedef struct {
-    uint8_t M; /**< decimation factor. */
-    uint16_t numTaps; /**< number of coefficients in the filter. */
-    const q31_t *pCoeffs; /**< points to the coefficient array. The array is of length numTaps.*/
-    q31_t *
-        pState; /**< points to the state variable array. The array is of length numTaps+blockSize-1. */
-} arm_fir_decimate_instance_q31;
+  typedef struct
+  {
+          uint8_t M;                  /**< decimation factor. */
+          uint16_t numTaps;           /**< number of coefficients in the filter. */
+    const q31_t *pCoeffs;             /**< points to the coefficient array. The array is of length numTaps.*/
+          q31_t *pState;              /**< points to the state variable array. The array is of length numTaps+blockSize-1. */
+  } arm_fir_decimate_instance_q31;
 
 /**
-  @brief Instance structure for floating-point FIR decimator.
+  @brief Instance structure for single precision floating-point FIR decimator.
  */
-typedef struct {
-    uint8_t M; /**< decimation factor. */
-    uint16_t numTaps; /**< number of coefficients in the filter. */
-    const float32_t *pCoeffs; /**< points to the coefficient array. The array is of length numTaps.*/
-    float32_t *
-        pState; /**< points to the state variable array. The array is of length numTaps+blockSize-1. */
-} arm_fir_decimate_instance_f32;
-
-/**
+typedef struct
+  {
+          uint8_t M;                  /**< decimation factor. */
+          uint16_t numTaps;           /**< number of coefficients in the filter. */
+    const float32_t *pCoeffs;         /**< points to the coefficient array. The array is of length numTaps.*/
+          float32_t *pState;          /**< points to the state variable array. The array is of length numTaps+blockSize-1. */
+  } arm_fir_decimate_instance_f32;
+
+  /**
+  @brief Instance structure for double precision floating-point FIR decimator.
+ */
+  typedef struct
+  {
+    uint8_t M;                  /**< decimation factor. */
+    uint16_t numTaps;           /**< number of coefficients in the filter. */
+    const float64_t *pCoeffs;         /**< points to the coefficient array. The array is of length numTaps.*/
+    float64_t *pState;          /**< points to the state variable array. The array is of length numTaps+blockSize-1. */
+  } arm_fir_decimate_instance_f64;
+
+  /**
   @brief         Processing function for floating-point FIR decimator.
   @param[in]     S         points to an instance of the floating-point FIR decimator structure
   @param[in]     pSrc      points to the block of input data
   @param[out]    pDst      points to the block of output data
   @param[in]     blockSize number of samples to process
  */
-void arm_fir_decimate_f32(const arm_fir_decimate_instance_f32 *S, const float32_t *pSrc,
-                          float32_t *pDst, uint32_t blockSize);
+  void arm_fir_decimate_f64(
+      const arm_fir_decimate_instance_f64 * S,
+      const float64_t * pSrc,
+      float64_t * pDst,
+      uint32_t blockSize);
+
+
+  /**
+    @brief         Initialization function for the double precision floating-point FIR decimator.
+    @param[in,out] S          points to an instance of the double precision floating-point FIR decimator structure
+    @param[in]     numTaps    number of coefficients in the filter
+    @param[in]     M          decimation factor
+    @param[in]     pCoeffs    points to the filter coefficients
+    @param[in]     pState     points to the state buffer
+    @param[in]     blockSize  number of input samples to process per call
+    @return        execution status
+                     - \ref ARM_MATH_SUCCESS      : Operation successful
+                     - \ref ARM_MATH_LENGTH_ERROR : <code>blockSize</code> is not a multiple of <code>M</code>
+   */
+  arm_status arm_fir_decimate_init_f64(
+      arm_fir_decimate_instance_f64 * S,
+      uint16_t numTaps,
+      uint8_t M,
+      const float64_t * pCoeffs,
+      float64_t * pState,
+      uint32_t blockSize);
+
+
+  /**
+  @brief         Processing function for single precision floating-point FIR decimator.
+  @param[in]     S         points to an instance of the floating-point FIR decimator structure
+  @param[in]     pSrc      points to the block of input data
+  @param[out]    pDst      points to the block of output data
+  @param[in]     blockSize number of samples to process
+ */
+void arm_fir_decimate_f32(
+  const arm_fir_decimate_instance_f32 * S,
+  const float32_t * pSrc,
+        float32_t * pDst,
+        uint32_t blockSize);
+
 
 /**
   @brief         Initialization function for the floating-point FIR decimator.
@@ -668,31 +902,44 @@ void arm_fir_decimate_f32(const arm_fir_decimate_instance_f32 *S, const float32_
                    - \ref ARM_MATH_SUCCESS      : Operation successful
                    - \ref ARM_MATH_LENGTH_ERROR : <code>blockSize</code> is not a multiple of <code>M</code>
  */
-arm_status arm_fir_decimate_init_f32(arm_fir_decimate_instance_f32 *S, uint16_t numTaps, uint8_t M,
-                                     const float32_t *pCoeffs, float32_t *pState,
-                                     uint32_t blockSize);
+arm_status arm_fir_decimate_init_f32(
+        arm_fir_decimate_instance_f32 * S,
+        uint16_t numTaps,
+        uint8_t M,
+  const float32_t * pCoeffs,
+        float32_t * pState,
+        uint32_t blockSize);
 
-/**
+
+  /**
    * @brief Processing function for the Q15 FIR decimator.
    * @param[in]  S          points to an instance of the Q15 FIR decimator structure.
    * @param[in]  pSrc       points to the block of input data.
    * @param[out] pDst       points to the block of output data
    * @param[in]  blockSize  number of input samples to process per call.
    */
-void arm_fir_decimate_q15(const arm_fir_decimate_instance_q15 *S, const q15_t *pSrc, q15_t *pDst,
-                          uint32_t blockSize);
+  void arm_fir_decimate_q15(
+  const arm_fir_decimate_instance_q15 * S,
+  const q15_t * pSrc,
+        q15_t * pDst,
+        uint32_t blockSize);
 
-/**
+
+  /**
    * @brief Processing function for the Q15 FIR decimator (fast variant) for Cortex-M3 and Cortex-M4.
    * @param[in]  S          points to an instance of the Q15 FIR decimator structure.
    * @param[in]  pSrc       points to the block of input data.
    * @param[out] pDst       points to the block of output data
    * @param[in]  blockSize  number of input samples to process per call.
    */
-void arm_fir_decimate_fast_q15(const arm_fir_decimate_instance_q15 *S, const q15_t *pSrc,
-                               q15_t *pDst, uint32_t blockSize);
+  void arm_fir_decimate_fast_q15(
+  const arm_fir_decimate_instance_q15 * S,
+  const q15_t * pSrc,
+        q15_t * pDst,
+        uint32_t blockSize);
 
-/**
+
+  /**
    * @brief  Initialization function for the Q15 FIR decimator.
    * @param[in,out] S          points to an instance of the Q15 FIR decimator structure.
    * @param[in]     numTaps    number of coefficients in the filter.
@@ -703,30 +950,43 @@ void arm_fir_decimate_fast_q15(const arm_fir_decimate_instance_q15 *S, const q15
    * @return    The function returns ARM_MATH_SUCCESS if initialization is successful or ARM_MATH_LENGTH_ERROR if
    * <code>blockSize</code> is not a multiple of <code>M</code>.
    */
-arm_status arm_fir_decimate_init_q15(arm_fir_decimate_instance_q15 *S, uint16_t numTaps, uint8_t M,
-                                     const q15_t *pCoeffs, q15_t *pState, uint32_t blockSize);
+  arm_status arm_fir_decimate_init_q15(
+        arm_fir_decimate_instance_q15 * S,
+        uint16_t numTaps,
+        uint8_t M,
+  const q15_t * pCoeffs,
+        q15_t * pState,
+        uint32_t blockSize);
 
-/**
+
+  /**
    * @brief Processing function for the Q31 FIR decimator.
    * @param[in]  S     points to an instance of the Q31 FIR decimator structure.
    * @param[in]  pSrc  points to the block of input data.
    * @param[out] pDst  points to the block of output data
    * @param[in] blockSize number of input samples to process per call.
    */
-void arm_fir_decimate_q31(const arm_fir_decimate_instance_q31 *S, const q31_t *pSrc, q31_t *pDst,
-                          uint32_t blockSize);
+  void arm_fir_decimate_q31(
+  const arm_fir_decimate_instance_q31 * S,
+  const q31_t * pSrc,
+        q31_t * pDst,
+        uint32_t blockSize);
 
-/**
+  /**
    * @brief Processing function for the Q31 FIR decimator (fast variant) for Cortex-M3 and Cortex-M4.
    * @param[in]  S          points to an instance of the Q31 FIR decimator structure.
    * @param[in]  pSrc       points to the block of input data.
    * @param[out] pDst       points to the block of output data
    * @param[in]  blockSize  number of input samples to process per call.
    */
-void arm_fir_decimate_fast_q31(const arm_fir_decimate_instance_q31 *S, const q31_t *pSrc,
-                               q31_t *pDst, uint32_t blockSize);
+  void arm_fir_decimate_fast_q31(
+  const arm_fir_decimate_instance_q31 * S,
+  const q31_t * pSrc,
+        q31_t * pDst,
+        uint32_t blockSize);
 
-/**
+
+  /**
    * @brief  Initialization function for the Q31 FIR decimator.
    * @param[in,out] S          points to an instance of the Q31 FIR decimator structure.
    * @param[in]     numTaps    number of coefficients in the filter.
@@ -737,56 +997,64 @@ void arm_fir_decimate_fast_q31(const arm_fir_decimate_instance_q31 *S, const q31
    * @return    The function returns ARM_MATH_SUCCESS if initialization is successful or ARM_MATH_LENGTH_ERROR if
    * <code>blockSize</code> is not a multiple of <code>M</code>.
    */
-arm_status arm_fir_decimate_init_q31(arm_fir_decimate_instance_q31 *S, uint16_t numTaps, uint8_t M,
-                                     const q31_t *pCoeffs, q31_t *pState, uint32_t blockSize);
+  arm_status arm_fir_decimate_init_q31(
+        arm_fir_decimate_instance_q31 * S,
+        uint16_t numTaps,
+        uint8_t M,
+  const q31_t * pCoeffs,
+        q31_t * pState,
+        uint32_t blockSize);
 
-/**
+
+  /**
    * @brief Instance structure for the Q15 FIR interpolator.
    */
-typedef struct {
-    uint8_t L; /**< upsample factor. */
-    uint16_t phaseLength; /**< length of each polyphase filter component. */
-    const q15_t
-        *pCoeffs; /**< points to the coefficient array. The array is of length L*phaseLength. */
-    q15_t *
-        pState; /**< points to the state variable array. The array is of length blockSize+phaseLength-1. */
-} arm_fir_interpolate_instance_q15;
+  typedef struct
+  {
+        uint8_t L;                      /**< upsample factor. */
+        uint16_t phaseLength;           /**< length of each polyphase filter component. */
+  const q15_t *pCoeffs;                 /**< points to the coefficient array. The array is of length L*phaseLength. */
+        q15_t *pState;                  /**< points to the state variable array. The array is of length blockSize+phaseLength-1. */
+  } arm_fir_interpolate_instance_q15;
 
-/**
+  /**
    * @brief Instance structure for the Q31 FIR interpolator.
    */
-typedef struct {
-    uint8_t L; /**< upsample factor. */
-    uint16_t phaseLength; /**< length of each polyphase filter component. */
-    const q31_t
-        *pCoeffs; /**< points to the coefficient array. The array is of length L*phaseLength. */
-    q31_t *
-        pState; /**< points to the state variable array. The array is of length blockSize+phaseLength-1. */
-} arm_fir_interpolate_instance_q31;
+  typedef struct
+  {
+        uint8_t L;                      /**< upsample factor. */
+        uint16_t phaseLength;           /**< length of each polyphase filter component. */
+  const q31_t *pCoeffs;                 /**< points to the coefficient array. The array is of length L*phaseLength. */
+        q31_t *pState;                  /**< points to the state variable array. The array is of length blockSize+phaseLength-1. */
+  } arm_fir_interpolate_instance_q31;
 
-/**
+  /**
    * @brief Instance structure for the floating-point FIR interpolator.
    */
-typedef struct {
-    uint8_t L; /**< upsample factor. */
-    uint16_t phaseLength; /**< length of each polyphase filter component. */
-    const float32_t
-        *pCoeffs; /**< points to the coefficient array. The array is of length L*phaseLength. */
-    float32_t *
-        pState; /**< points to the state variable array. The array is of length phaseLength+numTaps-1. */
-} arm_fir_interpolate_instance_f32;
+  typedef struct
+  {
+        uint8_t L;                     /**< upsample factor. */
+        uint16_t phaseLength;          /**< length of each polyphase filter component. */
+  const float32_t *pCoeffs;            /**< points to the coefficient array. The array is of length L*phaseLength. */
+        float32_t *pState;             /**< points to the state variable array. The array is of length blockSize+phaseLength-1. */
+  } arm_fir_interpolate_instance_f32;
 
-/**
+
+  /**
    * @brief Processing function for the Q15 FIR interpolator.
    * @param[in]  S          points to an instance of the Q15 FIR interpolator structure.
    * @param[in]  pSrc       points to the block of input data.
    * @param[out] pDst       points to the block of output data.
    * @param[in]  blockSize  number of input samples to process per call.
    */
-void arm_fir_interpolate_q15(const arm_fir_interpolate_instance_q15 *S, const q15_t *pSrc,
-                             q15_t *pDst, uint32_t blockSize);
+  void arm_fir_interpolate_q15(
+  const arm_fir_interpolate_instance_q15 * S,
+  const q15_t * pSrc,
+        q15_t * pDst,
+        uint32_t blockSize);
 
-/**
+
+  /**
    * @brief  Initialization function for the Q15 FIR interpolator.
    * @param[in,out] S          points to an instance of the Q15 FIR interpolator structure.
    * @param[in]     L          upsample factor.
@@ -797,21 +1065,30 @@ void arm_fir_interpolate_q15(const arm_fir_interpolate_instance_q15 *S, const q1
    * @return        The function returns ARM_MATH_SUCCESS if initialization is successful or ARM_MATH_LENGTH_ERROR if
    * the filter length <code>numTaps</code> is not a multiple of the interpolation factor <code>L</code>.
    */
-arm_status arm_fir_interpolate_init_q15(arm_fir_interpolate_instance_q15 *S, uint8_t L,
-                                        uint16_t numTaps, const q15_t *pCoeffs, q15_t *pState,
-                                        uint32_t blockSize);
+  arm_status arm_fir_interpolate_init_q15(
+        arm_fir_interpolate_instance_q15 * S,
+        uint8_t L,
+        uint16_t numTaps,
+  const q15_t * pCoeffs,
+        q15_t * pState,
+        uint32_t blockSize);
 
-/**
+
+  /**
    * @brief Processing function for the Q31 FIR interpolator.
    * @param[in]  S          points to an instance of the Q15 FIR interpolator structure.
    * @param[in]  pSrc       points to the block of input data.
    * @param[out] pDst       points to the block of output data.
    * @param[in]  blockSize  number of input samples to process per call.
    */
-void arm_fir_interpolate_q31(const arm_fir_interpolate_instance_q31 *S, const q31_t *pSrc,
-                             q31_t *pDst, uint32_t blockSize);
+  void arm_fir_interpolate_q31(
+  const arm_fir_interpolate_instance_q31 * S,
+  const q31_t * pSrc,
+        q31_t * pDst,
+        uint32_t blockSize);
 
-/**
+
+  /**
    * @brief  Initialization function for the Q31 FIR interpolator.
    * @param[in,out] S          points to an instance of the Q31 FIR interpolator structure.
    * @param[in]     L          upsample factor.
@@ -822,21 +1099,30 @@ void arm_fir_interpolate_q31(const arm_fir_interpolate_instance_q31 *S, const q3
    * @return        The function returns ARM_MATH_SUCCESS if initialization is successful or ARM_MATH_LENGTH_ERROR if
    * the filter length <code>numTaps</code> is not a multiple of the interpolation factor <code>L</code>.
    */
-arm_status arm_fir_interpolate_init_q31(arm_fir_interpolate_instance_q31 *S, uint8_t L,
-                                        uint16_t numTaps, const q31_t *pCoeffs, q31_t *pState,
-                                        uint32_t blockSize);
+  arm_status arm_fir_interpolate_init_q31(
+        arm_fir_interpolate_instance_q31 * S,
+        uint8_t L,
+        uint16_t numTaps,
+  const q31_t * pCoeffs,
+        q31_t * pState,
+        uint32_t blockSize);
 
-/**
+
+  /**
    * @brief Processing function for the floating-point FIR interpolator.
    * @param[in]  S          points to an instance of the floating-point FIR interpolator structure.
    * @param[in]  pSrc       points to the block of input data.
    * @param[out] pDst       points to the block of output data.
    * @param[in]  blockSize  number of input samples to process per call.
    */
-void arm_fir_interpolate_f32(const arm_fir_interpolate_instance_f32 *S, const float32_t *pSrc,
-                             float32_t *pDst, uint32_t blockSize);
+  void arm_fir_interpolate_f32(
+  const arm_fir_interpolate_instance_f32 * S,
+  const float32_t * pSrc,
+        float32_t * pDst,
+        uint32_t blockSize);
 
-/**
+
+  /**
    * @brief  Initialization function for the floating-point FIR interpolator.
    * @param[in,out] S          points to an instance of the floating-point FIR interpolator structure.
    * @param[in]     L          upsample factor.
@@ -847,217 +1133,268 @@ void arm_fir_interpolate_f32(const arm_fir_interpolate_instance_f32 *S, const fl
    * @return        The function returns ARM_MATH_SUCCESS if initialization is successful or ARM_MATH_LENGTH_ERROR if
    * the filter length <code>numTaps</code> is not a multiple of the interpolation factor <code>L</code>.
    */
-arm_status arm_fir_interpolate_init_f32(arm_fir_interpolate_instance_f32 *S, uint8_t L,
-                                        uint16_t numTaps, const float32_t *pCoeffs,
-                                        float32_t *pState, uint32_t blockSize);
+  arm_status arm_fir_interpolate_init_f32(
+        arm_fir_interpolate_instance_f32 * S,
+        uint8_t L,
+        uint16_t numTaps,
+  const float32_t * pCoeffs,
+        float32_t * pState,
+        uint32_t blockSize);
 
-/**
+
+  /**
    * @brief Instance structure for the high precision Q31 Biquad cascade filter.
    */
-typedef struct {
-    uint8_t
-        numStages; /**< number of 2nd order stages in the filter.  Overall order is 2*numStages. */
-    q63_t *
-        pState; /**< points to the array of state coefficients.  The array is of length 4*numStages. */
-    const q31_t
-        *pCoeffs; /**< points to the array of coefficients.  The array is of length 5*numStages. */
-    uint8_t postShift; /**< additional shift, in bits, applied to each output sample. */
-} arm_biquad_cas_df1_32x64_ins_q31;
+  typedef struct
+  {
+          uint8_t numStages;       /**< number of 2nd order stages in the filter.  Overall order is 2*numStages. */
+          q63_t *pState;           /**< points to the array of state coefficients.  The array is of length 4*numStages. */
+    const q31_t *pCoeffs;          /**< points to the array of coefficients.  The array is of length 5*numStages. */
+          uint8_t postShift;       /**< additional shift, in bits, applied to each output sample. */
+  } arm_biquad_cas_df1_32x64_ins_q31;
 
-/**
+
+  /**
    * @param[in]  S          points to an instance of the high precision Q31 Biquad cascade filter structure.
    * @param[in]  pSrc       points to the block of input data.
    * @param[out] pDst       points to the block of output data
    * @param[in]  blockSize  number of samples to process.
    */
-void arm_biquad_cas_df1_32x64_q31(const arm_biquad_cas_df1_32x64_ins_q31 *S, const q31_t *pSrc,
-                                  q31_t *pDst, uint32_t blockSize);
+  void arm_biquad_cas_df1_32x64_q31(
+  const arm_biquad_cas_df1_32x64_ins_q31 * S,
+  const q31_t * pSrc,
+        q31_t * pDst,
+        uint32_t blockSize);
 
-/**
+
+  /**
    * @param[in,out] S          points to an instance of the high precision Q31 Biquad cascade filter structure.
    * @param[in]     numStages  number of 2nd order stages in the filter.
    * @param[in]     pCoeffs    points to the filter coefficients.
    * @param[in]     pState     points to the state buffer.
    * @param[in]     postShift  shift to be applied to the output. Varies according to the coefficients format
    */
-void arm_biquad_cas_df1_32x64_init_q31(arm_biquad_cas_df1_32x64_ins_q31 *S, uint8_t numStages,
-                                       const q31_t *pCoeffs, q63_t *pState, uint8_t postShift);
+  void arm_biquad_cas_df1_32x64_init_q31(
+        arm_biquad_cas_df1_32x64_ins_q31 * S,
+        uint8_t numStages,
+  const q31_t * pCoeffs,
+        q63_t * pState,
+        uint8_t postShift);
 
-/**
+
+  /**
    * @brief Instance structure for the floating-point transposed direct form II Biquad cascade filter.
    */
-typedef struct {
-    uint8_t
-        numStages; /**< number of 2nd order stages in the filter.  Overall order is 2*numStages. */
-    float32_t *
-        pState; /**< points to the array of state coefficients.  The array is of length 2*numStages. */
-    const float32_t
-        *pCoeffs; /**< points to the array of coefficients.  The array is of length 5*numStages. */
-} arm_biquad_cascade_df2T_instance_f32;
+  typedef struct
+  {
+          uint8_t numStages;         /**< number of 2nd order stages in the filter.  Overall order is 2*numStages. */
+          float32_t *pState;         /**< points to the array of state coefficients.  The array is of length 2*numStages. */
+    const float32_t *pCoeffs;        /**< points to the array of coefficients.  The array is of length 5*numStages. */
+  } arm_biquad_cascade_df2T_instance_f32;
 
-/**
+  /**
    * @brief Instance structure for the floating-point transposed direct form II Biquad cascade filter.
    */
-typedef struct {
-    uint8_t
-        numStages; /**< number of 2nd order stages in the filter.  Overall order is 2*numStages. */
-    float32_t *
-        pState; /**< points to the array of state coefficients.  The array is of length 4*numStages. */
-    const float32_t
-        *pCoeffs; /**< points to the array of coefficients.  The array is of length 5*numStages. */
-} arm_biquad_cascade_stereo_df2T_instance_f32;
+  typedef struct
+  {
+          uint8_t numStages;         /**< number of 2nd order stages in the filter.  Overall order is 2*numStages. */
+          float32_t *pState;         /**< points to the array of state coefficients.  The array is of length 4*numStages. */
+    const float32_t *pCoeffs;        /**< points to the array of coefficients.  The array is of length 5*numStages. */
+  } arm_biquad_cascade_stereo_df2T_instance_f32;
 
-/**
+  /**
    * @brief Instance structure for the floating-point transposed direct form II Biquad cascade filter.
    */
-typedef struct {
-    uint8_t
-        numStages; /**< number of 2nd order stages in the filter.  Overall order is 2*numStages. */
-    float64_t *
-        pState; /**< points to the array of state coefficients.  The array is of length 2*numStages. */
-    const float64_t
-        *pCoeffs; /**< points to the array of coefficients.  The array is of length 5*numStages. */
-} arm_biquad_cascade_df2T_instance_f64;
+  typedef struct
+  {
+          uint8_t numStages;         /**< number of 2nd order stages in the filter.  Overall order is 2*numStages. */
+          float64_t *pState;         /**< points to the array of state coefficients.  The array is of length 2*numStages. */
+    const float64_t *pCoeffs;        /**< points to the array of coefficients.  The array is of length 5*numStages. */
+  } arm_biquad_cascade_df2T_instance_f64;
 
-/**
+
+  /**
    * @brief Processing function for the floating-point transposed direct form II Biquad cascade filter.
    * @param[in]  S          points to an instance of the filter data structure.
    * @param[in]  pSrc       points to the block of input data.
    * @param[out] pDst       points to the block of output data
    * @param[in]  blockSize  number of samples to process.
    */
-void arm_biquad_cascade_df2T_f32(const arm_biquad_cascade_df2T_instance_f32 *S,
-                                 const float32_t *pSrc, float32_t *pDst, uint32_t blockSize);
+  void arm_biquad_cascade_df2T_f32(
+  const arm_biquad_cascade_df2T_instance_f32 * S,
+  const float32_t * pSrc,
+        float32_t * pDst,
+        uint32_t blockSize);
 
-/**
+
+  /**
    * @brief Processing function for the floating-point transposed direct form II Biquad cascade filter. 2 channels
    * @param[in]  S          points to an instance of the filter data structure.
    * @param[in]  pSrc       points to the block of input data.
    * @param[out] pDst       points to the block of output data
    * @param[in]  blockSize  number of samples to process.
    */
-void arm_biquad_cascade_stereo_df2T_f32(const arm_biquad_cascade_stereo_df2T_instance_f32 *S,
-                                        const float32_t *pSrc, float32_t *pDst, uint32_t blockSize);
+  void arm_biquad_cascade_stereo_df2T_f32(
+  const arm_biquad_cascade_stereo_df2T_instance_f32 * S,
+  const float32_t * pSrc,
+        float32_t * pDst,
+        uint32_t blockSize);
 
-/**
+
+  /**
    * @brief Processing function for the floating-point transposed direct form II Biquad cascade filter.
    * @param[in]  S          points to an instance of the filter data structure.
    * @param[in]  pSrc       points to the block of input data.
    * @param[out] pDst       points to the block of output data
    * @param[in]  blockSize  number of samples to process.
    */
-void arm_biquad_cascade_df2T_f64(const arm_biquad_cascade_df2T_instance_f64 *S,
-                                 const float64_t *pSrc, float64_t *pDst, uint32_t blockSize);
+  void arm_biquad_cascade_df2T_f64(
+  const arm_biquad_cascade_df2T_instance_f64 * S,
+  const float64_t * pSrc,
+        float64_t * pDst,
+        uint32_t blockSize);
 
-#if defined(ARM_MATH_NEON)
+
+#if defined(ARM_MATH_NEON) 
 /**
   @brief         Compute new coefficient arrays for use in vectorized filter (Neon only).
   @param[in]     numStages         number of 2nd order stages in the filter.
   @param[in]     pCoeffs           points to the original filter coefficients.
   @param[in]     pComputedCoeffs   points to the new computed coefficients for the vectorized version.
-  @return        none
 */
-void arm_biquad_cascade_df2T_compute_coefs_f32(uint8_t numStages, const float32_t *pCoeffs,
-                                               float32_t *pComputedCoeffs);
+void arm_biquad_cascade_df2T_compute_coefs_f32(
+  uint8_t numStages,
+  const float32_t * pCoeffs,
+  float32_t * pComputedCoeffs);
 #endif
-/**
+  /**
    * @brief  Initialization function for the floating-point transposed direct form II Biquad cascade filter.
    * @param[in,out] S          points to an instance of the filter data structure.
    * @param[in]     numStages  number of 2nd order stages in the filter.
    * @param[in]     pCoeffs    points to the filter coefficients.
    * @param[in]     pState     points to the state buffer.
    */
-void arm_biquad_cascade_df2T_init_f32(arm_biquad_cascade_df2T_instance_f32 *S, uint8_t numStages,
-                                      const float32_t *pCoeffs, float32_t *pState);
+  void arm_biquad_cascade_df2T_init_f32(
+        arm_biquad_cascade_df2T_instance_f32 * S,
+        uint8_t numStages,
+  const float32_t * pCoeffs,
+        float32_t * pState);
 
-/**
+
+  /**
    * @brief  Initialization function for the floating-point transposed direct form II Biquad cascade filter.
    * @param[in,out] S          points to an instance of the filter data structure.
    * @param[in]     numStages  number of 2nd order stages in the filter.
    * @param[in]     pCoeffs    points to the filter coefficients.
    * @param[in]     pState     points to the state buffer.
    */
-void arm_biquad_cascade_stereo_df2T_init_f32(arm_biquad_cascade_stereo_df2T_instance_f32 *S,
-                                             uint8_t numStages, const float32_t *pCoeffs,
-                                             float32_t *pState);
+  void arm_biquad_cascade_stereo_df2T_init_f32(
+        arm_biquad_cascade_stereo_df2T_instance_f32 * S,
+        uint8_t numStages,
+  const float32_t * pCoeffs,
+        float32_t * pState);
 
-/**
+
+  /**
    * @brief  Initialization function for the floating-point transposed direct form II Biquad cascade filter.
    * @param[in,out] S          points to an instance of the filter data structure.
    * @param[in]     numStages  number of 2nd order stages in the filter.
    * @param[in]     pCoeffs    points to the filter coefficients.
    * @param[in]     pState     points to the state buffer.
    */
-void arm_biquad_cascade_df2T_init_f64(arm_biquad_cascade_df2T_instance_f64 *S, uint8_t numStages,
-                                      const float64_t *pCoeffs, float64_t *pState);
+  void arm_biquad_cascade_df2T_init_f64(
+        arm_biquad_cascade_df2T_instance_f64 * S,
+        uint8_t numStages,
+        const float64_t * pCoeffs,
+        float64_t * pState);
 
-/**
+
+  /**
    * @brief Instance structure for the Q15 FIR lattice filter.
    */
-typedef struct {
-    uint16_t numStages; /**< number of filter stages. */
-    q15_t *pState; /**< points to the state variable array. The array is of length numStages. */
-    const q15_t *pCoeffs; /**< points to the coefficient array. The array is of length numStages. */
-} arm_fir_lattice_instance_q15;
+  typedef struct
+  {
+          uint16_t numStages;                  /**< number of filter stages. */
+          q15_t *pState;                       /**< points to the state variable array. The array is of length numStages. */
+    const q15_t *pCoeffs;                      /**< points to the coefficient array. The array is of length numStages. */
+  } arm_fir_lattice_instance_q15;
 
-/**
+  /**
    * @brief Instance structure for the Q31 FIR lattice filter.
    */
-typedef struct {
-    uint16_t numStages; /**< number of filter stages. */
-    q31_t *pState; /**< points to the state variable array. The array is of length numStages. */
-    const q31_t *pCoeffs; /**< points to the coefficient array. The array is of length numStages. */
-} arm_fir_lattice_instance_q31;
+  typedef struct
+  {
+          uint16_t numStages;                  /**< number of filter stages. */
+          q31_t *pState;                       /**< points to the state variable array. The array is of length numStages. */
+    const q31_t *pCoeffs;                      /**< points to the coefficient array. The array is of length numStages. */
+  } arm_fir_lattice_instance_q31;
 
-/**
+  /**
    * @brief Instance structure for the floating-point FIR lattice filter.
    */
-typedef struct {
-    uint16_t numStages; /**< number of filter stages. */
-    float32_t *pState; /**< points to the state variable array. The array is of length numStages. */
-    const float32_t
-        *pCoeffs; /**< points to the coefficient array. The array is of length numStages. */
-} arm_fir_lattice_instance_f32;
+  typedef struct
+  {
+          uint16_t numStages;                  /**< number of filter stages. */
+          float32_t *pState;                   /**< points to the state variable array. The array is of length numStages. */
+    const float32_t *pCoeffs;                  /**< points to the coefficient array. The array is of length numStages. */
+  } arm_fir_lattice_instance_f32;
 
-/**
+
+  /**
    * @brief Initialization function for the Q15 FIR lattice filter.
    * @param[in] S          points to an instance of the Q15 FIR lattice structure.
    * @param[in] numStages  number of filter stages.
    * @param[in] pCoeffs    points to the coefficient buffer.  The array is of length numStages.
    * @param[in] pState     points to the state buffer.  The array is of length numStages.
    */
-void arm_fir_lattice_init_q15(arm_fir_lattice_instance_q15 *S, uint16_t numStages,
-                              const q15_t *pCoeffs, q15_t *pState);
+  void arm_fir_lattice_init_q15(
+        arm_fir_lattice_instance_q15 * S,
+        uint16_t numStages,
+  const q15_t * pCoeffs,
+        q15_t * pState);
 
-/**
+
+  /**
    * @brief Processing function for the Q15 FIR lattice filter.
    * @param[in]  S          points to an instance of the Q15 FIR lattice structure.
    * @param[in]  pSrc       points to the block of input data.
    * @param[out] pDst       points to the block of output data.
    * @param[in]  blockSize  number of samples to process.
    */
-void arm_fir_lattice_q15(const arm_fir_lattice_instance_q15 *S, const q15_t *pSrc, q15_t *pDst,
-                         uint32_t blockSize);
+  void arm_fir_lattice_q15(
+  const arm_fir_lattice_instance_q15 * S,
+  const q15_t * pSrc,
+        q15_t * pDst,
+        uint32_t blockSize);
 
-/**
+
+  /**
    * @brief Initialization function for the Q31 FIR lattice filter.
    * @param[in] S          points to an instance of the Q31 FIR lattice structure.
    * @param[in] numStages  number of filter stages.
    * @param[in] pCoeffs    points to the coefficient buffer.  The array is of length numStages.
    * @param[in] pState     points to the state buffer.   The array is of length numStages.
    */
-void arm_fir_lattice_init_q31(arm_fir_lattice_instance_q31 *S, uint16_t numStages,
-                              const q31_t *pCoeffs, q31_t *pState);
+  void arm_fir_lattice_init_q31(
+        arm_fir_lattice_instance_q31 * S,
+        uint16_t numStages,
+  const q31_t * pCoeffs,
+        q31_t * pState);
 
-/**
+
+  /**
    * @brief Processing function for the Q31 FIR lattice filter.
    * @param[in]  S          points to an instance of the Q31 FIR lattice structure.
    * @param[in]  pSrc       points to the block of input data.
    * @param[out] pDst       points to the block of output data
    * @param[in]  blockSize  number of samples to process.
    */
-void arm_fir_lattice_q31(const arm_fir_lattice_instance_q31 *S, const q31_t *pSrc, q31_t *pDst,
-                         uint32_t blockSize);
+  void arm_fir_lattice_q31(
+  const arm_fir_lattice_instance_q31 * S,
+  const q31_t * pSrc,
+        q31_t * pDst,
+        uint32_t blockSize);
+
 
 /**
  * @brief Initialization function for the floating-point FIR lattice filter.
@@ -1066,69 +1403,76 @@ void arm_fir_lattice_q31(const arm_fir_lattice_instance_q31 *S, const q31_t *pSr
  * @param[in] pCoeffs    points to the coefficient buffer.  The array is of length numStages.
  * @param[in] pState     points to the state buffer.  The array is of length numStages.
  */
-void arm_fir_lattice_init_f32(arm_fir_lattice_instance_f32 *S, uint16_t numStages,
-                              const float32_t *pCoeffs, float32_t *pState);
+  void arm_fir_lattice_init_f32(
+        arm_fir_lattice_instance_f32 * S,
+        uint16_t numStages,
+  const float32_t * pCoeffs,
+        float32_t * pState);
 
-/**
+
+  /**
    * @brief Processing function for the floating-point FIR lattice filter.
    * @param[in]  S          points to an instance of the floating-point FIR lattice structure.
    * @param[in]  pSrc       points to the block of input data.
    * @param[out] pDst       points to the block of output data
    * @param[in]  blockSize  number of samples to process.
    */
-void arm_fir_lattice_f32(const arm_fir_lattice_instance_f32 *S, const float32_t *pSrc,
-                         float32_t *pDst, uint32_t blockSize);
+  void arm_fir_lattice_f32(
+  const arm_fir_lattice_instance_f32 * S,
+  const float32_t * pSrc,
+        float32_t * pDst,
+        uint32_t blockSize);
 
-/**
+
+  /**
    * @brief Instance structure for the Q15 IIR lattice filter.
    */
-typedef struct {
-    uint16_t numStages; /**< number of stages in the filter. */
-    q15_t *
-        pState; /**< points to the state variable array. The array is of length numStages+blockSize. */
-    q15_t *
-        pkCoeffs; /**< points to the reflection coefficient array. The array is of length numStages. */
-    q15_t *
-        pvCoeffs; /**< points to the ladder coefficient array. The array is of length numStages+1. */
-} arm_iir_lattice_instance_q15;
+  typedef struct
+  {
+          uint16_t numStages;                  /**< number of stages in the filter. */
+          q15_t *pState;                       /**< points to the state variable array. The array is of length numStages+blockSize. */
+          q15_t *pkCoeffs;                     /**< points to the reflection coefficient array. The array is of length numStages. */
+          q15_t *pvCoeffs;                     /**< points to the ladder coefficient array. The array is of length numStages+1. */
+  } arm_iir_lattice_instance_q15;
 
-/**
+  /**
    * @brief Instance structure for the Q31 IIR lattice filter.
    */
-typedef struct {
-    uint16_t numStages; /**< number of stages in the filter. */
-    q31_t *
-        pState; /**< points to the state variable array. The array is of length numStages+blockSize. */
-    q31_t *
-        pkCoeffs; /**< points to the reflection coefficient array. The array is of length numStages. */
-    q31_t *
-        pvCoeffs; /**< points to the ladder coefficient array. The array is of length numStages+1. */
-} arm_iir_lattice_instance_q31;
+  typedef struct
+  {
+          uint16_t numStages;                  /**< number of stages in the filter. */
+          q31_t *pState;                       /**< points to the state variable array. The array is of length numStages+blockSize. */
+          q31_t *pkCoeffs;                     /**< points to the reflection coefficient array. The array is of length numStages. */
+          q31_t *pvCoeffs;                     /**< points to the ladder coefficient array. The array is of length numStages+1. */
+  } arm_iir_lattice_instance_q31;
 
-/**
+  /**
    * @brief Instance structure for the floating-point IIR lattice filter.
    */
-typedef struct {
-    uint16_t numStages; /**< number of stages in the filter. */
-    float32_t *
-        pState; /**< points to the state variable array. The array is of length numStages+blockSize. */
-    float32_t *
-        pkCoeffs; /**< points to the reflection coefficient array. The array is of length numStages. */
-    float32_t *
-        pvCoeffs; /**< points to the ladder coefficient array. The array is of length numStages+1. */
-} arm_iir_lattice_instance_f32;
+  typedef struct
+  {
+          uint16_t numStages;                  /**< number of stages in the filter. */
+          float32_t *pState;                   /**< points to the state variable array. The array is of length numStages+blockSize. */
+          float32_t *pkCoeffs;                 /**< points to the reflection coefficient array. The array is of length numStages. */
+          float32_t *pvCoeffs;                 /**< points to the ladder coefficient array. The array is of length numStages+1. */
+  } arm_iir_lattice_instance_f32;
 
-/**
+
+  /**
    * @brief Processing function for the floating-point IIR lattice filter.
    * @param[in]  S          points to an instance of the floating-point IIR lattice structure.
    * @param[in]  pSrc       points to the block of input data.
    * @param[out] pDst       points to the block of output data.
    * @param[in]  blockSize  number of samples to process.
    */
-void arm_iir_lattice_f32(const arm_iir_lattice_instance_f32 *S, const float32_t *pSrc,
-                         float32_t *pDst, uint32_t blockSize);
+  void arm_iir_lattice_f32(
+  const arm_iir_lattice_instance_f32 * S,
+  const float32_t * pSrc,
+        float32_t * pDst,
+        uint32_t blockSize);
 
-/**
+
+  /**
    * @brief Initialization function for the floating-point IIR lattice filter.
    * @param[in] S          points to an instance of the floating-point IIR lattice structure.
    * @param[in] numStages  number of stages in the filter.
@@ -1137,21 +1481,30 @@ void arm_iir_lattice_f32(const arm_iir_lattice_instance_f32 *S, const float32_t
    * @param[in] pState     points to the state buffer.  The array is of length numStages+blockSize-1.
    * @param[in] blockSize  number of samples to process.
    */
-void arm_iir_lattice_init_f32(arm_iir_lattice_instance_f32 *S, uint16_t numStages,
-                              float32_t *pkCoeffs, float32_t *pvCoeffs, float32_t *pState,
-                              uint32_t blockSize);
+  void arm_iir_lattice_init_f32(
+        arm_iir_lattice_instance_f32 * S,
+        uint16_t numStages,
+        float32_t * pkCoeffs,
+        float32_t * pvCoeffs,
+        float32_t * pState,
+        uint32_t blockSize);
 
-/**
+
+  /**
    * @brief Processing function for the Q31 IIR lattice filter.
    * @param[in]  S          points to an instance of the Q31 IIR lattice structure.
    * @param[in]  pSrc       points to the block of input data.
    * @param[out] pDst       points to the block of output data.
    * @param[in]  blockSize  number of samples to process.
    */
-void arm_iir_lattice_q31(const arm_iir_lattice_instance_q31 *S, const q31_t *pSrc, q31_t *pDst,
-                         uint32_t blockSize);
+  void arm_iir_lattice_q31(
+  const arm_iir_lattice_instance_q31 * S,
+  const q31_t * pSrc,
+        q31_t * pDst,
+        uint32_t blockSize);
 
-/**
+
+  /**
    * @brief Initialization function for the Q31 IIR lattice filter.
    * @param[in] S          points to an instance of the Q31 IIR lattice structure.
    * @param[in] numStages  number of stages in the filter.
@@ -1160,18 +1513,28 @@ void arm_iir_lattice_q31(const arm_iir_lattice_instance_q31 *S, const q31_t *pSr
    * @param[in] pState     points to the state buffer.  The array is of length numStages+blockSize.
    * @param[in] blockSize  number of samples to process.
    */
-void arm_iir_lattice_init_q31(arm_iir_lattice_instance_q31 *S, uint16_t numStages, q31_t *pkCoeffs,
-                              q31_t *pvCoeffs, q31_t *pState, uint32_t blockSize);
+  void arm_iir_lattice_init_q31(
+        arm_iir_lattice_instance_q31 * S,
+        uint16_t numStages,
+        q31_t * pkCoeffs,
+        q31_t * pvCoeffs,
+        q31_t * pState,
+        uint32_t blockSize);
 
-/**
+
+  /**
    * @brief Processing function for the Q15 IIR lattice filter.
    * @param[in]  S          points to an instance of the Q15 IIR lattice structure.
    * @param[in]  pSrc       points to the block of input data.
    * @param[out] pDst       points to the block of output data.
    * @param[in]  blockSize  number of samples to process.
    */
-void arm_iir_lattice_q15(const arm_iir_lattice_instance_q15 *S, const q15_t *pSrc, q15_t *pDst,
-                         uint32_t blockSize);
+  void arm_iir_lattice_q15(
+  const arm_iir_lattice_instance_q15 * S,
+  const q15_t * pSrc,
+        q15_t * pDst,
+        uint32_t blockSize);
+
 
 /**
  * @brief Initialization function for the Q15 IIR lattice filter.
@@ -1182,21 +1545,28 @@ void arm_iir_lattice_q15(const arm_iir_lattice_instance_q15 *S, const q15_t *pSr
  * @param[in] pState     points to state buffer.  The array is of length numStages+blockSize.
  * @param[in] blockSize  number of samples to process per call.
  */
-void arm_iir_lattice_init_q15(arm_iir_lattice_instance_q15 *S, uint16_t numStages, q15_t *pkCoeffs,
-                              q15_t *pvCoeffs, q15_t *pState, uint32_t blockSize);
+  void arm_iir_lattice_init_q15(
+        arm_iir_lattice_instance_q15 * S,
+        uint16_t numStages,
+        q15_t * pkCoeffs,
+        q15_t * pvCoeffs,
+        q15_t * pState,
+        uint32_t blockSize);
 
-/**
+
+  /**
    * @brief Instance structure for the floating-point LMS filter.
    */
-typedef struct {
-    uint16_t numTaps; /**< number of coefficients in the filter. */
-    float32_t *
-        pState; /**< points to the state variable array. The array is of length numTaps+blockSize-1. */
-    float32_t *pCoeffs; /**< points to the coefficient array. The array is of length numTaps. */
-    float32_t mu; /**< step size that controls filter coefficient updates. */
-} arm_lms_instance_f32;
+  typedef struct
+  {
+          uint16_t numTaps;    /**< number of coefficients in the filter. */
+          float32_t *pState;   /**< points to the state variable array. The array is of length numTaps+blockSize-1. */
+          float32_t *pCoeffs;  /**< points to the coefficient array. The array is of length numTaps. */
+          float32_t mu;        /**< step size that controls filter coefficient updates. */
+  } arm_lms_instance_f32;
 
-/**
+
+  /**
    * @brief Processing function for floating-point LMS filter.
    * @param[in]  S          points to an instance of the floating-point LMS filter structure.
    * @param[in]  pSrc       points to the block of input data.
@@ -1205,10 +1575,16 @@ typedef struct {
    * @param[out] pErr       points to the block of error data.
    * @param[in]  blockSize  number of samples to process.
    */
-void arm_lms_f32(const arm_lms_instance_f32 *S, const float32_t *pSrc, float32_t *pRef,
-                 float32_t *pOut, float32_t *pErr, uint32_t blockSize);
+  void arm_lms_f32(
+  const arm_lms_instance_f32 * S,
+  const float32_t * pSrc,
+        float32_t * pRef,
+        float32_t * pOut,
+        float32_t * pErr,
+        uint32_t blockSize);
 
-/**
+
+  /**
    * @brief Initialization function for floating-point LMS filter.
    * @param[in] S          points to an instance of the floating-point LMS filter structure.
    * @param[in] numTaps    number of filter coefficients.
@@ -1217,22 +1593,29 @@ void arm_lms_f32(const arm_lms_instance_f32 *S, const float32_t *pSrc, float32_t
    * @param[in] mu         step size that controls filter coefficient updates.
    * @param[in] blockSize  number of samples to process.
    */
-void arm_lms_init_f32(arm_lms_instance_f32 *S, uint16_t numTaps, float32_t *pCoeffs,
-                      float32_t *pState, float32_t mu, uint32_t blockSize);
+  void arm_lms_init_f32(
+        arm_lms_instance_f32 * S,
+        uint16_t numTaps,
+        float32_t * pCoeffs,
+        float32_t * pState,
+        float32_t mu,
+        uint32_t blockSize);
 
-/**
+
+  /**
    * @brief Instance structure for the Q15 LMS filter.
    */
-typedef struct {
-    uint16_t numTaps; /**< number of coefficients in the filter. */
-    q15_t *
-        pState; /**< points to the state variable array. The array is of length numTaps+blockSize-1. */
-    q15_t *pCoeffs; /**< points to the coefficient array. The array is of length numTaps. */
-    q15_t mu; /**< step size that controls filter coefficient updates. */
-    uint32_t postShift; /**< bit shift applied to coefficients. */
-} arm_lms_instance_q15;
+  typedef struct
+  {
+          uint16_t numTaps;    /**< number of coefficients in the filter. */
+          q15_t *pState;       /**< points to the state variable array. The array is of length numTaps+blockSize-1. */
+          q15_t *pCoeffs;      /**< points to the coefficient array. The array is of length numTaps. */
+          q15_t mu;            /**< step size that controls filter coefficient updates. */
+          uint32_t postShift;  /**< bit shift applied to coefficients. */
+  } arm_lms_instance_q15;
 
-/**
+
+  /**
    * @brief Initialization function for the Q15 LMS filter.
    * @param[in] S          points to an instance of the Q15 LMS filter structure.
    * @param[in] numTaps    number of filter coefficients.
@@ -1242,10 +1625,17 @@ typedef struct {
    * @param[in] blockSize  number of samples to process.
    * @param[in] postShift  bit shift applied to coefficients.
    */
-void arm_lms_init_q15(arm_lms_instance_q15 *S, uint16_t numTaps, q15_t *pCoeffs, q15_t *pState,
-                      q15_t mu, uint32_t blockSize, uint32_t postShift);
+  void arm_lms_init_q15(
+        arm_lms_instance_q15 * S,
+        uint16_t numTaps,
+        q15_t * pCoeffs,
+        q15_t * pState,
+        q15_t mu,
+        uint32_t blockSize,
+        uint32_t postShift);
 
-/**
+
+  /**
    * @brief Processing function for Q15 LMS filter.
    * @param[in]  S          points to an instance of the Q15 LMS filter structure.
    * @param[in]  pSrc       points to the block of input data.
@@ -1254,22 +1644,29 @@ void arm_lms_init_q15(arm_lms_instance_q15 *S, uint16_t numTaps, q15_t *pCoeffs,
    * @param[out] pErr       points to the block of error data.
    * @param[in]  blockSize  number of samples to process.
    */
-void arm_lms_q15(const arm_lms_instance_q15 *S, const q15_t *pSrc, q15_t *pRef, q15_t *pOut,
-                 q15_t *pErr, uint32_t blockSize);
+  void arm_lms_q15(
+  const arm_lms_instance_q15 * S,
+  const q15_t * pSrc,
+        q15_t * pRef,
+        q15_t * pOut,
+        q15_t * pErr,
+        uint32_t blockSize);
 
-/**
+
+  /**
    * @brief Instance structure for the Q31 LMS filter.
    */
-typedef struct {
-    uint16_t numTaps; /**< number of coefficients in the filter. */
-    q31_t *
-        pState; /**< points to the state variable array. The array is of length numTaps+blockSize-1. */
-    q31_t *pCoeffs; /**< points to the coefficient array. The array is of length numTaps. */
-    q31_t mu; /**< step size that controls filter coefficient updates. */
-    uint32_t postShift; /**< bit shift applied to coefficients. */
-} arm_lms_instance_q31;
+  typedef struct
+  {
+          uint16_t numTaps;    /**< number of coefficients in the filter. */
+          q31_t *pState;       /**< points to the state variable array. The array is of length numTaps+blockSize-1. */
+          q31_t *pCoeffs;      /**< points to the coefficient array. The array is of length numTaps. */
+          q31_t mu;            /**< step size that controls filter coefficient updates. */
+          uint32_t postShift;  /**< bit shift applied to coefficients. */
+  } arm_lms_instance_q31;
 
-/**
+
+  /**
    * @brief Processing function for Q31 LMS filter.
    * @param[in]  S          points to an instance of the Q15 LMS filter structure.
    * @param[in]  pSrc       points to the block of input data.
@@ -1278,10 +1675,16 @@ typedef struct {
    * @param[out] pErr       points to the block of error data.
    * @param[in]  blockSize  number of samples to process.
    */
-void arm_lms_q31(const arm_lms_instance_q31 *S, const q31_t *pSrc, q31_t *pRef, q31_t *pOut,
-                 q31_t *pErr, uint32_t blockSize);
+  void arm_lms_q31(
+  const arm_lms_instance_q31 * S,
+  const q31_t * pSrc,
+        q31_t * pRef,
+        q31_t * pOut,
+        q31_t * pErr,
+        uint32_t blockSize);
 
-/**
+
+  /**
    * @brief Initialization function for Q31 LMS filter.
    * @param[in] S          points to an instance of the Q31 LMS filter structure.
    * @param[in] numTaps    number of filter coefficients.
@@ -1291,23 +1694,31 @@ void arm_lms_q31(const arm_lms_instance_q31 *S, const q31_t *pSrc, q31_t *pRef,
    * @param[in] blockSize  number of samples to process.
    * @param[in] postShift  bit shift applied to coefficients.
    */
-void arm_lms_init_q31(arm_lms_instance_q31 *S, uint16_t numTaps, q31_t *pCoeffs, q31_t *pState,
-                      q31_t mu, uint32_t blockSize, uint32_t postShift);
+  void arm_lms_init_q31(
+        arm_lms_instance_q31 * S,
+        uint16_t numTaps,
+        q31_t * pCoeffs,
+        q31_t * pState,
+        q31_t mu,
+        uint32_t blockSize,
+        uint32_t postShift);
 
-/**
+
+  /**
    * @brief Instance structure for the floating-point normalized LMS filter.
    */
-typedef struct {
-    uint16_t numTaps; /**< number of coefficients in the filter. */
-    float32_t *
-        pState; /**< points to the state variable array. The array is of length numTaps+blockSize-1. */
-    float32_t *pCoeffs; /**< points to the coefficient array. The array is of length numTaps. */
-    float32_t mu; /**< step size that control filter coefficient updates. */
-    float32_t energy; /**< saves previous frame energy. */
-    float32_t x0; /**< saves previous input sample. */
-} arm_lms_norm_instance_f32;
+  typedef struct
+  {
+          uint16_t numTaps;     /**< number of coefficients in the filter. */
+          float32_t *pState;    /**< points to the state variable array. The array is of length numTaps+blockSize-1. */
+          float32_t *pCoeffs;   /**< points to the coefficient array. The array is of length numTaps. */
+          float32_t mu;         /**< step size that controls filter coefficient updates. */
+          float32_t energy;     /**< saves previous frame energy. */
+          float32_t x0;         /**< saves previous input sample. */
+  } arm_lms_norm_instance_f32;
 
-/**
+
+  /**
    * @brief Processing function for floating-point normalized LMS filter.
    * @param[in]  S          points to an instance of the floating-point normalized LMS filter structure.
    * @param[in]  pSrc       points to the block of input data.
@@ -1316,10 +1727,16 @@ typedef struct {
    * @param[out] pErr       points to the block of error data.
    * @param[in]  blockSize  number of samples to process.
    */
-void arm_lms_norm_f32(arm_lms_norm_instance_f32 *S, const float32_t *pSrc, float32_t *pRef,
-                      float32_t *pOut, float32_t *pErr, uint32_t blockSize);
+  void arm_lms_norm_f32(
+        arm_lms_norm_instance_f32 * S,
+  const float32_t * pSrc,
+        float32_t * pRef,
+        float32_t * pOut,
+        float32_t * pErr,
+        uint32_t blockSize);
 
-/**
+
+  /**
    * @brief Initialization function for floating-point normalized LMS filter.
    * @param[in] S          points to an instance of the floating-point LMS filter structure.
    * @param[in] numTaps    number of filter coefficients.
@@ -1328,25 +1745,32 @@ void arm_lms_norm_f32(arm_lms_norm_instance_f32 *S, const float32_t *pSrc, float
    * @param[in] mu         step size that controls filter coefficient updates.
    * @param[in] blockSize  number of samples to process.
    */
-void arm_lms_norm_init_f32(arm_lms_norm_instance_f32 *S, uint16_t numTaps, float32_t *pCoeffs,
-                           float32_t *pState, float32_t mu, uint32_t blockSize);
+  void arm_lms_norm_init_f32(
+        arm_lms_norm_instance_f32 * S,
+        uint16_t numTaps,
+        float32_t * pCoeffs,
+        float32_t * pState,
+        float32_t mu,
+        uint32_t blockSize);
 
-/**
+
+  /**
    * @brief Instance structure for the Q31 normalized LMS filter.
    */
-typedef struct {
-    uint16_t numTaps; /**< number of coefficients in the filter. */
-    q31_t *
-        pState; /**< points to the state variable array. The array is of length numTaps+blockSize-1. */
-    q31_t *pCoeffs; /**< points to the coefficient array. The array is of length numTaps. */
-    q31_t mu; /**< step size that controls filter coefficient updates. */
-    uint8_t postShift; /**< bit shift applied to coefficients. */
-    const q31_t *recipTable; /**< points to the reciprocal initial value table. */
-    q31_t energy; /**< saves previous frame energy. */
-    q31_t x0; /**< saves previous input sample. */
-} arm_lms_norm_instance_q31;
+  typedef struct
+  {
+          uint16_t numTaps;     /**< number of coefficients in the filter. */
+          q31_t *pState;        /**< points to the state variable array. The array is of length numTaps+blockSize-1. */
+          q31_t *pCoeffs;       /**< points to the coefficient array. The array is of length numTaps. */
+          q31_t mu;             /**< step size that controls filter coefficient updates. */
+          uint8_t postShift;    /**< bit shift applied to coefficients. */
+    const q31_t *recipTable;    /**< points to the reciprocal initial value table. */
+          q31_t energy;         /**< saves previous frame energy. */
+          q31_t x0;             /**< saves previous input sample. */
+  } arm_lms_norm_instance_q31;
 
-/**
+
+  /**
    * @brief Processing function for Q31 normalized LMS filter.
    * @param[in]  S          points to an instance of the Q31 normalized LMS filter structure.
    * @param[in]  pSrc       points to the block of input data.
@@ -1355,10 +1779,16 @@ typedef struct {
    * @param[out] pErr       points to the block of error data.
    * @param[in]  blockSize  number of samples to process.
    */
-void arm_lms_norm_q31(arm_lms_norm_instance_q31 *S, const q31_t *pSrc, q31_t *pRef, q31_t *pOut,
-                      q31_t *pErr, uint32_t blockSize);
+  void arm_lms_norm_q31(
+        arm_lms_norm_instance_q31 * S,
+  const q31_t * pSrc,
+        q31_t * pRef,
+        q31_t * pOut,
+        q31_t * pErr,
+        uint32_t blockSize);
 
-/**
+
+  /**
    * @brief Initialization function for Q31 normalized LMS filter.
    * @param[in] S          points to an instance of the Q31 normalized LMS filter structure.
    * @param[in] numTaps    number of filter coefficients.
@@ -1368,25 +1798,33 @@ void arm_lms_norm_q31(arm_lms_norm_instance_q31 *S, const q31_t *pSrc, q31_t *pR
    * @param[in] blockSize  number of samples to process.
    * @param[in] postShift  bit shift applied to coefficients.
    */
-void arm_lms_norm_init_q31(arm_lms_norm_instance_q31 *S, uint16_t numTaps, q31_t *pCoeffs,
-                           q31_t *pState, q31_t mu, uint32_t blockSize, uint8_t postShift);
+  void arm_lms_norm_init_q31(
+        arm_lms_norm_instance_q31 * S,
+        uint16_t numTaps,
+        q31_t * pCoeffs,
+        q31_t * pState,
+        q31_t mu,
+        uint32_t blockSize,
+        uint8_t postShift);
 
-/**
+
+  /**
    * @brief Instance structure for the Q15 normalized LMS filter.
    */
-typedef struct {
-    uint16_t numTaps; /**< Number of coefficients in the filter. */
-    q15_t *
-        pState; /**< points to the state variable array. The array is of length numTaps+blockSize-1. */
-    q15_t *pCoeffs; /**< points to the coefficient array. The array is of length numTaps. */
-    q15_t mu; /**< step size that controls filter coefficient updates. */
-    uint8_t postShift; /**< bit shift applied to coefficients. */
-    const q15_t *recipTable; /**< Points to the reciprocal initial value table. */
-    q15_t energy; /**< saves previous frame energy. */
-    q15_t x0; /**< saves previous input sample. */
-} arm_lms_norm_instance_q15;
+  typedef struct
+  {
+          uint16_t numTaps;     /**< number of coefficients in the filter. */
+          q15_t *pState;        /**< points to the state variable array. The array is of length numTaps+blockSize-1. */
+          q15_t *pCoeffs;       /**< points to the coefficient array. The array is of length numTaps. */
+          q15_t mu;             /**< step size that controls filter coefficient updates. */
+          uint8_t postShift;    /**< bit shift applied to coefficients. */
+    const q15_t *recipTable;    /**< points to the reciprocal initial value table. */
+          q15_t energy;         /**< saves previous frame energy. */
+          q15_t x0;             /**< saves previous input sample. */
+  } arm_lms_norm_instance_q15;
 
-/**
+
+  /**
    * @brief Processing function for Q15 normalized LMS filter.
    * @param[in]  S          points to an instance of the Q15 normalized LMS filter structure.
    * @param[in]  pSrc       points to the block of input data.
@@ -1395,10 +1833,16 @@ typedef struct {
    * @param[out] pErr       points to the block of error data.
    * @param[in]  blockSize  number of samples to process.
    */
-void arm_lms_norm_q15(arm_lms_norm_instance_q15 *S, const q15_t *pSrc, q15_t *pRef, q15_t *pOut,
-                      q15_t *pErr, uint32_t blockSize);
+  void arm_lms_norm_q15(
+        arm_lms_norm_instance_q15 * S,
+  const q15_t * pSrc,
+        q15_t * pRef,
+        q15_t * pOut,
+        q15_t * pErr,
+        uint32_t blockSize);
 
-/**
+
+  /**
    * @brief Initialization function for Q15 normalized LMS filter.
    * @param[in] S          points to an instance of the Q15 normalized LMS filter structure.
    * @param[in] numTaps    number of filter coefficients.
@@ -1408,10 +1852,17 @@ void arm_lms_norm_q15(arm_lms_norm_instance_q15 *S, const q15_t *pSrc, q15_t *pR
    * @param[in] blockSize  number of samples to process.
    * @param[in] postShift  bit shift applied to coefficients.
    */
-void arm_lms_norm_init_q15(arm_lms_norm_instance_q15 *S, uint16_t numTaps, q15_t *pCoeffs,
-                           q15_t *pState, q15_t mu, uint32_t blockSize, uint8_t postShift);
+  void arm_lms_norm_init_q15(
+        arm_lms_norm_instance_q15 * S,
+        uint16_t numTaps,
+        q15_t * pCoeffs,
+        q15_t * pState,
+        q15_t mu,
+        uint32_t blockSize,
+        uint8_t postShift);
 
-/**
+
+  /**
    * @brief Correlation of floating-point sequences.
    * @param[in]  pSrcA    points to the first input sequence.
    * @param[in]  srcALen  length of the first input sequence.
@@ -1419,10 +1870,15 @@ void arm_lms_norm_init_q15(arm_lms_norm_instance_q15 *S, uint16_t numTaps, q15_t
    * @param[in]  srcBLen  length of the second input sequence.
    * @param[out] pDst     points to the block of output data  Length 2 * max(srcALen, srcBLen) - 1.
    */
-void arm_correlate_f32(const float32_t *pSrcA, uint32_t srcALen, const float32_t *pSrcB,
-                       uint32_t srcBLen, float32_t *pDst);
+  void arm_correlate_f32(
+  const float32_t * pSrcA,
+        uint32_t srcALen,
+  const float32_t * pSrcB,
+        uint32_t srcBLen,
+        float32_t * pDst);
 
-/**
+
+  /**
    * @brief Correlation of floating-point sequences.
    * @param[in]  pSrcA    points to the first input sequence.
    * @param[in]  srcALen  length of the first input sequence.
@@ -1430,8 +1886,13 @@ void arm_correlate_f32(const float32_t *pSrcA, uint32_t srcALen, const float32_t
    * @param[in]  srcBLen  length of the second input sequence.
    * @param[out] pDst     points to the block of output data  Length 2 * max(srcALen, srcBLen) - 1.
    */
-void arm_correlate_f64(const float64_t *pSrcA, uint32_t srcALen, const float64_t *pSrcB,
-                       uint32_t srcBLen, float64_t *pDst);
+  void arm_correlate_f64(
+  const float64_t * pSrcA,
+        uint32_t srcALen,
+  const float64_t * pSrcB,
+        uint32_t srcBLen,
+        float64_t * pDst);
+
 
 /**
  @brief Correlation of Q15 sequences
@@ -1442,8 +1903,14 @@ void arm_correlate_f64(const float64_t *pSrcA, uint32_t srcALen, const float64_t
  @param[out] pDst      points to the block of output data  Length 2 * max(srcALen, srcBLen) - 1.
  @param[in]  pScratch  points to scratch buffer of size max(srcALen, srcBLen) + 2*min(srcALen, srcBLen) - 2.
 */
-void arm_correlate_opt_q15(const q15_t *pSrcA, uint32_t srcALen, const q15_t *pSrcB,
-                           uint32_t srcBLen, q15_t *pDst, q15_t *pScratch);
+void arm_correlate_opt_q15(
+  const q15_t * pSrcA,
+        uint32_t srcALen,
+  const q15_t * pSrcB,
+        uint32_t srcBLen,
+        q15_t * pDst,
+        q15_t * pScratch);
+
 
 /**
   @brief Correlation of Q15 sequences.
@@ -1453,8 +1920,13 @@ void arm_correlate_opt_q15(const q15_t *pSrcA, uint32_t srcALen, const q15_t *pS
   @param[in]  srcBLen  length of the second input sequence
   @param[out] pDst     points to the block of output data  Length 2 * max(srcALen, srcBLen) - 1.
  */
-void arm_correlate_q15(const q15_t *pSrcA, uint32_t srcALen, const q15_t *pSrcB, uint32_t srcBLen,
-                       q15_t *pDst);
+  void arm_correlate_q15(
+  const q15_t * pSrcA,
+        uint32_t srcALen,
+  const q15_t * pSrcB,
+        uint32_t srcBLen,
+        q15_t * pDst);
+
 
 /**
   @brief         Correlation of Q15 sequences (fast version).
@@ -1463,10 +1935,14 @@ void arm_correlate_q15(const q15_t *pSrcA, uint32_t srcALen, const q15_t *pSrcB,
   @param[in]     pSrcB      points to the second input sequence
   @param[in]     srcBLen    length of the second input sequence
   @param[out]    pDst       points to the location where the output result is written.  Length 2 * max(srcALen, srcBLen) - 1.
-  @return        none
  */
-void arm_correlate_fast_q15(const q15_t *pSrcA, uint32_t srcALen, const q15_t *pSrcB,
-                            uint32_t srcBLen, q15_t *pDst);
+void arm_correlate_fast_q15(
+  const q15_t * pSrcA,
+        uint32_t srcALen,
+  const q15_t * pSrcB,
+        uint32_t srcBLen,
+        q15_t * pDst);
+
 
 /**
   @brief Correlation of Q15 sequences (fast version).
@@ -1477,10 +1953,16 @@ void arm_correlate_fast_q15(const q15_t *pSrcA, uint32_t srcALen, const q15_t *p
   @param[out] pDst      points to the block of output data  Length 2 * max(srcALen, srcBLen) - 1.
   @param[in]  pScratch  points to scratch buffer of size max(srcALen, srcBLen) + 2*min(srcALen, srcBLen) - 2.
  */
-void arm_correlate_fast_opt_q15(const q15_t *pSrcA, uint32_t srcALen, const q15_t *pSrcB,
-                                uint32_t srcBLen, q15_t *pDst, q15_t *pScratch);
+void arm_correlate_fast_opt_q15(
+  const q15_t * pSrcA,
+        uint32_t srcALen,
+  const q15_t * pSrcB,
+        uint32_t srcBLen,
+        q15_t * pDst,
+        q15_t * pScratch);
 
-/**
+
+  /**
    * @brief Correlation of Q31 sequences.
    * @param[in]  pSrcA    points to the first input sequence.
    * @param[in]  srcALen  length of the first input sequence.
@@ -1488,8 +1970,13 @@ void arm_correlate_fast_opt_q15(const q15_t *pSrcA, uint32_t srcALen, const q15_
    * @param[in]  srcBLen  length of the second input sequence.
    * @param[out] pDst     points to the block of output data  Length 2 * max(srcALen, srcBLen) - 1.
    */
-void arm_correlate_q31(const q31_t *pSrcA, uint32_t srcALen, const q31_t *pSrcB, uint32_t srcBLen,
-                       q31_t *pDst);
+  void arm_correlate_q31(
+  const q31_t * pSrcA,
+        uint32_t srcALen,
+  const q31_t * pSrcB,
+        uint32_t srcBLen,
+        q31_t * pDst);
+
 
 /**
   @brief Correlation of Q31 sequences (fast version).
@@ -1499,10 +1986,15 @@ void arm_correlate_q31(const q31_t *pSrcA, uint32_t srcALen, const q31_t *pSrcB,
   @param[in]  srcBLen  length of the second input sequence
   @param[out] pDst     points to the block of output data  Length 2 * max(srcALen, srcBLen) - 1.
  */
-void arm_correlate_fast_q31(const q31_t *pSrcA, uint32_t srcALen, const q31_t *pSrcB,
-                            uint32_t srcBLen, q31_t *pDst);
+void arm_correlate_fast_q31(
+  const q31_t * pSrcA,
+        uint32_t srcALen,
+  const q31_t * pSrcB,
+        uint32_t srcBLen,
+        q31_t * pDst);
 
-/**
+
+ /**
    * @brief Correlation of Q7 sequences.
    * @param[in]  pSrcA      points to the first input sequence.
    * @param[in]  srcALen    length of the first input sequence.
@@ -1512,10 +2004,17 @@ void arm_correlate_fast_q31(const q31_t *pSrcA, uint32_t srcALen, const q31_t *p
    * @param[in]  pScratch1  points to scratch buffer(of type q15_t) of size max(srcALen, srcBLen) + 2*min(srcALen, srcBLen) - 2.
    * @param[in]  pScratch2  points to scratch buffer (of type q15_t) of size min(srcALen, srcBLen).
    */
-void arm_correlate_opt_q7(const q7_t *pSrcA, uint32_t srcALen, const q7_t *pSrcB, uint32_t srcBLen,
-                          q7_t *pDst, q15_t *pScratch1, q15_t *pScratch2);
+  void arm_correlate_opt_q7(
+  const q7_t * pSrcA,
+        uint32_t srcALen,
+  const q7_t * pSrcB,
+        uint32_t srcBLen,
+        q7_t * pDst,
+        q15_t * pScratch1,
+        q15_t * pScratch2);
 
-/**
+
+  /**
    * @brief Correlation of Q7 sequences.
    * @param[in]  pSrcA    points to the first input sequence.
    * @param[in]  srcALen  length of the first input sequence.
@@ -1523,61 +2022,68 @@ void arm_correlate_opt_q7(const q7_t *pSrcA, uint32_t srcALen, const q7_t *pSrcB
    * @param[in]  srcBLen  length of the second input sequence.
    * @param[out] pDst     points to the block of output data  Length 2 * max(srcALen, srcBLen) - 1.
    */
-void arm_correlate_q7(const q7_t *pSrcA, uint32_t srcALen, const q7_t *pSrcB, uint32_t srcBLen,
-                      q7_t *pDst);
+  void arm_correlate_q7(
+  const q7_t * pSrcA,
+        uint32_t srcALen,
+  const q7_t * pSrcB,
+        uint32_t srcBLen,
+        q7_t * pDst);
 
-/**
+
+  /**
    * @brief Instance structure for the floating-point sparse FIR filter.
    */
-typedef struct {
-    uint16_t numTaps; /**< number of coefficients in the filter. */
-    uint16_t stateIndex; /**< state buffer index.  Points to the oldest sample in the state buffer. */
-    float32_t *
-        pState; /**< points to the state buffer array. The array is of length maxDelay+blockSize-1. */
-    const float32_t *pCoeffs; /**< points to the coefficient array. The array is of length numTaps.*/
-    uint16_t maxDelay; /**< maximum offset specified by the pTapDelay array. */
-    int32_t *pTapDelay; /**< points to the array of delay values.  The array is of length numTaps. */
-} arm_fir_sparse_instance_f32;
-
-/**
+  typedef struct
+  {
+          uint16_t numTaps;             /**< number of coefficients in the filter. */
+          uint16_t stateIndex;          /**< state buffer index.  Points to the oldest sample in the state buffer. */
+          float32_t *pState;            /**< points to the state buffer array. The array is of length maxDelay+blockSize-1. */
+    const float32_t *pCoeffs;           /**< points to the coefficient array. The array is of length numTaps.*/
+          uint16_t maxDelay;            /**< maximum offset specified by the pTapDelay array. */
+          int32_t *pTapDelay;           /**< points to the array of delay values.  The array is of length numTaps. */
+  } arm_fir_sparse_instance_f32;
+
+  /**
    * @brief Instance structure for the Q31 sparse FIR filter.
    */
-typedef struct {
-    uint16_t numTaps; /**< number of coefficients in the filter. */
-    uint16_t stateIndex; /**< state buffer index.  Points to the oldest sample in the state buffer. */
-    q31_t *
-        pState; /**< points to the state buffer array. The array is of length maxDelay+blockSize-1. */
-    const q31_t *pCoeffs; /**< points to the coefficient array. The array is of length numTaps.*/
-    uint16_t maxDelay; /**< maximum offset specified by the pTapDelay array. */
-    int32_t *pTapDelay; /**< points to the array of delay values.  The array is of length numTaps. */
-} arm_fir_sparse_instance_q31;
-
-/**
+  typedef struct
+  {
+          uint16_t numTaps;             /**< number of coefficients in the filter. */
+          uint16_t stateIndex;          /**< state buffer index.  Points to the oldest sample in the state buffer. */
+          q31_t *pState;                /**< points to the state buffer array. The array is of length maxDelay+blockSize-1. */
+    const q31_t *pCoeffs;               /**< points to the coefficient array. The array is of length numTaps.*/
+          uint16_t maxDelay;            /**< maximum offset specified by the pTapDelay array. */
+          int32_t *pTapDelay;           /**< points to the array of delay values.  The array is of length numTaps. */
+  } arm_fir_sparse_instance_q31;
+
+  /**
    * @brief Instance structure for the Q15 sparse FIR filter.
    */
-typedef struct {
-    uint16_t numTaps; /**< number of coefficients in the filter. */
-    uint16_t stateIndex; /**< state buffer index.  Points to the oldest sample in the state buffer. */
-    q15_t *
-        pState; /**< points to the state buffer array. The array is of length maxDelay+blockSize-1. */
-    const q15_t *pCoeffs; /**< points to the coefficient array. The array is of length numTaps.*/
-    uint16_t maxDelay; /**< maximum offset specified by the pTapDelay array. */
-    int32_t *pTapDelay; /**< points to the array of delay values.  The array is of length numTaps. */
-} arm_fir_sparse_instance_q15;
-
-/**
+  typedef struct
+  {
+          uint16_t numTaps;             /**< number of coefficients in the filter. */
+          uint16_t stateIndex;          /**< state buffer index.  Points to the oldest sample in the state buffer. */
+          q15_t *pState;                /**< points to the state buffer array. The array is of length maxDelay+blockSize-1. */
+    const q15_t *pCoeffs;               /**< points to the coefficient array. The array is of length numTaps.*/
+          uint16_t maxDelay;            /**< maximum offset specified by the pTapDelay array. */
+          int32_t *pTapDelay;           /**< points to the array of delay values.  The array is of length numTaps. */
+  } arm_fir_sparse_instance_q15;
+
+  /**
    * @brief Instance structure for the Q7 sparse FIR filter.
    */
-typedef struct {
-    uint16_t numTaps; /**< number of coefficients in the filter. */
-    uint16_t stateIndex; /**< state buffer index.  Points to the oldest sample in the state buffer. */
-    q7_t *pState; /**< points to the state buffer array. The array is of length maxDelay+blockSize-1. */
-    const q7_t *pCoeffs; /**< points to the coefficient array. The array is of length numTaps.*/
-    uint16_t maxDelay; /**< maximum offset specified by the pTapDelay array. */
-    int32_t *pTapDelay; /**< points to the array of delay values.  The array is of length numTaps. */
-} arm_fir_sparse_instance_q7;
+  typedef struct
+  {
+          uint16_t numTaps;             /**< number of coefficients in the filter. */
+          uint16_t stateIndex;          /**< state buffer index.  Points to the oldest sample in the state buffer. */
+          q7_t *pState;                 /**< points to the state buffer array. The array is of length maxDelay+blockSize-1. */
+    const q7_t *pCoeffs;                /**< points to the coefficient array. The array is of length numTaps.*/
+          uint16_t maxDelay;            /**< maximum offset specified by the pTapDelay array. */
+          int32_t *pTapDelay;           /**< points to the array of delay values.  The array is of length numTaps. */
+  } arm_fir_sparse_instance_q7;
 
-/**
+
+  /**
    * @brief Processing function for the floating-point sparse FIR filter.
    * @param[in]  S           points to an instance of the floating-point sparse FIR structure.
    * @param[in]  pSrc        points to the block of input data.
@@ -1585,10 +2091,15 @@ typedef struct {
    * @param[in]  pScratchIn  points to a temporary buffer of size blockSize.
    * @param[in]  blockSize   number of input samples to process per call.
    */
-void arm_fir_sparse_f32(arm_fir_sparse_instance_f32 *S, const float32_t *pSrc, float32_t *pDst,
-                        float32_t *pScratchIn, uint32_t blockSize);
+  void arm_fir_sparse_f32(
+        arm_fir_sparse_instance_f32 * S,
+  const float32_t * pSrc,
+        float32_t * pDst,
+        float32_t * pScratchIn,
+        uint32_t blockSize);
 
-/**
+
+  /**
    * @brief  Initialization function for the floating-point sparse FIR filter.
    * @param[in,out] S          points to an instance of the floating-point sparse FIR structure.
    * @param[in]     numTaps    number of nonzero coefficients in the filter.
@@ -1598,11 +2109,17 @@ void arm_fir_sparse_f32(arm_fir_sparse_instance_f32 *S, const float32_t *pSrc, f
    * @param[in]     maxDelay   maximum offset time supported.
    * @param[in]     blockSize  number of samples that will be processed per block.
    */
-void arm_fir_sparse_init_f32(arm_fir_sparse_instance_f32 *S, uint16_t numTaps,
-                             const float32_t *pCoeffs, float32_t *pState, int32_t *pTapDelay,
-                             uint16_t maxDelay, uint32_t blockSize);
+  void arm_fir_sparse_init_f32(
+        arm_fir_sparse_instance_f32 * S,
+        uint16_t numTaps,
+  const float32_t * pCoeffs,
+        float32_t * pState,
+        int32_t * pTapDelay,
+        uint16_t maxDelay,
+        uint32_t blockSize);
 
-/**
+
+  /**
    * @brief Processing function for the Q31 sparse FIR filter.
    * @param[in]  S           points to an instance of the Q31 sparse FIR structure.
    * @param[in]  pSrc        points to the block of input data.
@@ -1610,10 +2127,15 @@ void arm_fir_sparse_init_f32(arm_fir_sparse_instance_f32 *S, uint16_t numTaps,
    * @param[in]  pScratchIn  points to a temporary buffer of size blockSize.
    * @param[in]  blockSize   number of input samples to process per call.
    */
-void arm_fir_sparse_q31(arm_fir_sparse_instance_q31 *S, const q31_t *pSrc, q31_t *pDst,
-                        q31_t *pScratchIn, uint32_t blockSize);
+  void arm_fir_sparse_q31(
+        arm_fir_sparse_instance_q31 * S,
+  const q31_t * pSrc,
+        q31_t * pDst,
+        q31_t * pScratchIn,
+        uint32_t blockSize);
 
-/**
+
+  /**
    * @brief  Initialization function for the Q31 sparse FIR filter.
    * @param[in,out] S          points to an instance of the Q31 sparse FIR structure.
    * @param[in]     numTaps    number of nonzero coefficients in the filter.
@@ -1623,11 +2145,17 @@ void arm_fir_sparse_q31(arm_fir_sparse_instance_q31 *S, const q31_t *pSrc, q31_t
    * @param[in]     maxDelay   maximum offset time supported.
    * @param[in]     blockSize  number of samples that will be processed per block.
    */
-void arm_fir_sparse_init_q31(arm_fir_sparse_instance_q31 *S, uint16_t numTaps, const q31_t *pCoeffs,
-                             q31_t *pState, int32_t *pTapDelay, uint16_t maxDelay,
-                             uint32_t blockSize);
+  void arm_fir_sparse_init_q31(
+        arm_fir_sparse_instance_q31 * S,
+        uint16_t numTaps,
+  const q31_t * pCoeffs,
+        q31_t * pState,
+        int32_t * pTapDelay,
+        uint16_t maxDelay,
+        uint32_t blockSize);
 
-/**
+
+  /**
    * @brief Processing function for the Q15 sparse FIR filter.
    * @param[in]  S            points to an instance of the Q15 sparse FIR structure.
    * @param[in]  pSrc         points to the block of input data.
@@ -1636,10 +2164,16 @@ void arm_fir_sparse_init_q31(arm_fir_sparse_instance_q31 *S, uint16_t numTaps, c
    * @param[in]  pScratchOut  points to a temporary buffer of size blockSize.
    * @param[in]  blockSize    number of input samples to process per call.
    */
-void arm_fir_sparse_q15(arm_fir_sparse_instance_q15 *S, const q15_t *pSrc, q15_t *pDst,
-                        q15_t *pScratchIn, q31_t *pScratchOut, uint32_t blockSize);
+  void arm_fir_sparse_q15(
+        arm_fir_sparse_instance_q15 * S,
+  const q15_t * pSrc,
+        q15_t * pDst,
+        q15_t * pScratchIn,
+        q31_t * pScratchOut,
+        uint32_t blockSize);
 
-/**
+
+  /**
    * @brief  Initialization function for the Q15 sparse FIR filter.
    * @param[in,out] S          points to an instance of the Q15 sparse FIR structure.
    * @param[in]     numTaps    number of nonzero coefficients in the filter.
@@ -1649,11 +2183,17 @@ void arm_fir_sparse_q15(arm_fir_sparse_instance_q15 *S, const q15_t *pSrc, q15_t
    * @param[in]     maxDelay   maximum offset time supported.
    * @param[in]     blockSize  number of samples that will be processed per block.
    */
-void arm_fir_sparse_init_q15(arm_fir_sparse_instance_q15 *S, uint16_t numTaps, const q15_t *pCoeffs,
-                             q15_t *pState, int32_t *pTapDelay, uint16_t maxDelay,
-                             uint32_t blockSize);
+  void arm_fir_sparse_init_q15(
+        arm_fir_sparse_instance_q15 * S,
+        uint16_t numTaps,
+  const q15_t * pCoeffs,
+        q15_t * pState,
+        int32_t * pTapDelay,
+        uint16_t maxDelay,
+        uint32_t blockSize);
 
-/**
+
+  /**
    * @brief Processing function for the Q7 sparse FIR filter.
    * @param[in]  S            points to an instance of the Q7 sparse FIR structure.
    * @param[in]  pSrc         points to the block of input data.
@@ -1662,10 +2202,16 @@ void arm_fir_sparse_init_q15(arm_fir_sparse_instance_q15 *S, uint16_t numTaps, c
    * @param[in]  pScratchOut  points to a temporary buffer of size blockSize.
    * @param[in]  blockSize    number of input samples to process per call.
    */
-void arm_fir_sparse_q7(arm_fir_sparse_instance_q7 *S, const q7_t *pSrc, q7_t *pDst,
-                       q7_t *pScratchIn, q31_t *pScratchOut, uint32_t blockSize);
+  void arm_fir_sparse_q7(
+        arm_fir_sparse_instance_q7 * S,
+  const q7_t * pSrc,
+        q7_t * pDst,
+        q7_t * pScratchIn,
+        q31_t * pScratchOut,
+        uint32_t blockSize);
 
-/**
+
+  /**
    * @brief  Initialization function for the Q7 sparse FIR filter.
    * @param[in,out] S          points to an instance of the Q7 sparse FIR structure.
    * @param[in]     numTaps    number of nonzero coefficients in the filter.
@@ -1675,18 +2221,32 @@ void arm_fir_sparse_q7(arm_fir_sparse_instance_q7 *S, const q7_t *pSrc, q7_t *pD
    * @param[in]     maxDelay   maximum offset time supported.
    * @param[in]     blockSize  number of samples that will be processed per block.
    */
-void arm_fir_sparse_init_q7(arm_fir_sparse_instance_q7 *S, uint16_t numTaps, const q7_t *pCoeffs,
-                            q7_t *pState, int32_t *pTapDelay, uint16_t maxDelay,
-                            uint32_t blockSize);
+  void arm_fir_sparse_init_q7(
+        arm_fir_sparse_instance_q7 * S,
+        uint16_t numTaps,
+  const q7_t * pCoeffs,
+        q7_t * pState,
+        int32_t * pTapDelay,
+        uint16_t maxDelay,
+        uint32_t blockSize);
 
-/**
+
+
+
+ 
+
+  /**
    * @brief floating-point Circular write function.
    */
-__STATIC_FORCEINLINE void arm_circularWrite_f32(int32_t *circBuffer, int32_t L,
-                                                uint16_t *writeOffset, int32_t bufferInc,
-                                                const int32_t *src, int32_t srcInc,
-                                                uint32_t blockSize)
-{
+  __STATIC_FORCEINLINE void arm_circularWrite_f32(
+  int32_t * circBuffer,
+  int32_t L,
+  uint16_t * writeOffset,
+  int32_t bufferInc,
+  const int32_t * src,
+  int32_t srcInc,
+  uint32_t blockSize)
+  {
     uint32_t i = 0U;
     int32_t wOffset;
 
@@ -1697,37 +2257,46 @@ __STATIC_FORCEINLINE void arm_circularWrite_f32(int32_t *circBuffer, int32_t L,
     /* Loop over the blockSize */
     i = blockSize;
 
-    while (i > 0U) {
-        /* copy the input sample to the circular buffer */
-        circBuffer[wOffset] = *src;
+    while (i > 0U)
+    {
+      /* copy the input sample to the circular buffer */
+      circBuffer[wOffset] = *src;
 
-        /* Update the input pointer */
-        src += srcInc;
+      /* Update the input pointer */
+      src += srcInc;
 
-        /* Circularly update wOffset.  Watch out for positive and negative value */
-        wOffset += bufferInc;
-        if (wOffset >= L)
-            wOffset -= L;
+      /* Circularly update wOffset.  Watch out for positive and negative value */
+      wOffset += bufferInc;
+      if (wOffset >= L)
+        wOffset -= L;
 
-        /* Decrement the loop counter */
-        i--;
+      /* Decrement the loop counter */
+      i--;
     }
 
     /* Update the index pointer */
     *writeOffset = (uint16_t)wOffset;
-}
+  }
 
-/**
+
+
+  /**
    * @brief floating-point Circular Read function.
    */
-__STATIC_FORCEINLINE void arm_circularRead_f32(int32_t *circBuffer, int32_t L, int32_t *readOffset,
-                                               int32_t bufferInc, int32_t *dst, int32_t *dst_base,
-                                               int32_t dst_length, int32_t dstInc,
-                                               uint32_t blockSize)
-{
+  __STATIC_FORCEINLINE void arm_circularRead_f32(
+  int32_t * circBuffer,
+  int32_t L,
+  int32_t * readOffset,
+  int32_t bufferInc,
+  int32_t * dst,
+  int32_t * dst_base,
+  int32_t dst_length,
+  int32_t dstInc,
+  uint32_t blockSize)
+  {
     uint32_t i = 0U;
     int32_t rOffset;
-    int32_t *dst_end;
+    int32_t* dst_end;
 
     /* Copy the value of Index pointer that points
      * to the current location from where the input samples to be read */
@@ -1737,39 +2306,48 @@ __STATIC_FORCEINLINE void arm_circularRead_f32(int32_t *circBuffer, int32_t L, i
     /* Loop over the blockSize */
     i = blockSize;
 
-    while (i > 0U) {
-        /* copy the sample from the circular buffer to the destination buffer */
-        *dst = circBuffer[rOffset];
+    while (i > 0U)
+    {
+      /* copy the sample from the circular buffer to the destination buffer */
+      *dst = circBuffer[rOffset];
 
-        /* Update the input pointer */
-        dst += dstInc;
+      /* Update the destination pointer */
+      dst += dstInc;
 
-        if (dst == dst_end) {
-            dst = dst_base;
-        }
+      if (dst == dst_end)
+      {
+        dst = dst_base;
+      }
 
-        /* Circularly update rOffset.  Watch out for positive and negative value  */
-        rOffset += bufferInc;
+      /* Circularly update rOffset.  Watch out for positive and negative value  */
+      rOffset += bufferInc;
 
-        if (rOffset >= L) {
-            rOffset -= L;
-        }
+      if (rOffset >= L)
+      {
+        rOffset -= L;
+      }
 
-        /* Decrement the loop counter */
-        i--;
+      /* Decrement the loop counter */
+      i--;
     }
 
     /* Update the index pointer */
     *readOffset = rOffset;
-}
+  }
 
-/**
+
+  /**
    * @brief Q15 Circular write function.
    */
-__STATIC_FORCEINLINE void arm_circularWrite_q15(q15_t *circBuffer, int32_t L, uint16_t *writeOffset,
-                                                int32_t bufferInc, const q15_t *src, int32_t srcInc,
-                                                uint32_t blockSize)
-{
+  __STATIC_FORCEINLINE void arm_circularWrite_q15(
+  q15_t * circBuffer,
+  int32_t L,
+  uint16_t * writeOffset,
+  int32_t bufferInc,
+  const q15_t * src,
+  int32_t srcInc,
+  uint32_t blockSize)
+  {
     uint32_t i = 0U;
     int32_t wOffset;
 
@@ -1780,37 +2358,45 @@ __STATIC_FORCEINLINE void arm_circularWrite_q15(q15_t *circBuffer, int32_t L, ui
     /* Loop over the blockSize */
     i = blockSize;
 
-    while (i > 0U) {
-        /* copy the input sample to the circular buffer */
-        circBuffer[wOffset] = *src;
+    while (i > 0U)
+    {
+      /* copy the input sample to the circular buffer */
+      circBuffer[wOffset] = *src;
 
-        /* Update the input pointer */
-        src += srcInc;
+      /* Update the input pointer */
+      src += srcInc;
 
-        /* Circularly update wOffset.  Watch out for positive and negative value */
-        wOffset += bufferInc;
-        if (wOffset >= L)
-            wOffset -= L;
+      /* Circularly update wOffset.  Watch out for positive and negative value */
+      wOffset += bufferInc;
+      if (wOffset >= L)
+        wOffset -= L;
 
-        /* Decrement the loop counter */
-        i--;
+      /* Decrement the loop counter */
+      i--;
     }
 
     /* Update the index pointer */
     *writeOffset = (uint16_t)wOffset;
-}
+  }
 
-/**
+
+  /**
    * @brief Q15 Circular Read function.
    */
-__STATIC_FORCEINLINE void arm_circularRead_q15(q15_t *circBuffer, int32_t L, int32_t *readOffset,
-                                               int32_t bufferInc, q15_t *dst, q15_t *dst_base,
-                                               int32_t dst_length, int32_t dstInc,
-                                               uint32_t blockSize)
-{
+  __STATIC_FORCEINLINE void arm_circularRead_q15(
+  q15_t * circBuffer,
+  int32_t L,
+  int32_t * readOffset,
+  int32_t bufferInc,
+  q15_t * dst,
+  q15_t * dst_base,
+  int32_t dst_length,
+  int32_t dstInc,
+  uint32_t blockSize)
+  {
     uint32_t i = 0;
     int32_t rOffset;
-    q15_t *dst_end;
+    q15_t* dst_end;
 
     /* Copy the value of Index pointer that points
      * to the current location from where the input samples to be read */
@@ -1821,39 +2407,48 @@ __STATIC_FORCEINLINE void arm_circularRead_q15(q15_t *circBuffer, int32_t L, int
     /* Loop over the blockSize */
     i = blockSize;
 
-    while (i > 0U) {
-        /* copy the sample from the circular buffer to the destination buffer */
-        *dst = circBuffer[rOffset];
+    while (i > 0U)
+    {
+      /* copy the sample from the circular buffer to the destination buffer */
+      *dst = circBuffer[rOffset];
 
-        /* Update the input pointer */
-        dst += dstInc;
+      /* Update the destination pointer */
+      dst += dstInc;
 
-        if (dst == dst_end) {
-            dst = dst_base;
-        }
+      if (dst == dst_end)
+      {
+        dst = dst_base;
+      }
 
-        /* Circularly update wOffset.  Watch out for positive and negative value */
-        rOffset += bufferInc;
+      /* Circularly update rOffset.  Watch out for positive and negative value */
+      rOffset += bufferInc;
 
-        if (rOffset >= L) {
-            rOffset -= L;
-        }
+      if (rOffset >= L)
+      {
+        rOffset -= L;
+      }
 
-        /* Decrement the loop counter */
-        i--;
+      /* Decrement the loop counter */
+      i--;
     }
 
     /* Update the index pointer */
     *readOffset = rOffset;
-}
+  }
 
-/**
+
+  /**
    * @brief Q7 Circular write function.
    */
-__STATIC_FORCEINLINE void arm_circularWrite_q7(q7_t *circBuffer, int32_t L, uint16_t *writeOffset,
-                                               int32_t bufferInc, const q7_t *src, int32_t srcInc,
-                                               uint32_t blockSize)
-{
+  __STATIC_FORCEINLINE void arm_circularWrite_q7(
+  q7_t * circBuffer,
+  int32_t L,
+  uint16_t * writeOffset,
+  int32_t bufferInc,
+  const q7_t * src,
+  int32_t srcInc,
+  uint32_t blockSize)
+  {
     uint32_t i = 0U;
     int32_t wOffset;
 
@@ -1864,37 +2459,45 @@ __STATIC_FORCEINLINE void arm_circularWrite_q7(q7_t *circBuffer, int32_t L, uint
     /* Loop over the blockSize */
     i = blockSize;
 
-    while (i > 0U) {
-        /* copy the input sample to the circular buffer */
-        circBuffer[wOffset] = *src;
+    while (i > 0U)
+    {
+      /* copy the input sample to the circular buffer */
+      circBuffer[wOffset] = *src;
 
-        /* Update the input pointer */
-        src += srcInc;
+      /* Update the input pointer */
+      src += srcInc;
 
-        /* Circularly update wOffset.  Watch out for positive and negative value */
-        wOffset += bufferInc;
-        if (wOffset >= L)
-            wOffset -= L;
+      /* Circularly update wOffset.  Watch out for positive and negative value */
+      wOffset += bufferInc;
+      if (wOffset >= L)
+        wOffset -= L;
 
-        /* Decrement the loop counter */
-        i--;
+      /* Decrement the loop counter */
+      i--;
     }
 
     /* Update the index pointer */
     *writeOffset = (uint16_t)wOffset;
-}
+  }
 
-/**
+
+  /**
    * @brief Q7 Circular Read function.
    */
-__STATIC_FORCEINLINE void arm_circularRead_q7(q7_t *circBuffer, int32_t L, int32_t *readOffset,
-                                              int32_t bufferInc, q7_t *dst, q7_t *dst_base,
-                                              int32_t dst_length, int32_t dstInc,
-                                              uint32_t blockSize)
-{
+  __STATIC_FORCEINLINE void arm_circularRead_q7(
+  q7_t * circBuffer,
+  int32_t L,
+  int32_t * readOffset,
+  int32_t bufferInc,
+  q7_t * dst,
+  q7_t * dst_base,
+  int32_t dst_length,
+  int32_t dstInc,
+  uint32_t blockSize)
+  {
     uint32_t i = 0;
     int32_t rOffset;
-    q7_t *dst_end;
+    q7_t* dst_end;
 
     /* Copy the value of Index pointer that points
      * to the current location from where the input samples to be read */
@@ -1905,31 +2508,35 @@ __STATIC_FORCEINLINE void arm_circularRead_q7(q7_t *circBuffer, int32_t L, int32
     /* Loop over the blockSize */
     i = blockSize;
 
-    while (i > 0U) {
-        /* copy the sample from the circular buffer to the destination buffer */
-        *dst = circBuffer[rOffset];
+    while (i > 0U)
+    {
+      /* copy the sample from the circular buffer to the destination buffer */
+      *dst = circBuffer[rOffset];
 
-        /* Update the input pointer */
-        dst += dstInc;
+      /* Update the destination pointer */
+      dst += dstInc;
 
-        if (dst == dst_end) {
-            dst = dst_base;
-        }
+      if (dst == dst_end)
+      {
+        dst = dst_base;
+      }
 
-        /* Circularly update rOffset.  Watch out for positive and negative value */
-        rOffset += bufferInc;
+      /* Circularly update rOffset.  Watch out for positive and negative value */
+      rOffset += bufferInc;
 
-        if (rOffset >= L) {
-            rOffset -= L;
-        }
+      if (rOffset >= L)
+      {
+        rOffset -= L;
+      }
 
-        /* Decrement the loop counter */
-        i--;
+      /* Decrement the loop counter */
+      i--;
     }
 
     /* Update the index pointer */
     *readOffset = rOffset;
-}
+  }
+
 
 /**
   @brief         Levinson Durbin
@@ -1937,9 +2544,12 @@ __STATIC_FORCEINLINE void arm_circularRead_q7(q7_t *circBuffer, int32_t L, int32
   @param[out]    a        autoregressive coefficients
   @param[out]    err      prediction error (variance)
   @param[in]     nbCoefs  number of autoregressive coefficients
-  @return        none
  */
-void arm_levinson_durbin_f32(const float32_t *phi, float32_t *a, float32_t *err, int nbCoefs);
+void arm_levinson_durbin_f32(const float32_t *phi,
+  float32_t *a, 
+  float32_t *err,
+  int nbCoefs);
+
 
 /**
   @brief         Levinson Durbin
@@ -1947,11 +2557,13 @@ void arm_levinson_durbin_f32(const float32_t *phi, float32_t *a, float32_t *err,
   @param[out]    a        autoregressive coefficients
   @param[out]    err      prediction error (variance)
   @param[in]     nbCoefs  number of autoregressive coefficients
-  @return        none
  */
-void arm_levinson_durbin_q31(const q31_t *phi, q31_t *a, q31_t *err, int nbCoefs);
+void arm_levinson_durbin_q31(const q31_t *phi,
+  q31_t *a, 
+  q31_t *err,
+  int nbCoefs);
 
-#ifdef __cplusplus
+#ifdef   __cplusplus
 }
 #endif
 
diff --git a/Libraries/CMSIS/5.9.0/DSP/Include/dsp/filtering_functions_f16.h b/Libraries/CMSIS/5.9.0/DSP/1.16.2/Include/dsp/filtering_functions_f16.h
old mode 100644
new mode 100755
similarity index 57%
rename from Libraries/CMSIS/5.9.0/DSP/Include/dsp/filtering_functions_f16.h
rename to Libraries/CMSIS/5.9.0/DSP/1.16.2/Include/dsp/filtering_functions_f16.h
index f8ad957da20..655cd7e0f56
--- a/Libraries/CMSIS/5.9.0/DSP/Include/dsp/filtering_functions_f16.h
+++ b/Libraries/CMSIS/5.9.0/DSP/1.16.2/Include/dsp/filtering_functions_f16.h
@@ -23,8 +23,9 @@
  * limitations under the License.
  */
 
-#ifndef _FILTERING_FUNCTIONS_F16_H_
-#define _FILTERING_FUNCTIONS_F16_H_
+ 
+#ifndef FILTERING_FUNCTIONS_F16_H_
+#define FILTERING_FUNCTIONS_F16_H_
 
 #include "arm_math_types_f16.h"
 #include "arm_math_memory.h"
@@ -32,23 +33,25 @@
 #include "dsp/none.h"
 #include "dsp/utils.h"
 
-#ifdef __cplusplus
-extern "C" {
+
+#ifdef   __cplusplus
+extern "C"
+{
 #endif
 
 #if defined(ARM_FLOAT16_SUPPORTED)
 
-/**
+ /**
    * @brief Instance structure for the floating-point FIR filter.
    */
-typedef struct {
-    uint16_t numTaps; /**< number of filter coefficients in the filter. */
-    float16_t *
-        pState; /**< points to the state variable array. The array is of length numTaps+blockSize-1. */
-    const float16_t *pCoeffs; /**< points to the coefficient array. The array is of length numTaps. */
-} arm_fir_instance_f16;
-
-/**
+  typedef struct
+  {
+          uint16_t numTaps;     /**< number of filter coefficients in the filter. */
+          float16_t *pState;    /**< points to the state variable array. The array is of length numTaps+blockSize-1. */
+    const float16_t *pCoeffs;   /**< points to the coefficient array. The array is of length numTaps. */
+  } arm_fir_instance_f16;
+
+  /**
    * @brief  Initialization function for the floating-point FIR filter.
    * @param[in,out] S          points to an instance of the floating-point FIR filter structure.
    * @param[in]     numTaps    Number of filter coefficients in the filter.
@@ -56,127 +59,148 @@ typedef struct {
    * @param[in]     pState     points to the state buffer.
    * @param[in]     blockSize  number of samples that are processed at a time.
    */
-void arm_fir_init_f16(arm_fir_instance_f16 *S, uint16_t numTaps, const float16_t *pCoeffs,
-                      float16_t *pState, uint32_t blockSize);
-
-/**
+  void arm_fir_init_f16(
+        arm_fir_instance_f16 * S,
+        uint16_t numTaps,
+  const float16_t * pCoeffs,
+        float16_t * pState,
+        uint32_t blockSize);
+
+  /**
    * @brief Processing function for the floating-point FIR filter.
    * @param[in]  S          points to an instance of the floating-point FIR structure.
    * @param[in]  pSrc       points to the block of input data.
    * @param[out] pDst       points to the block of output data.
    * @param[in]  blockSize  number of samples to process.
    */
-void arm_fir_f16(const arm_fir_instance_f16 *S, const float16_t *pSrc, float16_t *pDst,
-                 uint32_t blockSize);
+  void arm_fir_f16(
+  const arm_fir_instance_f16 * S,
+  const float16_t * pSrc,
+        float16_t * pDst,
+        uint32_t blockSize);
 
-/**
+
+  /**
    * @brief Instance structure for the floating-point Biquad cascade filter.
    */
-typedef struct {
-    uint32_t
-        numStages; /**< number of 2nd order stages in the filter.  Overall order is 2*numStages. */
-    float16_t *
-        pState; /**< Points to the array of state coefficients.  The array is of length 4*numStages. */
-    const float16_t
-        *pCoeffs; /**< Points to the array of coefficients.  The array is of length 5*numStages. */
-} arm_biquad_casd_df1_inst_f16;
+  typedef struct
+  {
+          uint32_t numStages;      /**< number of 2nd order stages in the filter.  Overall order is 2*numStages. */
+          float16_t *pState;       /**< Points to the array of state coefficients.  The array is of length 4*numStages. */
+    const float16_t *pCoeffs;      /**< Points to the array of coefficients.  The array is of length 5*numStages. */
+  } arm_biquad_casd_df1_inst_f16;
 
 #if defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE)
-/**
+  /**
    * @brief Instance structure for the modified Biquad coefs required by vectorized code.
    */
-typedef struct {
-    float16_t coeffs
-        [12]
-        [8]; /**< Points to the array of modified coefficients.  The array is of length 32. There is one per stage */
-} arm_biquad_mod_coef_f16;
-#endif
+  typedef struct
+  {
+      float16_t coeffs[12][8]; /**< Array of modified coefficients, stored inline as 12 rows of 8 values (96 in total). There is one per stage */
+  } arm_biquad_mod_coef_f16;
+#endif 
 
-/**
+  /**
    * @brief Processing function for the floating-point Biquad cascade filter.
    * @param[in]  S          points to an instance of the floating-point Biquad cascade structure.
    * @param[in]  pSrc       points to the block of input data.
    * @param[out] pDst       points to the block of output data.
    * @param[in]  blockSize  number of samples to process.
    */
-void arm_biquad_cascade_df1_f16(const arm_biquad_casd_df1_inst_f16 *S, const float16_t *pSrc,
-                                float16_t *pDst, uint32_t blockSize);
+  void arm_biquad_cascade_df1_f16(
+  const arm_biquad_casd_df1_inst_f16 * S,
+  const float16_t * pSrc,
+        float16_t * pDst,
+        uint32_t blockSize);
 
 #if defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE)
-void arm_biquad_cascade_df1_mve_init_f16(arm_biquad_casd_df1_inst_f16 *S, uint8_t numStages,
-                                         const float16_t *pCoeffs,
-                                         arm_biquad_mod_coef_f16 *pCoeffsMod, float16_t *pState);
+  void arm_biquad_cascade_df1_mve_init_f16(
+      arm_biquad_casd_df1_inst_f16 * S,
+      uint8_t numStages,
+      const float16_t * pCoeffs, 
+      arm_biquad_mod_coef_f16 * pCoeffsMod, 
+      float16_t * pState);
 #endif
 
-void arm_biquad_cascade_df1_init_f16(arm_biquad_casd_df1_inst_f16 *S, uint8_t numStages,
-                                     const float16_t *pCoeffs, float16_t *pState);
+  void arm_biquad_cascade_df1_init_f16(
+        arm_biquad_casd_df1_inst_f16 * S,
+        uint8_t numStages,
+  const float16_t * pCoeffs,
+        float16_t * pState);
 
-/**
+  /**
    * @brief Instance structure for the floating-point transposed direct form II Biquad cascade filter.
    */
-typedef struct {
-    uint8_t
-        numStages; /**< number of 2nd order stages in the filter.  Overall order is 2*numStages. */
-    float16_t *
-        pState; /**< points to the array of state coefficients.  The array is of length 2*numStages. */
-    const float16_t
-        *pCoeffs; /**< points to the array of coefficients.  The array is of length 5*numStages. */
-} arm_biquad_cascade_df2T_instance_f16;
-
-/**
+  typedef struct
+  {
+          uint8_t numStages;         /**< number of 2nd order stages in the filter.  Overall order is 2*numStages. */
+          float16_t *pState;         /**< points to the array of state coefficients.  The array is of length 2*numStages. */
+    const float16_t *pCoeffs;        /**< points to the array of coefficients.  The array is of length 5*numStages. */
+  } arm_biquad_cascade_df2T_instance_f16;
+
+  /**
    * @brief Instance structure for the floating-point transposed direct form II Biquad cascade filter.
    */
-typedef struct {
-    uint8_t
-        numStages; /**< number of 2nd order stages in the filter.  Overall order is 2*numStages. */
-    float16_t *
-        pState; /**< points to the array of state coefficients.  The array is of length 4*numStages. */
-    const float16_t
-        *pCoeffs; /**< points to the array of coefficients.  The array is of length 5*numStages. */
-} arm_biquad_cascade_stereo_df2T_instance_f16;
-
-/**
+  typedef struct
+  {
+          uint8_t numStages;         /**< number of 2nd order stages in the filter.  Overall order is 2*numStages. */
+          float16_t *pState;         /**< points to the array of state coefficients.  The array is of length 4*numStages. */
+    const float16_t *pCoeffs;        /**< points to the array of coefficients.  The array is of length 5*numStages. */
+  } arm_biquad_cascade_stereo_df2T_instance_f16;
+
+  /**
    * @brief Processing function for the floating-point transposed direct form II Biquad cascade filter.
    * @param[in]  S          points to an instance of the filter data structure.
    * @param[in]  pSrc       points to the block of input data.
    * @param[out] pDst       points to the block of output data
    * @param[in]  blockSize  number of samples to process.
    */
-void arm_biquad_cascade_df2T_f16(const arm_biquad_cascade_df2T_instance_f16 *S,
-                                 const float16_t *pSrc, float16_t *pDst, uint32_t blockSize);
+  void arm_biquad_cascade_df2T_f16(
+  const arm_biquad_cascade_df2T_instance_f16 * S,
+  const float16_t * pSrc,
+        float16_t * pDst,
+        uint32_t blockSize);
 
-/**
+  /**
    * @brief Processing function for the floating-point transposed direct form II Biquad cascade filter. 2 channels
    * @param[in]  S          points to an instance of the filter data structure.
    * @param[in]  pSrc       points to the block of input data.
    * @param[out] pDst       points to the block of output data
    * @param[in]  blockSize  number of samples to process.
    */
-void arm_biquad_cascade_stereo_df2T_f16(const arm_biquad_cascade_stereo_df2T_instance_f16 *S,
-                                        const float16_t *pSrc, float16_t *pDst, uint32_t blockSize);
+  void arm_biquad_cascade_stereo_df2T_f16(
+  const arm_biquad_cascade_stereo_df2T_instance_f16 * S,
+  const float16_t * pSrc,
+        float16_t * pDst,
+        uint32_t blockSize);
 
-/**
+  /**
    * @brief  Initialization function for the floating-point transposed direct form II Biquad cascade filter.
    * @param[in,out] S          points to an instance of the filter data structure.
    * @param[in]     numStages  number of 2nd order stages in the filter.
    * @param[in]     pCoeffs    points to the filter coefficients.
    * @param[in]     pState     points to the state buffer.
    */
-void arm_biquad_cascade_df2T_init_f16(arm_biquad_cascade_df2T_instance_f16 *S, uint8_t numStages,
-                                      const float16_t *pCoeffs, float16_t *pState);
+  void arm_biquad_cascade_df2T_init_f16(
+        arm_biquad_cascade_df2T_instance_f16 * S,
+        uint8_t numStages,
+  const float16_t * pCoeffs,
+        float16_t * pState);
 
-/**
+  /**
    * @brief  Initialization function for the floating-point transposed direct form II Biquad cascade filter.
    * @param[in,out] S          points to an instance of the filter data structure.
    * @param[in]     numStages  number of 2nd order stages in the filter.
    * @param[in]     pCoeffs    points to the filter coefficients.
    * @param[in]     pState     points to the state buffer.
    */
-void arm_biquad_cascade_stereo_df2T_init_f16(arm_biquad_cascade_stereo_df2T_instance_f16 *S,
-                                             uint8_t numStages, const float16_t *pCoeffs,
-                                             float16_t *pState);
+  void arm_biquad_cascade_stereo_df2T_init_f16(
+        arm_biquad_cascade_stereo_df2T_instance_f16 * S,
+        uint8_t numStages,
+  const float16_t * pCoeffs,
+        float16_t * pState);
 
-/**
+  /**
    * @brief Correlation of floating-point sequences.
    * @param[in]  pSrcA    points to the first input sequence.
    * @param[in]  srcALen  length of the first input sequence.
@@ -184,8 +208,13 @@ void arm_biquad_cascade_stereo_df2T_init_f16(arm_biquad_cascade_stereo_df2T_inst
    * @param[in]  srcBLen  length of the second input sequence.
    * @param[out] pDst     points to the block of output data  Length 2 * max(srcALen, srcBLen) - 1.
    */
-void arm_correlate_f16(const float16_t *pSrcA, uint32_t srcALen, const float16_t *pSrcB,
-                       uint32_t srcBLen, float16_t *pDst);
+  void arm_correlate_f16(
+  const float16_t * pSrcA,
+        uint32_t srcALen,
+  const float16_t * pSrcB,
+        uint32_t srcBLen,
+        float16_t * pDst);
+
 
 /**
   @brief         Levinson Durbin
@@ -193,12 +222,14 @@ void arm_correlate_f16(const float16_t *pSrcA, uint32_t srcALen, const float16_t
   @param[out]    a        autoregressive coefficients
   @param[out]    err      prediction error (variance)
   @param[in]     nbCoefs  number of autoregressive coefficients
-  @return        none
  */
-void arm_levinson_durbin_f16(const float16_t *phi, float16_t *a, float16_t *err, int nbCoefs);
+void arm_levinson_durbin_f16(const float16_t *phi,
+  float16_t *a, 
+  float16_t *err,
+  int nbCoefs);
 
 #endif /*defined(ARM_FLOAT16_SUPPORTED)*/
-#ifdef __cplusplus
+#ifdef   __cplusplus
 }
 #endif
 
diff --git a/Libraries/CMSIS/5.9.0/DSP/Include/dsp/interpolation_functions.h b/Libraries/CMSIS/5.9.0/DSP/1.16.2/Include/dsp/interpolation_functions.h
old mode 100644
new mode 100755
similarity index 64%
rename from Libraries/CMSIS/5.9.0/DSP/Include/dsp/interpolation_functions.h
rename to Libraries/CMSIS/5.9.0/DSP/1.16.2/Include/dsp/interpolation_functions.h
index ab4061efae4..574b73738f5
--- a/Libraries/CMSIS/5.9.0/DSP/Include/dsp/interpolation_functions.h
+++ b/Libraries/CMSIS/5.9.0/DSP/1.16.2/Include/dsp/interpolation_functions.h
@@ -23,8 +23,9 @@
  * limitations under the License.
  */
 
-#ifndef _INTERPOLATION_FUNCTIONS_H_
-#define _INTERPOLATION_FUNCTIONS_H_
+ 
+#ifndef INTERPOLATION_FUNCTIONS_H_
+#define INTERPOLATION_FUNCTIONS_H_
 
 #include "arm_math_types.h"
 #include "arm_math_memory.h"
@@ -32,10 +33,12 @@
 #include "dsp/none.h"
 #include "dsp/utils.h"
 
-#ifdef __cplusplus
-extern "C" {
+#ifdef   __cplusplus
+extern "C"
+{
 #endif
 
+
 /**
  * @defgroup groupInterpolation Interpolation Functions
  * These functions perform 1- and 2-dimensional interpolation of data.
@@ -43,91 +46,95 @@ extern "C" {
  * bilinear interpolation is used for 2-dimensional data.
  */
 
-/**
+
+  /**
    * @brief Instance structure for the floating-point Linear Interpolate function.
    */
-typedef struct {
-    uint32_t nValues; /**< nValues */
-    float32_t x1; /**< x1 */
-    float32_t xSpacing; /**< xSpacing */
-    float32_t *pYData; /**< pointer to the table of Y values */
-} arm_linear_interp_instance_f32;
+  typedef struct
+  {
+          uint32_t nValues;           /**< nValues */
+          float32_t x1;               /**< x1 */
+          float32_t xSpacing;         /**< xSpacing */
+          const float32_t *pYData;          /**< pointer to the table of Y values */
+  } arm_linear_interp_instance_f32;
 
-/**
+  /**
    * @brief Instance structure for the floating-point bilinear interpolation function.
    */
-typedef struct {
-    uint16_t numRows; /**< number of rows in the data table. */
-    uint16_t numCols; /**< number of columns in the data table. */
-    float32_t *pData; /**< points to the data table. */
-} arm_bilinear_interp_instance_f32;
+  typedef struct
+  {
+          uint16_t numRows;   /**< number of rows in the data table. */
+          uint16_t numCols;   /**< number of columns in the data table. */
+          const float32_t *pData;   /**< points to the data table. */
+  } arm_bilinear_interp_instance_f32;
 
-/**
+   /**
    * @brief Instance structure for the Q31 bilinear interpolation function.
    */
-typedef struct {
-    uint16_t numRows; /**< number of rows in the data table. */
-    uint16_t numCols; /**< number of columns in the data table. */
-    q31_t *pData; /**< points to the data table. */
-} arm_bilinear_interp_instance_q31;
+  typedef struct
+  {
+          uint16_t numRows;   /**< number of rows in the data table. */
+          uint16_t numCols;   /**< number of columns in the data table. */
+          const q31_t *pData;       /**< points to the data table. */
+  } arm_bilinear_interp_instance_q31;
 
-/**
+   /**
    * @brief Instance structure for the Q15 bilinear interpolation function.
    */
-typedef struct {
-    uint16_t numRows; /**< number of rows in the data table. */
-    uint16_t numCols; /**< number of columns in the data table. */
-    q15_t *pData; /**< points to the data table. */
-} arm_bilinear_interp_instance_q15;
+  typedef struct
+  {
+          uint16_t numRows;   /**< number of rows in the data table. */
+          uint16_t numCols;   /**< number of columns in the data table. */
+          const q15_t *pData;       /**< points to the data table. */
+  } arm_bilinear_interp_instance_q15;
 
-/**
+   /**
    * @brief Instance structure for the Q15 bilinear interpolation function.
    */
-typedef struct {
-    uint16_t numRows; /**< number of rows in the data table. */
-    uint16_t numCols; /**< number of columns in the data table. */
-    q7_t *pData; /**< points to the data table. */
-} arm_bilinear_interp_instance_q7;
+  typedef struct
+  {
+          uint16_t numRows;   /**< number of rows in the data table. */
+          uint16_t numCols;   /**< number of columns in the data table. */
+          const q7_t *pData;        /**< points to the data table. */
+  } arm_bilinear_interp_instance_q7;
 
-/**
+
+  /**
    * @brief Struct for specifying cubic spline type
    */
-typedef enum {
-    ARM_SPLINE_NATURAL = 0, /**< Natural spline */
-    ARM_SPLINE_PARABOLIC_RUNOUT = 1 /**< Parabolic runout spline */
-} arm_spline_type;
+  typedef enum
+  {
+    ARM_SPLINE_NATURAL = 0,           /**< Natural spline */
+    ARM_SPLINE_PARABOLIC_RUNOUT = 1   /**< Parabolic runout spline */
+  } arm_spline_type;
 
-/**
+  /**
    * @brief Instance structure for the floating-point cubic spline interpolation.
    */
-typedef struct {
-    arm_spline_type type; /**< Type (boundary conditions) */
-    const float32_t *x; /**< x values */
-    const float32_t *y; /**< y values */
-    uint32_t n_x; /**< Number of known data points */
-    float32_t *coeffs; /**< Coefficients buffer (b,c, and d) */
-} arm_spline_instance_f32;
-
-/**
-   * @ingroup groupInterpolation
-   */
+  typedef struct
+  {
+    arm_spline_type type;      /**< Type (boundary conditions) */
+    const float32_t * x;       /**< x values */
+    const float32_t * y;       /**< y values */
+    uint32_t n_x;              /**< Number of known data points */
+    float32_t * coeffs;        /**< Coefficients buffer (b,c, and d) */
+  } arm_spline_instance_f32;
 
-/**
-   * @addtogroup SplineInterpolate
-   * @{
-   */
 
-/**
+  /**
    * @brief Processing function for the floating-point cubic spline interpolation.
    * @param[in]  S          points to an instance of the floating-point spline structure.
    * @param[in]  xq         points to the x values ot the interpolated data points.
    * @param[out] pDst       points to the block of output data.
    * @param[in]  blockSize  number of samples of output data.
    */
-void arm_spline_f32(arm_spline_instance_f32 *S, const float32_t *xq, float32_t *pDst,
-                    uint32_t blockSize);
+  void arm_spline_f32(
+        arm_spline_instance_f32 * S, 
+  const float32_t * xq,
+        float32_t * pDst,
+        uint32_t blockSize);
 
-/**
+  /**
    * @brief Initialization function for the floating-point cubic spline interpolation.
    * @param[in,out] S        points to an instance of the floating-point spline structure.
    * @param[in]     type     type of cubic spline interpolation (boundary conditions)
@@ -137,28 +144,28 @@ void arm_spline_f32(arm_spline_instance_f32 *S, const float32_t *xq, float32_t *
    * @param[in]     coeffs   coefficients array for b, c, and d
    * @param[in]     tempBuffer   buffer array for internal computations
    */
-void arm_spline_init_f32(arm_spline_instance_f32 *S, arm_spline_type type, const float32_t *x,
-                         const float32_t *y, uint32_t n, float32_t *coeffs, float32_t *tempBuffer);
+  void arm_spline_init_f32(
+          arm_spline_instance_f32 * S,
+          arm_spline_type type,
+    const float32_t * x,
+    const float32_t * y,
+          uint32_t n, 
+          float32_t * coeffs,
+          float32_t * tempBuffer);
 
-/**
-   * @} end of SplineInterpolate group
-   */
-
-/**
-   * @addtogroup LinearInterpolate
-   * @{
-   */
 
-/**
+   /**
    * @brief  Process function for the floating-point Linear Interpolation Function.
    * @param[in,out] S  is an instance of the floating-point Linear Interpolation structure
    * @param[in]     x  input sample to process
    * @return y processed output sample.
    *
    */
-float32_t arm_linear_interp_f32(arm_linear_interp_instance_f32 *S, float32_t x);
+  float32_t arm_linear_interp_f32(
+  const arm_linear_interp_instance_f32 * S,
+  float32_t x);
 
-/**
+   /**
    *
    * @brief  Process function for the Q31 Linear Interpolation Function.
    * @param[in] pYData   pointer to Q31 Linear Interpolation table
@@ -171,9 +178,12 @@ float32_t arm_linear_interp_f32(arm_linear_interp_instance_f32 *S, float32_t x);
    * This function can support maximum of table size 2^12.
    *
    */
-q31_t arm_linear_interp_q31(const q31_t *pYData, q31_t x, uint32_t nValues);
+  q31_t arm_linear_interp_q31(
+  const q31_t * pYData,
+  q31_t x,
+  uint32_t nValues);
 
-/**
+  /**
    *
    * @brief  Process function for the Q15 Linear Interpolation Function.
    * @param[in] pYData   pointer to Q15 Linear Interpolation table
@@ -186,9 +196,12 @@ q31_t arm_linear_interp_q31(const q31_t *pYData, q31_t x, uint32_t nValues);
    * This function can support maximum of table size 2^12.
    *
    */
-q15_t arm_linear_interp_q15(const q15_t *pYData, q31_t x, uint32_t nValues);
+  q15_t arm_linear_interp_q15(
+  const q15_t * pYData,
+  q31_t x,
+  uint32_t nValues);
 
-/**
+  /**
    *
    * @brief  Process function for the Q7 Linear Interpolation Function.
    * @param[in] pYData   pointer to Q7 Linear Interpolation table
@@ -200,62 +213,62 @@ q15_t arm_linear_interp_q15(const q15_t *pYData, q31_t x, uint32_t nValues);
    * Input sample <code>x</code> is in 12.20 format which contains 12 bits for table index and 20 bits for fractional part.
    * This function can support maximum of table size 2^12.
    */
-q7_t arm_linear_interp_q7(const q7_t *pYData, q31_t x, uint32_t nValues);
+q7_t arm_linear_interp_q7(
+  const q7_t * pYData,
+  q31_t x,
+  uint32_t nValues);
 
-/**
-   * @} end of LinearInterpolate group
-   */
-
-/**
-   * @ingroup groupInterpolation
-   */
-
-/**
-   * @addtogroup BilinearInterpolate
-   * @{
-   */
-
-/**
+  /**
   * @brief  Floating-point bilinear interpolation.
   * @param[in,out] S  points to an instance of the interpolation structure.
   * @param[in]     X  interpolation coordinate.
   * @param[in]     Y  interpolation coordinate.
   * @return out interpolated value.
   */
-float32_t arm_bilinear_interp_f32(const arm_bilinear_interp_instance_f32 *S, float32_t X,
-                                  float32_t Y);
+  float32_t arm_bilinear_interp_f32(
+  const arm_bilinear_interp_instance_f32 * S,
+  float32_t X,
+  float32_t Y);
 
-/**
+  /**
   * @brief  Q31 bilinear interpolation.
   * @param[in,out] S  points to an instance of the interpolation structure.
   * @param[in]     X  interpolation coordinate in 12.20 format.
   * @param[in]     Y  interpolation coordinate in 12.20 format.
   * @return out interpolated value.
   */
-q31_t arm_bilinear_interp_q31(arm_bilinear_interp_instance_q31 *S, q31_t X, q31_t Y);
+  q31_t arm_bilinear_interp_q31(
+  arm_bilinear_interp_instance_q31 * S,
+  q31_t X,
+  q31_t Y);
 
-/**
+
+  /**
   * @brief  Q15 bilinear interpolation.
   * @param[in,out] S  points to an instance of the interpolation structure.
   * @param[in]     X  interpolation coordinate in 12.20 format.
   * @param[in]     Y  interpolation coordinate in 12.20 format.
   * @return out interpolated value.
   */
-q15_t arm_bilinear_interp_q15(arm_bilinear_interp_instance_q15 *S, q31_t X, q31_t Y);
+  q15_t arm_bilinear_interp_q15(
+  arm_bilinear_interp_instance_q15 * S,
+  q31_t X,
+  q31_t Y);
 
-/**
+  /**
   * @brief  Q7 bilinear interpolation.
   * @param[in,out] S  points to an instance of the interpolation structure.
   * @param[in]     X  interpolation coordinate in 12.20 format.
   * @param[in]     Y  interpolation coordinate in 12.20 format.
   * @return out interpolated value.
   */
-q7_t arm_bilinear_interp_q7(arm_bilinear_interp_instance_q7 *S, q31_t X, q31_t Y);
-/**
-   * @} end of BilinearInterpolate group
-   */
+  q7_t arm_bilinear_interp_q7(
+  arm_bilinear_interp_instance_q7 * S,
+  q31_t X,
+  q31_t Y);
+
 
-#ifdef __cplusplus
+#ifdef   __cplusplus
 }
 #endif
 
diff --git a/Libraries/CMSIS/5.9.0/DSP/Include/dsp/interpolation_functions_f16.h b/Libraries/CMSIS/5.9.0/DSP/1.16.2/Include/dsp/interpolation_functions_f16.h
old mode 100644
new mode 100755
similarity index 70%
rename from Libraries/CMSIS/5.9.0/DSP/Include/dsp/interpolation_functions_f16.h
rename to Libraries/CMSIS/5.9.0/DSP/1.16.2/Include/dsp/interpolation_functions_f16.h
index 19f91b3c05d..e1f27c3cdaf
--- a/Libraries/CMSIS/5.9.0/DSP/Include/dsp/interpolation_functions_f16.h
+++ b/Libraries/CMSIS/5.9.0/DSP/1.16.2/Include/dsp/interpolation_functions_f16.h
@@ -23,8 +23,9 @@
  * limitations under the License.
  */
 
-#ifndef _INTERPOLATION_FUNCTIONS_F16_H_
-#define _INTERPOLATION_FUNCTIONS_F16_H_
+ 
+#ifndef INTERPOLATION_FUNCTIONS_F16_H_
+#define INTERPOLATION_FUNCTIONS_F16_H_
 
 #include "arm_math_types_f16.h"
 #include "arm_math_memory.h"
@@ -32,43 +33,50 @@
 #include "dsp/none.h"
 #include "dsp/utils.h"
 
-#ifdef __cplusplus
-extern "C" {
+#ifdef   __cplusplus
+extern "C"
+{
 #endif
 
 #if defined(ARM_FLOAT16_SUPPORTED)
 
-typedef struct {
-    uint32_t nValues; /**< nValues */
-    float16_t x1; /**< x1 */
-    float16_t xSpacing; /**< xSpacing */
-    float16_t *pYData; /**< pointer to the table of Y values */
+/**
+ * @brief Instance structure for the half-precision floating-point Linear Interpolate function.
+ */
+typedef struct
+{
+    uint32_t  nValues;        /**< nValues */
+    float16_t x1;             /**< x1 */
+    float16_t xSpacing;       /**< xSpacing */
+    const float16_t *pYData;        /**< pointer to the table of Y values */
 } arm_linear_interp_instance_f16;
 
 /**
  * @brief Instance structure for the floating-point bilinear interpolation function.
  */
-typedef struct {
-    uint16_t numRows; /**< number of rows in the data table. */
-    uint16_t numCols; /**< number of columns in the data table. */
-    float16_t *pData; /**< points to the data table. */
+typedef struct
+{
+    uint16_t  numRows;/**< number of rows in the data table. */
+    uint16_t  numCols;/**< number of columns in the data table. */
+    const float16_t *pData; /**< points to the data table. */
 } arm_bilinear_interp_instance_f16;
 
-/**
+  /**
    * @addtogroup LinearInterpolate
    * @{
    */
 
-/**
+    /**
    * @brief  Process function for the floating-point Linear Interpolation Function.
    * @param[in,out] S  is an instance of the floating-point Linear Interpolation structure
    * @param[in]     x  input sample to process
    * @return y processed output sample.
-   *
    */
-float16_t arm_linear_interp_f16(arm_linear_interp_instance_f16 *S, float16_t x);
+  float16_t arm_linear_interp_f16(
+  const arm_linear_interp_instance_f16 * S,
+  float16_t x);
 
-/**
+    /**
    * @} end of LinearInterpolate group
    */
 
@@ -77,21 +85,24 @@ float16_t arm_linear_interp_f16(arm_linear_interp_instance_f16 *S, float16_t x);
    * @{
    */
 
-/**
+  /**
   * @brief  Floating-point bilinear interpolation.
   * @param[in,out] S  points to an instance of the interpolation structure.
   * @param[in]     X  interpolation coordinate.
   * @param[in]     Y  interpolation coordinate.
   * @return out interpolated value.
   */
-float16_t arm_bilinear_interp_f16(const arm_bilinear_interp_instance_f16 *S, float16_t X,
-                                  float16_t Y);
+  float16_t arm_bilinear_interp_f16(
+  const arm_bilinear_interp_instance_f16 * S,
+  float16_t X,
+  float16_t Y);
 
-/**
+
+  /**
    * @} end of BilinearInterpolate group
    */
 #endif /*defined(ARM_FLOAT16_SUPPORTED)*/
-#ifdef __cplusplus
+#ifdef   __cplusplus
 }
 #endif
 
diff --git a/Libraries/CMSIS/5.9.0/DSP/Include/dsp/matrix_functions.h b/Libraries/CMSIS/5.9.0/DSP/1.16.2/Include/dsp/matrix_functions.h
old mode 100644
new mode 100755
similarity index 65%
rename from Libraries/CMSIS/5.9.0/DSP/Include/dsp/matrix_functions.h
rename to Libraries/CMSIS/5.9.0/DSP/1.16.2/Include/dsp/matrix_functions.h
index e7409115e2f..175ca2fac22
--- a/Libraries/CMSIS/5.9.0/DSP/Include/dsp/matrix_functions.h
+++ b/Libraries/CMSIS/5.9.0/DSP/1.16.2/Include/dsp/matrix_functions.h
@@ -1,8 +1,8 @@
 /******************************************************************************
  * @file     matrix_functions.h
  * @brief    Public header file for CMSIS DSP Library
- * @version  V1.10.0
- * @date     08 July 2021
+ * @version  V1.10.1
+ * @date     10 August 2022
  * Target Processor: Cortex-M and Cortex-A cores
  ******************************************************************************/
 /*
@@ -23,8 +23,9 @@
  * limitations under the License.
  */
 
-#ifndef _MATRIX_FUNCTIONS_H_
-#define _MATRIX_FUNCTIONS_H_
+ 
+#ifndef MATRIX_FUNCTIONS_H_
+#define MATRIX_FUNCTIONS_H_
 
 #include "arm_math_types.h"
 #include "arm_math_memory.h"
@@ -32,8 +33,9 @@
 #include "dsp/none.h"
 #include "dsp/utils.h"
 
-#ifdef __cplusplus
-extern "C" {
+#ifdef   __cplusplus
+extern "C"
+{
 #endif
 
 /**
@@ -107,52 +109,60 @@ extern "C" {
  * return <code>ARM_MATH_SUCCESS</code>.
  */
 
-/**
+  #define DEFAULT_HOUSEHOLDER_THRESHOLD_F64 (1.0e-16)
+  #define DEFAULT_HOUSEHOLDER_THRESHOLD_F32 (1.0e-12f)
+
+  /**
    * @brief Instance structure for the floating-point matrix structure.
    */
-typedef struct {
-    uint16_t numRows; /**< number of rows of the matrix.     */
-    uint16_t numCols; /**< number of columns of the matrix.  */
-    float32_t *pData; /**< points to the data of the matrix. */
-} arm_matrix_instance_f32;
-
-/**
+  typedef struct
+  {
+    uint16_t numRows;     /**< number of rows of the matrix.     */
+    uint16_t numCols;     /**< number of columns of the matrix.  */
+    float32_t *pData;     /**< points to the data of the matrix. */
+  } arm_matrix_instance_f32;
+ 
+ /**
    * @brief Instance structure for the floating-point matrix structure.
    */
-typedef struct {
-    uint16_t numRows; /**< number of rows of the matrix.     */
-    uint16_t numCols; /**< number of columns of the matrix.  */
-    float64_t *pData; /**< points to the data of the matrix. */
-} arm_matrix_instance_f64;
+  typedef struct
+  {
+    uint16_t numRows;     /**< number of rows of the matrix.     */
+    uint16_t numCols;     /**< number of columns of the matrix.  */
+    float64_t *pData;     /**< points to the data of the matrix. */
+  } arm_matrix_instance_f64;
 
-/**
+ /**
    * @brief Instance structure for the Q7 matrix structure.
    */
-typedef struct {
-    uint16_t numRows; /**< number of rows of the matrix.     */
-    uint16_t numCols; /**< number of columns of the matrix.  */
-    q7_t *pData; /**< points to the data of the matrix. */
-} arm_matrix_instance_q7;
+  typedef struct
+  {
+    uint16_t numRows;     /**< number of rows of the matrix.     */
+    uint16_t numCols;     /**< number of columns of the matrix.  */
+    q7_t *pData;         /**< points to the data of the matrix. */
+  } arm_matrix_instance_q7;
 
-/**
+  /**
    * @brief Instance structure for the Q15 matrix structure.
    */
-typedef struct {
-    uint16_t numRows; /**< number of rows of the matrix.     */
-    uint16_t numCols; /**< number of columns of the matrix.  */
-    q15_t *pData; /**< points to the data of the matrix. */
-} arm_matrix_instance_q15;
+  typedef struct
+  {
+    uint16_t numRows;     /**< number of rows of the matrix.     */
+    uint16_t numCols;     /**< number of columns of the matrix.  */
+    q15_t *pData;         /**< points to the data of the matrix. */
+  } arm_matrix_instance_q15;
 
-/**
+  /**
    * @brief Instance structure for the Q31 matrix structure.
    */
-typedef struct {
-    uint16_t numRows; /**< number of rows of the matrix.     */
-    uint16_t numCols; /**< number of columns of the matrix.  */
-    q31_t *pData; /**< points to the data of the matrix. */
-} arm_matrix_instance_q31;
+  typedef struct
+  {
+    uint16_t numRows;     /**< number of rows of the matrix.     */
+    uint16_t numCols;     /**< number of columns of the matrix.  */
+    q31_t *pData;         /**< points to the data of the matrix. */
+  } arm_matrix_instance_q31;
 
-/**
+  /**
    * @brief Floating-point matrix addition.
    * @param[in]  pSrcA  points to the first input matrix structure
    * @param[in]  pSrcB  points to the second input matrix structure
@@ -160,10 +170,12 @@ typedef struct {
    * @return     The function returns either
    * <code>ARM_MATH_SIZE_MISMATCH</code> or <code>ARM_MATH_SUCCESS</code> based on the outcome of size checking.
    */
-arm_status arm_mat_add_f32(const arm_matrix_instance_f32 *pSrcA,
-                           const arm_matrix_instance_f32 *pSrcB, arm_matrix_instance_f32 *pDst);
+arm_status arm_mat_add_f32(
+  const arm_matrix_instance_f32 * pSrcA,
+  const arm_matrix_instance_f32 * pSrcB,
+        arm_matrix_instance_f32 * pDst);
 
-/**
+  /**
    * @brief Q15 matrix addition.
    * @param[in]   pSrcA  points to the first input matrix structure
    * @param[in]   pSrcB  points to the second input matrix structure
@@ -171,10 +183,12 @@ arm_status arm_mat_add_f32(const arm_matrix_instance_f32 *pSrcA,
    * @return     The function returns either
    * <code>ARM_MATH_SIZE_MISMATCH</code> or <code>ARM_MATH_SUCCESS</code> based on the outcome of size checking.
    */
-arm_status arm_mat_add_q15(const arm_matrix_instance_q15 *pSrcA,
-                           const arm_matrix_instance_q15 *pSrcB, arm_matrix_instance_q15 *pDst);
+arm_status arm_mat_add_q15(
+  const arm_matrix_instance_q15 * pSrcA,
+  const arm_matrix_instance_q15 * pSrcB,
+        arm_matrix_instance_q15 * pDst);
 
-/**
+  /**
    * @brief Q31 matrix addition.
    * @param[in]  pSrcA  points to the first input matrix structure
    * @param[in]  pSrcB  points to the second input matrix structure
@@ -182,10 +196,12 @@ arm_status arm_mat_add_q15(const arm_matrix_instance_q15 *pSrcA,
    * @return     The function returns either
    * <code>ARM_MATH_SIZE_MISMATCH</code> or <code>ARM_MATH_SUCCESS</code> based on the outcome of size checking.
    */
-arm_status arm_mat_add_q31(const arm_matrix_instance_q31 *pSrcA,
-                           const arm_matrix_instance_q31 *pSrcB, arm_matrix_instance_q31 *pDst);
+arm_status arm_mat_add_q31(
+  const arm_matrix_instance_q31 * pSrcA,
+  const arm_matrix_instance_q31 * pSrcB,
+        arm_matrix_instance_q31 * pDst);
 
-/**
+  /**
    * @brief Floating-point, complex, matrix multiplication.
    * @param[in]  pSrcA  points to the first input matrix structure
    * @param[in]  pSrcB  points to the second input matrix structure
@@ -193,11 +209,12 @@ arm_status arm_mat_add_q31(const arm_matrix_instance_q31 *pSrcA,
    * @return     The function returns either
    * <code>ARM_MATH_SIZE_MISMATCH</code> or <code>ARM_MATH_SUCCESS</code> based on the outcome of size checking.
    */
-arm_status arm_mat_cmplx_mult_f32(const arm_matrix_instance_f32 *pSrcA,
-                                  const arm_matrix_instance_f32 *pSrcB,
-                                  arm_matrix_instance_f32 *pDst);
+arm_status arm_mat_cmplx_mult_f32(
+  const arm_matrix_instance_f32 * pSrcA,
+  const arm_matrix_instance_f32 * pSrcB,
+        arm_matrix_instance_f32 * pDst);
 
-/**
+  /**
    * @brief Q15, complex,  matrix multiplication.
    * @param[in]  pSrcA  points to the first input matrix structure
    * @param[in]  pSrcB  points to the second input matrix structure
@@ -205,11 +222,13 @@ arm_status arm_mat_cmplx_mult_f32(const arm_matrix_instance_f32 *pSrcA,
    * @return     The function returns either
    * <code>ARM_MATH_SIZE_MISMATCH</code> or <code>ARM_MATH_SUCCESS</code> based on the outcome of size checking.
    */
-arm_status arm_mat_cmplx_mult_q15(const arm_matrix_instance_q15 *pSrcA,
-                                  const arm_matrix_instance_q15 *pSrcB,
-                                  arm_matrix_instance_q15 *pDst, q15_t *pScratch);
+arm_status arm_mat_cmplx_mult_q15(
+  const arm_matrix_instance_q15 * pSrcA,
+  const arm_matrix_instance_q15 * pSrcB,
+        arm_matrix_instance_q15 * pDst,
+        q15_t * pScratch);
 
-/**
+  /**
    * @brief Q31, complex, matrix multiplication.
    * @param[in]  pSrcA  points to the first input matrix structure
    * @param[in]  pSrcB  points to the second input matrix structure
@@ -217,18 +236,21 @@ arm_status arm_mat_cmplx_mult_q15(const arm_matrix_instance_q15 *pSrcA,
    * @return     The function returns either
    * <code>ARM_MATH_SIZE_MISMATCH</code> or <code>ARM_MATH_SUCCESS</code> based on the outcome of size checking.
    */
-arm_status arm_mat_cmplx_mult_q31(const arm_matrix_instance_q31 *pSrcA,
-                                  const arm_matrix_instance_q31 *pSrcB,
-                                  arm_matrix_instance_q31 *pDst);
+arm_status arm_mat_cmplx_mult_q31(
+  const arm_matrix_instance_q31 * pSrcA,
+  const arm_matrix_instance_q31 * pSrcB,
+        arm_matrix_instance_q31 * pDst);
 
-/**
+  /**
    * @brief Floating-point matrix transpose.
    * @param[in]  pSrc  points to the input matrix
    * @param[out] pDst  points to the output matrix
    * @return    The function returns either  <code>ARM_MATH_SIZE_MISMATCH</code>
    * or <code>ARM_MATH_SUCCESS</code> based on the outcome of size checking.
    */
-arm_status arm_mat_trans_f32(const arm_matrix_instance_f32 *pSrc, arm_matrix_instance_f32 *pDst);
+arm_status arm_mat_trans_f32(
+  const arm_matrix_instance_f32 * pSrc,
+        arm_matrix_instance_f32 * pDst);
 
 /**
    * @brief Floating-point matrix transpose.
@@ -237,66 +259,78 @@ arm_status arm_mat_trans_f32(const arm_matrix_instance_f32 *pSrc, arm_matrix_ins
    * @return    The function returns either  <code>ARM_MATH_SIZE_MISMATCH</code>
    * or <code>ARM_MATH_SUCCESS</code> based on the outcome of size checking.
    */
-arm_status arm_mat_trans_f64(const arm_matrix_instance_f64 *pSrc, arm_matrix_instance_f64 *pDst);
+arm_status arm_mat_trans_f64(
+  const arm_matrix_instance_f64 * pSrc,
+        arm_matrix_instance_f64 * pDst);
 
-/**
+  /**
    * @brief Floating-point complex matrix transpose.
    * @param[in]  pSrc  points to the input matrix
    * @param[out] pDst  points to the output matrix
    * @return    The function returns either  <code>ARM_MATH_SIZE_MISMATCH</code>
    * or <code>ARM_MATH_SUCCESS</code> based on the outcome of size checking.
    */
-arm_status arm_mat_cmplx_trans_f32(const arm_matrix_instance_f32 *pSrc,
-                                   arm_matrix_instance_f32 *pDst);
+arm_status arm_mat_cmplx_trans_f32(
+  const arm_matrix_instance_f32 * pSrc,
+  arm_matrix_instance_f32 * pDst);
 
-/**
+
+  /**
    * @brief Q15 matrix transpose.
    * @param[in]  pSrc  points to the input matrix
    * @param[out] pDst  points to the output matrix
    * @return    The function returns either  <code>ARM_MATH_SIZE_MISMATCH</code>
    * or <code>ARM_MATH_SUCCESS</code> based on the outcome of size checking.
    */
-arm_status arm_mat_trans_q15(const arm_matrix_instance_q15 *pSrc, arm_matrix_instance_q15 *pDst);
+arm_status arm_mat_trans_q15(
+  const arm_matrix_instance_q15 * pSrc,
+        arm_matrix_instance_q15 * pDst);
 
-/**
+  /**
    * @brief Q15 complex matrix transpose.
    * @param[in]  pSrc  points to the input matrix
    * @param[out] pDst  points to the output matrix
    * @return    The function returns either  <code>ARM_MATH_SIZE_MISMATCH</code>
    * or <code>ARM_MATH_SUCCESS</code> based on the outcome of size checking.
    */
-arm_status arm_mat_cmplx_trans_q15(const arm_matrix_instance_q15 *pSrc,
-                                   arm_matrix_instance_q15 *pDst);
+arm_status arm_mat_cmplx_trans_q15(
+  const arm_matrix_instance_q15 * pSrc,
+  arm_matrix_instance_q15 * pDst);
 
-/**
+  /**
    * @brief Q7 matrix transpose.
    * @param[in]  pSrc  points to the input matrix
    * @param[out] pDst  points to the output matrix
    * @return    The function returns either  <code>ARM_MATH_SIZE_MISMATCH</code>
    * or <code>ARM_MATH_SUCCESS</code> based on the outcome of size checking.
    */
-arm_status arm_mat_trans_q7(const arm_matrix_instance_q7 *pSrc, arm_matrix_instance_q7 *pDst);
+arm_status arm_mat_trans_q7(
+  const arm_matrix_instance_q7 * pSrc,
+        arm_matrix_instance_q7 * pDst);
 
-/**
+  /**
    * @brief Q31 matrix transpose.
    * @param[in]  pSrc  points to the input matrix
    * @param[out] pDst  points to the output matrix
    * @return    The function returns either  <code>ARM_MATH_SIZE_MISMATCH</code>
    * or <code>ARM_MATH_SUCCESS</code> based on the outcome of size checking.
    */
-arm_status arm_mat_trans_q31(const arm_matrix_instance_q31 *pSrc, arm_matrix_instance_q31 *pDst);
+arm_status arm_mat_trans_q31(
+  const arm_matrix_instance_q31 * pSrc,
+        arm_matrix_instance_q31 * pDst);
 
-/**
+  /**
    * @brief Q31 complex matrix transpose.
    * @param[in]  pSrc  points to the input matrix
    * @param[out] pDst  points to the output matrix
    * @return    The function returns either  <code>ARM_MATH_SIZE_MISMATCH</code>
    * or <code>ARM_MATH_SUCCESS</code> based on the outcome of size checking.
    */
-arm_status arm_mat_cmplx_trans_q31(const arm_matrix_instance_q31 *pSrc,
-                                   arm_matrix_instance_q31 *pDst);
+arm_status arm_mat_cmplx_trans_q31(
+  const arm_matrix_instance_q31 * pSrc,
+  arm_matrix_instance_q31 * pDst);
 
-/**
+  /**
    * @brief Floating-point matrix multiplication
    * @param[in]  pSrcA  points to the first input matrix structure
    * @param[in]  pSrcB  points to the second input matrix structure
@@ -304,10 +338,12 @@ arm_status arm_mat_cmplx_trans_q31(const arm_matrix_instance_q31 *pSrc,
    * @return     The function returns either
    * <code>ARM_MATH_SIZE_MISMATCH</code> or <code>ARM_MATH_SUCCESS</code> based on the outcome of size checking.
    */
-arm_status arm_mat_mult_f32(const arm_matrix_instance_f32 *pSrcA,
-                            const arm_matrix_instance_f32 *pSrcB, arm_matrix_instance_f32 *pDst);
+arm_status arm_mat_mult_f32(
+  const arm_matrix_instance_f32 * pSrcA,
+  const arm_matrix_instance_f32 * pSrcB,
+        arm_matrix_instance_f32 * pDst);
 
-/**
+  /**
    * @brief Floating-point matrix multiplication
    * @param[in]  pSrcA  points to the first input matrix structure
    * @param[in]  pSrcB  points to the second input matrix structure
@@ -315,19 +351,23 @@ arm_status arm_mat_mult_f32(const arm_matrix_instance_f32 *pSrcA,
    * @return     The function returns either
    * <code>ARM_MATH_SIZE_MISMATCH</code> or <code>ARM_MATH_SUCCESS</code> based on the outcome of size checking.
    */
-arm_status arm_mat_mult_f64(const arm_matrix_instance_f64 *pSrcA,
-                            const arm_matrix_instance_f64 *pSrcB, arm_matrix_instance_f64 *pDst);
+arm_status arm_mat_mult_f64(
+  const arm_matrix_instance_f64 * pSrcA,
+  const arm_matrix_instance_f64 * pSrcB,
+        arm_matrix_instance_f64 * pDst);
 
-/**
+  /**
    * @brief Floating-point matrix and vector multiplication
    * @param[in]  pSrcMat  points to the input matrix structure
    * @param[in]  pVec     points to vector
    * @param[out] pDst     points to output vector
    */
-void arm_mat_vec_mult_f32(const arm_matrix_instance_f32 *pSrcMat, const float32_t *pVec,
-                          float32_t *pDst);
+void arm_mat_vec_mult_f32(
+  const arm_matrix_instance_f32 *pSrcMat, 
+  const float32_t *pVec, 
+  float32_t *pDst);
 
-/**
+  /**
    * @brief Q7 matrix multiplication
    * @param[in]  pSrcA   points to the first input matrix structure
    * @param[in]  pSrcB   points to the second input matrix structure
@@ -336,18 +376,24 @@ void arm_mat_vec_mult_f32(const arm_matrix_instance_f32 *pSrcMat, const float32_
    * @return     The function returns either
    * <code>ARM_MATH_SIZE_MISMATCH</code> or <code>ARM_MATH_SUCCESS</code> based on the outcome of size checking.
    */
-arm_status arm_mat_mult_q7(const arm_matrix_instance_q7 *pSrcA, const arm_matrix_instance_q7 *pSrcB,
-                           arm_matrix_instance_q7 *pDst, q7_t *pState);
+arm_status arm_mat_mult_q7(
+  const arm_matrix_instance_q7 * pSrcA,
+  const arm_matrix_instance_q7 * pSrcB,
+        arm_matrix_instance_q7 * pDst,
+        q7_t * pState);
 
-/**
+  /**
    * @brief Q7 matrix and vector multiplication
    * @param[in]  pSrcMat  points to the input matrix structure
    * @param[in]  pVec     points to vector
    * @param[out] pDst     points to output vector
    */
-void arm_mat_vec_mult_q7(const arm_matrix_instance_q7 *pSrcMat, const q7_t *pVec, q7_t *pDst);
+void arm_mat_vec_mult_q7(
+  const arm_matrix_instance_q7 *pSrcMat, 
+  const q7_t *pVec, 
+  q7_t *pDst);
 
-/**
+  /**
    * @brief Q15 matrix multiplication
    * @param[in]  pSrcA   points to the first input matrix structure
    * @param[in]  pSrcB   points to the second input matrix structure
@@ -356,19 +402,24 @@ void arm_mat_vec_mult_q7(const arm_matrix_instance_q7 *pSrcMat, const q7_t *pVec
    * @return     The function returns either
    * <code>ARM_MATH_SIZE_MISMATCH</code> or <code>ARM_MATH_SUCCESS</code> based on the outcome of size checking.
    */
-arm_status arm_mat_mult_q15(const arm_matrix_instance_q15 *pSrcA,
-                            const arm_matrix_instance_q15 *pSrcB, arm_matrix_instance_q15 *pDst,
-                            q15_t *pState);
+arm_status arm_mat_mult_q15(
+  const arm_matrix_instance_q15 * pSrcA,
+  const arm_matrix_instance_q15 * pSrcB,
+        arm_matrix_instance_q15 * pDst,
+        q15_t * pState);
 
-/**
+  /**
    * @brief Q15 matrix and vector multiplication
    * @param[in]  pSrcMat  points to the input matrix structure
    * @param[in]  pVec     points to vector
    * @param[out] pDst     points to output vector
    */
-void arm_mat_vec_mult_q15(const arm_matrix_instance_q15 *pSrcMat, const q15_t *pVec, q15_t *pDst);
+void arm_mat_vec_mult_q15(
+  const arm_matrix_instance_q15 *pSrcMat, 
+  const q15_t *pVec, 
+  q15_t *pDst);
 
-/**
+  /**
    * @brief Q15 matrix multiplication (fast variant) for Cortex-M3 and Cortex-M4
    * @param[in]  pSrcA   points to the first input matrix structure
    * @param[in]  pSrcB   points to the second input matrix structure
@@ -377,11 +428,13 @@ void arm_mat_vec_mult_q15(const arm_matrix_instance_q15 *pSrcMat, const q15_t *p
    * @return     The function returns either
    * <code>ARM_MATH_SIZE_MISMATCH</code> or <code>ARM_MATH_SUCCESS</code> based on the outcome of size checking.
    */
-arm_status arm_mat_mult_fast_q15(const arm_matrix_instance_q15 *pSrcA,
-                                 const arm_matrix_instance_q15 *pSrcB,
-                                 arm_matrix_instance_q15 *pDst, q15_t *pState);
+arm_status arm_mat_mult_fast_q15(
+  const arm_matrix_instance_q15 * pSrcA,
+  const arm_matrix_instance_q15 * pSrcB,
+        arm_matrix_instance_q15 * pDst,
+        q15_t * pState);
 
-/**
+  /**
    * @brief Q31 matrix multiplication
    * @param[in]  pSrcA  points to the first input matrix structure
    * @param[in]  pSrcB  points to the second input matrix structure
@@ -389,10 +442,12 @@ arm_status arm_mat_mult_fast_q15(const arm_matrix_instance_q15 *pSrcA,
    * @return     The function returns either
    * <code>ARM_MATH_SIZE_MISMATCH</code> or <code>ARM_MATH_SUCCESS</code> based on the outcome of size checking.
    */
-arm_status arm_mat_mult_q31(const arm_matrix_instance_q31 *pSrcA,
-                            const arm_matrix_instance_q31 *pSrcB, arm_matrix_instance_q31 *pDst);
+arm_status arm_mat_mult_q31(
+  const arm_matrix_instance_q31 * pSrcA,
+  const arm_matrix_instance_q31 * pSrcB,
+        arm_matrix_instance_q31 * pDst);
 
-/**
+  /**
    * @brief Q31 matrix multiplication
    * @param[in]  pSrcA  points to the first input matrix structure
    * @param[in]  pSrcB  points to the second input matrix structure
@@ -401,19 +456,24 @@ arm_status arm_mat_mult_q31(const arm_matrix_instance_q31 *pSrcA,
    * @return     The function returns either
    * <code>ARM_MATH_SIZE_MISMATCH</code> or <code>ARM_MATH_SUCCESS</code> based on the outcome of size checking.
    */
-arm_status arm_mat_mult_opt_q31(const arm_matrix_instance_q31 *pSrcA,
-                                const arm_matrix_instance_q31 *pSrcB, arm_matrix_instance_q31 *pDst,
-                                q31_t *pState);
+arm_status arm_mat_mult_opt_q31(
+  const arm_matrix_instance_q31 * pSrcA,
+  const arm_matrix_instance_q31 * pSrcB,
+        arm_matrix_instance_q31 * pDst,
+        q31_t *pState);
 
-/**
+  /**
    * @brief Q31 matrix and vector multiplication
    * @param[in]  pSrcMat  points to the input matrix structure
    * @param[in]  pVec     points to vector
    * @param[out] pDst     points to output vector
    */
-void arm_mat_vec_mult_q31(const arm_matrix_instance_q31 *pSrcMat, const q31_t *pVec, q31_t *pDst);
+void arm_mat_vec_mult_q31(
+  const arm_matrix_instance_q31 *pSrcMat, 
+  const q31_t *pVec, 
+  q31_t *pDst);
 
-/**
+  /**
    * @brief Q31 matrix multiplication (fast variant) for Cortex-M3 and Cortex-M4
    * @param[in]  pSrcA  points to the first input matrix structure
    * @param[in]  pSrcB  points to the second input matrix structure
@@ -421,11 +481,12 @@ void arm_mat_vec_mult_q31(const arm_matrix_instance_q31 *pSrcMat, const q31_t *p
    * @return     The function returns either
    * <code>ARM_MATH_SIZE_MISMATCH</code> or <code>ARM_MATH_SUCCESS</code> based on the outcome of size checking.
    */
-arm_status arm_mat_mult_fast_q31(const arm_matrix_instance_q31 *pSrcA,
-                                 const arm_matrix_instance_q31 *pSrcB,
-                                 arm_matrix_instance_q31 *pDst);
+arm_status arm_mat_mult_fast_q31(
+  const arm_matrix_instance_q31 * pSrcA,
+  const arm_matrix_instance_q31 * pSrcB,
+        arm_matrix_instance_q31 * pDst);
 
-/**
+  /**
    * @brief Floating-point matrix subtraction
    * @param[in]  pSrcA  points to the first input matrix structure
    * @param[in]  pSrcB  points to the second input matrix structure
@@ -433,10 +494,12 @@ arm_status arm_mat_mult_fast_q31(const arm_matrix_instance_q31 *pSrcA,
    * @return     The function returns either
    * <code>ARM_MATH_SIZE_MISMATCH</code> or <code>ARM_MATH_SUCCESS</code> based on the outcome of size checking.
    */
-arm_status arm_mat_sub_f32(const arm_matrix_instance_f32 *pSrcA,
-                           const arm_matrix_instance_f32 *pSrcB, arm_matrix_instance_f32 *pDst);
+arm_status arm_mat_sub_f32(
+  const arm_matrix_instance_f32 * pSrcA,
+  const arm_matrix_instance_f32 * pSrcB,
+        arm_matrix_instance_f32 * pDst);
 
-/**
+  /**
    * @brief Floating-point matrix subtraction
    * @param[in]  pSrcA  points to the first input matrix structure
    * @param[in]  pSrcB  points to the second input matrix structure
@@ -444,10 +507,12 @@ arm_status arm_mat_sub_f32(const arm_matrix_instance_f32 *pSrcA,
    * @return     The function returns either
    * <code>ARM_MATH_SIZE_MISMATCH</code> or <code>ARM_MATH_SUCCESS</code> based on the outcome of size checking.
    */
-arm_status arm_mat_sub_f64(const arm_matrix_instance_f64 *pSrcA,
-                           const arm_matrix_instance_f64 *pSrcB, arm_matrix_instance_f64 *pDst);
+arm_status arm_mat_sub_f64(
+  const arm_matrix_instance_f64 * pSrcA,
+  const arm_matrix_instance_f64 * pSrcB,
+        arm_matrix_instance_f64 * pDst);
 
-/**
+  /**
    * @brief Q15 matrix subtraction
    * @param[in]  pSrcA  points to the first input matrix structure
    * @param[in]  pSrcB  points to the second input matrix structure
@@ -455,10 +520,12 @@ arm_status arm_mat_sub_f64(const arm_matrix_instance_f64 *pSrcA,
    * @return     The function returns either
    * <code>ARM_MATH_SIZE_MISMATCH</code> or <code>ARM_MATH_SUCCESS</code> based on the outcome of size checking.
    */
-arm_status arm_mat_sub_q15(const arm_matrix_instance_q15 *pSrcA,
-                           const arm_matrix_instance_q15 *pSrcB, arm_matrix_instance_q15 *pDst);
+arm_status arm_mat_sub_q15(
+  const arm_matrix_instance_q15 * pSrcA,
+  const arm_matrix_instance_q15 * pSrcB,
+        arm_matrix_instance_q15 * pDst);
 
-/**
+  /**
    * @brief Q31 matrix subtraction
    * @param[in]  pSrcA  points to the first input matrix structure
    * @param[in]  pSrcB  points to the second input matrix structure
@@ -466,10 +533,12 @@ arm_status arm_mat_sub_q15(const arm_matrix_instance_q15 *pSrcA,
    * @return     The function returns either
    * <code>ARM_MATH_SIZE_MISMATCH</code> or <code>ARM_MATH_SUCCESS</code> based on the outcome of size checking.
    */
-arm_status arm_mat_sub_q31(const arm_matrix_instance_q31 *pSrcA,
-                           const arm_matrix_instance_q31 *pSrcB, arm_matrix_instance_q31 *pDst);
+arm_status arm_mat_sub_q31(
+  const arm_matrix_instance_q31 * pSrcA,
+  const arm_matrix_instance_q31 * pSrcB,
+        arm_matrix_instance_q31 * pDst);
 
-/**
+  /**
    * @brief Floating-point matrix scaling.
    * @param[in]  pSrc   points to the input matrix
    * @param[in]  scale  scale factor
@@ -477,10 +546,12 @@ arm_status arm_mat_sub_q31(const arm_matrix_instance_q31 *pSrcA,
    * @return     The function returns either
    * <code>ARM_MATH_SIZE_MISMATCH</code> or <code>ARM_MATH_SUCCESS</code> based on the outcome of size checking.
    */
-arm_status arm_mat_scale_f32(const arm_matrix_instance_f32 *pSrc, float32_t scale,
-                             arm_matrix_instance_f32 *pDst);
+arm_status arm_mat_scale_f32(
+  const arm_matrix_instance_f32 * pSrc,
+        float32_t scale,
+        arm_matrix_instance_f32 * pDst);
 
-/**
+  /**
    * @brief Q15 matrix scaling.
    * @param[in]  pSrc        points to input matrix
    * @param[in]  scaleFract  fractional portion of the scale factor
@@ -489,10 +560,13 @@ arm_status arm_mat_scale_f32(const arm_matrix_instance_f32 *pSrc, float32_t scal
    * @return     The function returns either
    * <code>ARM_MATH_SIZE_MISMATCH</code> or <code>ARM_MATH_SUCCESS</code> based on the outcome of size checking.
    */
-arm_status arm_mat_scale_q15(const arm_matrix_instance_q15 *pSrc, q15_t scaleFract, int32_t shift,
-                             arm_matrix_instance_q15 *pDst);
+arm_status arm_mat_scale_q15(
+  const arm_matrix_instance_q15 * pSrc,
+        q15_t scaleFract,
+        int32_t shift,
+        arm_matrix_instance_q15 * pDst);
 
-/**
+  /**
    * @brief Q31 matrix scaling.
    * @param[in]  pSrc        points to input matrix
    * @param[in]  scaleFract  fractional portion of the scale factor
@@ -501,56 +575,91 @@ arm_status arm_mat_scale_q15(const arm_matrix_instance_q15 *pSrc, q15_t scaleFra
    * @return     The function returns either
    * <code>ARM_MATH_SIZE_MISMATCH</code> or <code>ARM_MATH_SUCCESS</code> based on the outcome of size checking.
    */
-arm_status arm_mat_scale_q31(const arm_matrix_instance_q31 *pSrc, q31_t scaleFract, int32_t shift,
-                             arm_matrix_instance_q31 *pDst);
+arm_status arm_mat_scale_q31(
+  const arm_matrix_instance_q31 * pSrc,
+        q31_t scaleFract,
+        int32_t shift,
+        arm_matrix_instance_q31 * pDst);
 
-/**
+  /**
    * @brief  Q31 matrix initialization.
    * @param[in,out] S         points to an instance of the floating-point matrix structure.
    * @param[in]     nRows     number of rows in the matrix.
    * @param[in]     nColumns  number of columns in the matrix.
    * @param[in]     pData     points to the matrix data array.
    */
-void arm_mat_init_q31(arm_matrix_instance_q31 *S, uint16_t nRows, uint16_t nColumns, q31_t *pData);
+void arm_mat_init_q31(
+        arm_matrix_instance_q31 * S,
+        uint16_t nRows,
+        uint16_t nColumns,
+        q31_t * pData);
 
-/**
+  /**
    * @brief  Q15 matrix initialization.
    * @param[in,out] S         points to an instance of the floating-point matrix structure.
    * @param[in]     nRows     number of rows in the matrix.
    * @param[in]     nColumns  number of columns in the matrix.
    * @param[in]     pData     points to the matrix data array.
    */
-void arm_mat_init_q15(arm_matrix_instance_q15 *S, uint16_t nRows, uint16_t nColumns, q15_t *pData);
+void arm_mat_init_q15(
+        arm_matrix_instance_q15 * S,
+        uint16_t nRows,
+        uint16_t nColumns,
+        q15_t * pData);
 
-/**
+  /**
    * @brief  Floating-point matrix initialization.
    * @param[in,out] S         points to an instance of the floating-point matrix structure.
    * @param[in]     nRows     number of rows in the matrix.
    * @param[in]     nColumns  number of columns in the matrix.
    * @param[in]     pData     points to the matrix data array.
    */
-void arm_mat_init_f32(arm_matrix_instance_f32 *S, uint16_t nRows, uint16_t nColumns,
-                      float32_t *pData);
+void arm_mat_init_f32(
+        arm_matrix_instance_f32 * S,
+        uint16_t nRows,
+        uint16_t nColumns,
+        float32_t * pData);
 
 /**
+ * @brief  64-bit floating-point matrix initialization.
+ * @param[in,out] S         points to an instance of the 64-bit floating-point matrix structure.
+ * @param[in]     nRows     number of rows in the matrix.
+ * @param[in]     nColumns  number of columns in the matrix.
+ * @param[in]     pData     points to the matrix data array.
+ */
+void arm_mat_init_f64(
+      arm_matrix_instance_f64 * S,
+      uint16_t nRows,
+      uint16_t nColumns,
+      float64_t * pData);
+
+
+
+
+  /**
    * @brief Floating-point matrix inverse.
    * @param[in]  src   points to the instance of the input floating-point matrix structure.
    * @param[out] dst   points to the instance of the output floating-point matrix structure.
    * @return The function returns ARM_MATH_SIZE_MISMATCH, if the dimensions do not match.
    * If the input matrix is singular (does not have an inverse), then the algorithm terminates and returns error status ARM_MATH_SINGULAR.
    */
-arm_status arm_mat_inverse_f32(const arm_matrix_instance_f32 *src, arm_matrix_instance_f32 *dst);
+  arm_status arm_mat_inverse_f32(
+  const arm_matrix_instance_f32 * src,
+  arm_matrix_instance_f32 * dst);
 
-/**
+
+  /**
    * @brief Floating-point matrix inverse.
    * @param[in]  src   points to the instance of the input floating-point matrix structure.
    * @param[out] dst   points to the instance of the output floating-point matrix structure.
    * @return The function returns ARM_MATH_SIZE_MISMATCH, if the dimensions do not match.
    * If the input matrix is singular (does not have an inverse), then the algorithm terminates and returns error status ARM_MATH_SINGULAR.
    */
-arm_status arm_mat_inverse_f64(const arm_matrix_instance_f64 *src, arm_matrix_instance_f64 *dst);
+  arm_status arm_mat_inverse_f64(
+  const arm_matrix_instance_f64 * src,
+  arm_matrix_instance_f64 * dst);
 
-/**
+ /**
    * @brief Floating-point Cholesky decomposition of Symmetric Positive Definite Matrix.
    * @param[in]  src   points to the instance of the input floating-point matrix structure.
    * @param[out] dst   points to the instance of the output floating-point matrix structure.
@@ -559,9 +668,11 @@ arm_status arm_mat_inverse_f64(const arm_matrix_instance_f64 *src, arm_matrix_in
    * If the matrix is ill conditioned or only semi-definite, then it is better using the LDL^t decomposition.
    * The decomposition is returning a lower triangular matrix.
    */
-arm_status arm_mat_cholesky_f64(const arm_matrix_instance_f64 *src, arm_matrix_instance_f64 *dst);
+  arm_status arm_mat_cholesky_f64(
+  const arm_matrix_instance_f64 * src,
+  arm_matrix_instance_f64 * dst);
 
-/**
+ /**
    * @brief Floating-point Cholesky decomposition of Symmetric Positive Definite Matrix.
    * @param[in]  src   points to the instance of the input floating-point matrix structure.
    * @param[out] dst   points to the instance of the output floating-point matrix structure.
@@ -570,53 +681,61 @@ arm_status arm_mat_cholesky_f64(const arm_matrix_instance_f64 *src, arm_matrix_i
    * If the matrix is ill conditioned or only semi-definite, then it is better using the LDL^t decomposition.
    * The decomposition is returning a lower triangular matrix.
    */
-arm_status arm_mat_cholesky_f32(const arm_matrix_instance_f32 *src, arm_matrix_instance_f32 *dst);
+  arm_status arm_mat_cholesky_f32(
+  const arm_matrix_instance_f32 * src,
+  arm_matrix_instance_f32 * dst);
 
-/**
+  /**
    * @brief Solve UT . X = A where UT is an upper triangular matrix
    * @param[in]  ut  The upper triangular matrix
    * @param[in]  a  The matrix a
    * @param[out] dst The solution X of UT . X = A
    * @return The function returns ARM_MATH_SINGULAR, if the system can't be solved.
   */
-arm_status arm_mat_solve_upper_triangular_f32(const arm_matrix_instance_f32 *ut,
-                                              const arm_matrix_instance_f32 *a,
-                                              arm_matrix_instance_f32 *dst);
+  arm_status arm_mat_solve_upper_triangular_f32(
+  const arm_matrix_instance_f32 * ut,
+  const arm_matrix_instance_f32 * a,
+  arm_matrix_instance_f32 * dst);
 
-/**
+ /**
    * @brief Solve LT . X = A where LT is a lower triangular matrix
    * @param[in]  lt  The lower triangular matrix
    * @param[in]  a  The matrix a
    * @param[out] dst The solution X of LT . X = A
    * @return The function returns ARM_MATH_SINGULAR, if the system can't be solved.
    */
-arm_status arm_mat_solve_lower_triangular_f32(const arm_matrix_instance_f32 *lt,
-                                              const arm_matrix_instance_f32 *a,
-                                              arm_matrix_instance_f32 *dst);
+  arm_status arm_mat_solve_lower_triangular_f32(
+  const arm_matrix_instance_f32 * lt,
+  const arm_matrix_instance_f32 * a,
+  arm_matrix_instance_f32 * dst);
 
-/**
+
+  /**
    * @brief Solve UT . X = A where UT is an upper triangular matrix
    * @param[in]  ut  The upper triangular matrix
    * @param[in]  a  The matrix a
    * @param[out] dst The solution X of UT . X = A
    * @return The function returns ARM_MATH_SINGULAR, if the system can't be solved.
   */
-arm_status arm_mat_solve_upper_triangular_f64(const arm_matrix_instance_f64 *ut,
-                                              const arm_matrix_instance_f64 *a,
-                                              arm_matrix_instance_f64 *dst);
+  arm_status arm_mat_solve_upper_triangular_f64(
+  const arm_matrix_instance_f64 * ut,
+  const arm_matrix_instance_f64 * a,
+  arm_matrix_instance_f64 * dst);
 
-/**
+ /**
    * @brief Solve LT . X = A where LT is a lower triangular matrix
    * @param[in]  lt  The lower triangular matrix
    * @param[in]  a  The matrix a
    * @param[out] dst The solution X of LT . X = A
    * @return The function returns ARM_MATH_SINGULAR, if the system can't be solved.
    */
-arm_status arm_mat_solve_lower_triangular_f64(const arm_matrix_instance_f64 *lt,
-                                              const arm_matrix_instance_f64 *a,
-                                              arm_matrix_instance_f64 *dst);
+  arm_status arm_mat_solve_lower_triangular_f64(
+  const arm_matrix_instance_f64 * lt,
+  const arm_matrix_instance_f64 * a,
+  arm_matrix_instance_f64 * dst);
 
-/**
+
+  /**
    * @brief Floating-point LDL decomposition of Symmetric Positive Semi-Definite Matrix.
    * @param[in]  src   points to the instance of the input floating-point matrix structure.
    * @param[out] l   points to the instance of the output floating-point triangular matrix structure.
@@ -626,10 +745,13 @@ arm_status arm_mat_solve_lower_triangular_f64(const arm_matrix_instance_f64 *lt,
    * If the input matrix does not have a decomposition, then the algorithm terminates and returns error status ARM_MATH_DECOMPOSITION_FAILURE.
    * The decomposition is returning a lower triangular matrix.
    */
-arm_status arm_mat_ldlt_f32(const arm_matrix_instance_f32 *src, arm_matrix_instance_f32 *l,
-                            arm_matrix_instance_f32 *d, uint16_t *pp);
+  arm_status arm_mat_ldlt_f32(
+  const arm_matrix_instance_f32 * src,
+  arm_matrix_instance_f32 * l,
+  arm_matrix_instance_f32 * d,
+  uint16_t * pp);
 
-/**
+ /**
    * @brief Floating-point LDL decomposition of Symmetric Positive Semi-Definite Matrix.
    * @param[in]  src   points to the instance of the input floating-point matrix structure.
    * @param[out] l   points to the instance of the output floating-point triangular matrix structure.
@@ -639,10 +761,95 @@ arm_status arm_mat_ldlt_f32(const arm_matrix_instance_f32 *src, arm_matrix_insta
    * If the input matrix does not have a decomposition, then the algorithm terminates and returns error status ARM_MATH_DECOMPOSITION_FAILURE.
    * The decomposition is returning a lower triangular matrix.
    */
-arm_status arm_mat_ldlt_f64(const arm_matrix_instance_f64 *src, arm_matrix_instance_f64 *l,
-                            arm_matrix_instance_f64 *d, uint16_t *pp);
+  arm_status arm_mat_ldlt_f64(
+  const arm_matrix_instance_f64 * src,
+  arm_matrix_instance_f64 * l,
+  arm_matrix_instance_f64 * d,
+  uint16_t * pp);
+
+/**
+  @brief         QR decomposition of a m x n floating point matrix with m >= n.
+  @param[in]     pSrc      points to input matrix structure. The source matrix is modified by the function.
+  @param[in]     threshold norm2 threshold.
+  @param[out]    pOutR     points to output R matrix structure of dimension m x n
+  @param[out]    pOutQ     points to output Q matrix structure of dimension m x m
+  @param[out]    pOutTau   points to Householder scaling factors of dimension n
+  @param[inout]  pTmpA     points to a temporary vector of dimension m.
+  @param[inout]  pTmpB     points to a temporary vector of dimension n.
+  @return        execution status
+                   - \ref ARM_MATH_SUCCESS       : Operation successful
+                   - \ref ARM_MATH_SIZE_MISMATCH : Matrix size check failed
+                   - \ref ARM_MATH_SINGULAR      : Input matrix is found to be singular (non-invertible)
+ */
+
+arm_status arm_mat_qr_f32(
+    const arm_matrix_instance_f32 * pSrc,
+    const float32_t threshold,
+    arm_matrix_instance_f32 * pOutR,
+    arm_matrix_instance_f32 * pOutQ,
+    float32_t * pOutTau,
+    float32_t *pTmpA,
+    float32_t *pTmpB
+    );
+
+/**
+  @brief         QR decomposition of a m x n floating point matrix with m >= n.
+  @param[in]     pSrc      points to input matrix structure. The source matrix is modified by the function.
+  @param[in]     threshold norm2 threshold.  
+  @param[out]    pOutR     points to output R matrix structure of dimension m x n
+  @param[out]    pOutQ     points to output Q matrix structure of dimension m x m
+  @param[out]    pOutTau   points to Householder scaling factors of dimension n
+  @param[inout]  pTmpA     points to a temporary vector of dimension m.
+  @param[inout]  pTmpB     points to a temporary vector of dimension n.
+  @return        execution status
+                   - \ref ARM_MATH_SUCCESS       : Operation successful
+                   - \ref ARM_MATH_SIZE_MISMATCH : Matrix size check failed
+                   - \ref ARM_MATH_SINGULAR      : Input matrix is found to be singular (non-invertible)
+ */
+
+arm_status arm_mat_qr_f64(
+    const arm_matrix_instance_f64 * pSrc,
+    const float64_t threshold,
+    arm_matrix_instance_f64 * pOutR,
+    arm_matrix_instance_f64 * pOutQ,
+    float64_t * pOutTau,
+    float64_t *pTmpA,
+    float64_t *pTmpB
+    );
+
+/**
+  @brief         Householder transform of a floating point vector.
+  @param[in]     pSrc        points to the input vector.
+  @param[in]     threshold   norm2 threshold.
+  @param[in]     blockSize   dimension of the vector space.
+  @param[out]    pOut        points to the output vector.
+  @return        the scaling factor beta
+ */
+
+float32_t arm_householder_f32(
+    const float32_t * pSrc,
+    const float32_t threshold,
+    uint32_t    blockSize,
+    float32_t * pOut
+    );
+
+/**
+  @brief         Householder transform of a double-precision floating-point vector.
+  @param[in]     pSrc        points to the input vector.
+  @param[in]     threshold   norm2 threshold.
+  @param[in]     blockSize   dimension of the vector space.
+  @param[out]    pOut        points to the output vector.
+  @return        the scaling factor beta
+ */
+
+float64_t arm_householder_f64(
+    const float64_t * pSrc,
+    const float64_t threshold,
+    uint32_t    blockSize,
+    float64_t * pOut
+    );
 
-#ifdef __cplusplus
+#ifdef   __cplusplus
 }
 #endif
 
diff --git a/Libraries/CMSIS/5.9.0/DSP/Include/dsp/matrix_functions_f16.h b/Libraries/CMSIS/5.9.0/DSP/1.16.2/Include/dsp/matrix_functions_f16.h
old mode 100644
new mode 100755
similarity index 61%
rename from Libraries/CMSIS/5.9.0/DSP/Include/dsp/matrix_functions_f16.h
rename to Libraries/CMSIS/5.9.0/DSP/1.16.2/Include/dsp/matrix_functions_f16.h
index 088ac5e2d46..39eb9a80175
--- a/Libraries/CMSIS/5.9.0/DSP/Include/dsp/matrix_functions_f16.h
+++ b/Libraries/CMSIS/5.9.0/DSP/1.16.2/Include/dsp/matrix_functions_f16.h
@@ -23,31 +23,37 @@
  * limitations under the License.
  */
 
-#ifndef _MATRIX_FUNCTIONS_F16_H_
-#define _MATRIX_FUNCTIONS_F16_H_
+ 
+#ifndef MATRIX_FUNCTIONS_F16_H_
+#define MATRIX_FUNCTIONS_F16_H_
 
-#ifdef __cplusplus
-extern "C" {
+#ifdef   __cplusplus
+extern "C"
+{
 #endif
 
+
 #include "arm_math_types_f16.h"
 #include "arm_math_memory.h"
 
 #include "dsp/none.h"
 #include "dsp/utils.h"
-
+    
 #if defined(ARM_FLOAT16_SUPPORTED)
 
-/**
+  #define DEFAULT_HOUSEHOLDER_THRESHOLD_F16 (1.0e-3f)
+
+ /**
    * @brief Instance structure for the floating-point matrix structure.
    */
-typedef struct {
-    uint16_t numRows; /**< number of rows of the matrix.     */
-    uint16_t numCols; /**< number of columns of the matrix.  */
-    float16_t *pData; /**< points to the data of the matrix. */
-} arm_matrix_instance_f16;
+  typedef struct
+  {
+    uint16_t numRows;     /**< number of rows of the matrix.     */
+    uint16_t numCols;     /**< number of columns of the matrix.  */
+    float16_t *pData;     /**< points to the data of the matrix. */
+  } arm_matrix_instance_f16;
 
-/**
+ /**
    * @brief Floating-point matrix addition.
    * @param[in]  pSrcA  points to the first input matrix structure
    * @param[in]  pSrcB  points to the second input matrix structure
@@ -55,10 +61,12 @@ typedef struct {
    * @return     The function returns either
    * <code>ARM_MATH_SIZE_MISMATCH</code> or <code>ARM_MATH_SUCCESS</code> based on the outcome of size checking.
    */
-arm_status arm_mat_add_f16(const arm_matrix_instance_f16 *pSrcA,
-                           const arm_matrix_instance_f16 *pSrcB, arm_matrix_instance_f16 *pDst);
+arm_status arm_mat_add_f16(
+  const arm_matrix_instance_f16 * pSrcA,
+  const arm_matrix_instance_f16 * pSrcB,
+        arm_matrix_instance_f16 * pDst);
 
-/**
+  /**
    * @brief Floating-point, complex, matrix multiplication.
    * @param[in]  pSrcA  points to the first input matrix structure
    * @param[in]  pSrcB  points to the second input matrix structure
@@ -66,30 +74,34 @@ arm_status arm_mat_add_f16(const arm_matrix_instance_f16 *pSrcA,
    * @return     The function returns either
    * <code>ARM_MATH_SIZE_MISMATCH</code> or <code>ARM_MATH_SUCCESS</code> based on the outcome of size checking.
    */
-arm_status arm_mat_cmplx_mult_f16(const arm_matrix_instance_f16 *pSrcA,
-                                  const arm_matrix_instance_f16 *pSrcB,
-                                  arm_matrix_instance_f16 *pDst);
+arm_status arm_mat_cmplx_mult_f16(
+  const arm_matrix_instance_f16 * pSrcA,
+  const arm_matrix_instance_f16 * pSrcB,
+        arm_matrix_instance_f16 * pDst);
 
-/**
+  /**
    * @brief Floating-point matrix transpose.
    * @param[in]  pSrc  points to the input matrix
    * @param[out] pDst  points to the output matrix
    * @return    The function returns either  <code>ARM_MATH_SIZE_MISMATCH</code>
    * or <code>ARM_MATH_SUCCESS</code> based on the outcome of size checking.
    */
-arm_status arm_mat_trans_f16(const arm_matrix_instance_f16 *pSrc, arm_matrix_instance_f16 *pDst);
+arm_status arm_mat_trans_f16(
+  const arm_matrix_instance_f16 * pSrc,
+        arm_matrix_instance_f16 * pDst);
 
-/**
+  /**
    * @brief Floating-point complex matrix transpose.
    * @param[in]  pSrc  points to the input matrix
    * @param[out] pDst  points to the output matrix
    * @return    The function returns either  <code>ARM_MATH_SIZE_MISMATCH</code>
    * or <code>ARM_MATH_SUCCESS</code> based on the outcome of size checking.
    */
-arm_status arm_mat_cmplx_trans_f16(const arm_matrix_instance_f16 *pSrc,
-                                   arm_matrix_instance_f16 *pDst);
+arm_status arm_mat_cmplx_trans_f16(
+  const arm_matrix_instance_f16 * pSrc,
+  arm_matrix_instance_f16 * pDst);
 
-/**
+  /**
    * @brief Floating-point matrix multiplication
    * @param[in]  pSrcA  points to the first input matrix structure
    * @param[in]  pSrcB  points to the second input matrix structure
@@ -97,18 +109,22 @@ arm_status arm_mat_cmplx_trans_f16(const arm_matrix_instance_f16 *pSrc,
    * @return     The function returns either
    * <code>ARM_MATH_SIZE_MISMATCH</code> or <code>ARM_MATH_SUCCESS</code> based on the outcome of size checking.
    */
-arm_status arm_mat_mult_f16(const arm_matrix_instance_f16 *pSrcA,
-                            const arm_matrix_instance_f16 *pSrcB, arm_matrix_instance_f16 *pDst);
-/**
+arm_status arm_mat_mult_f16(
+  const arm_matrix_instance_f16 * pSrcA,
+  const arm_matrix_instance_f16 * pSrcB,
+        arm_matrix_instance_f16 * pDst);
+  /**
    * @brief Floating-point matrix and vector multiplication
    * @param[in]  pSrcMat  points to the input matrix structure
    * @param[in]  pVec     points to vector
    * @param[out] pDst     points to output vector
    */
-void arm_mat_vec_mult_f16(const arm_matrix_instance_f16 *pSrcMat, const float16_t *pVec,
-                          float16_t *pDst);
+void arm_mat_vec_mult_f16(
+  const arm_matrix_instance_f16 *pSrcMat, 
+  const float16_t *pVec, 
+  float16_t *pDst);
 
-/**
+  /**
    * @brief Floating-point matrix subtraction
    * @param[in]  pSrcA  points to the first input matrix structure
    * @param[in]  pSrcB  points to the second input matrix structure
@@ -116,10 +132,12 @@ void arm_mat_vec_mult_f16(const arm_matrix_instance_f16 *pSrcMat, const float16_
    * @return     The function returns either
    * <code>ARM_MATH_SIZE_MISMATCH</code> or <code>ARM_MATH_SUCCESS</code> based on the outcome of size checking.
    */
-arm_status arm_mat_sub_f16(const arm_matrix_instance_f16 *pSrcA,
-                           const arm_matrix_instance_f16 *pSrcB, arm_matrix_instance_f16 *pDst);
+arm_status arm_mat_sub_f16(
+  const arm_matrix_instance_f16 * pSrcA,
+  const arm_matrix_instance_f16 * pSrcB,
+        arm_matrix_instance_f16 * pDst);
 
-/**
+  /**
    * @brief Floating-point matrix scaling.
    * @param[in]  pSrc   points to the input matrix
    * @param[in]  scale  scale factor
@@ -127,29 +145,38 @@ arm_status arm_mat_sub_f16(const arm_matrix_instance_f16 *pSrcA,
    * @return     The function returns either
    * <code>ARM_MATH_SIZE_MISMATCH</code> or <code>ARM_MATH_SUCCESS</code> based on the outcome of size checking.
    */
-arm_status arm_mat_scale_f16(const arm_matrix_instance_f16 *pSrc, float16_t scale,
-                             arm_matrix_instance_f16 *pDst);
+arm_status arm_mat_scale_f16(
+  const arm_matrix_instance_f16 * pSrc,
+        float16_t scale,
+        arm_matrix_instance_f16 * pDst);
 
-/**
+  /**
    * @brief  Floating-point matrix initialization.
    * @param[in,out] S         points to an instance of the floating-point matrix structure.
    * @param[in]     nRows     number of rows in the matrix.
    * @param[in]     nColumns  number of columns in the matrix.
    * @param[in]     pData     points to the matrix data array.
    */
-void arm_mat_init_f16(arm_matrix_instance_f16 *S, uint16_t nRows, uint16_t nColumns,
-                      float16_t *pData);
+void arm_mat_init_f16(
+        arm_matrix_instance_f16 * S,
+        uint16_t nRows,
+        uint16_t nColumns,
+        float16_t * pData);
 
-/**
+
+  /**
    * @brief Floating-point matrix inverse.
    * @param[in]  src   points to the instance of the input floating-point matrix structure.
    * @param[out] dst   points to the instance of the output floating-point matrix structure.
    * @return The function returns ARM_MATH_SIZE_MISMATCH, if the dimensions do not match.
    * If the input matrix is singular (does not have an inverse), then the algorithm terminates and returns error status ARM_MATH_SINGULAR.
    */
-arm_status arm_mat_inverse_f16(const arm_matrix_instance_f16 *src, arm_matrix_instance_f16 *dst);
+  arm_status arm_mat_inverse_f16(
+  const arm_matrix_instance_f16 * src,
+  arm_matrix_instance_f16 * dst);
 
-/**
+
+ /**
    * @brief Floating-point Cholesky decomposition of Symmetric Positive Definite Matrix.
    * @param[in]  src   points to the instance of the input floating-point matrix structure.
    * @param[out] dst   points to the instance of the output floating-point matrix structure.
@@ -158,32 +185,76 @@ arm_status arm_mat_inverse_f16(const arm_matrix_instance_f16 *src, arm_matrix_in
    * If the matrix is ill conditioned or only semi-definite, then it is better using the LDL^t decomposition.
    * The decomposition is returning a lower triangular matrix.
    */
-arm_status arm_mat_cholesky_f16(const arm_matrix_instance_f16 *src, arm_matrix_instance_f16 *dst);
+  arm_status arm_mat_cholesky_f16(
+  const arm_matrix_instance_f16 * src,
+  arm_matrix_instance_f16 * dst);
 
-/**
+ /**
    * @brief Solve UT . X = A where UT is an upper triangular matrix
    * @param[in]  ut  The upper triangular matrix
    * @param[in]  a  The matrix a
    * @param[out] dst The solution X of UT . X = A
    * @return The function returns ARM_MATH_SINGULAR, if the system can't be solved.
   */
-arm_status arm_mat_solve_upper_triangular_f16(const arm_matrix_instance_f16 *ut,
-                                              const arm_matrix_instance_f16 *a,
-                                              arm_matrix_instance_f16 *dst);
+  arm_status arm_mat_solve_upper_triangular_f16(
+  const arm_matrix_instance_f16 * ut,
+  const arm_matrix_instance_f16 * a,
+  arm_matrix_instance_f16 * dst);
 
-/**
+ /**
    * @brief Solve LT . X = A where LT is a lower triangular matrix
    * @param[in]  lt  The lower triangular matrix
    * @param[in]  a  The matrix a
    * @param[out] dst The solution X of LT . X = A
    * @return The function returns ARM_MATH_SINGULAR, if the system can't be solved.
    */
-arm_status arm_mat_solve_lower_triangular_f16(const arm_matrix_instance_f16 *lt,
-                                              const arm_matrix_instance_f16 *a,
-                                              arm_matrix_instance_f16 *dst);
+  arm_status arm_mat_solve_lower_triangular_f16(
+  const arm_matrix_instance_f16 * lt,
+  const arm_matrix_instance_f16 * a,
+  arm_matrix_instance_f16 * dst);
+
+
+/**
+  @brief         QR decomposition of a m x n floating point matrix with m >= n.
+  @param[in]     pSrc      points to input matrix structure. The source matrix is modified by the function.
+  @param[in]     threshold norm2 threshold.  
+  @param[out]    pOutR     points to output R matrix structure of dimension m x n
+  @param[out]    pOutQ     points to output Q matrix structure of dimension m x m
+  @param[out]    pOutTau   points to Householder scaling factors of dimension n
+  @param[inout]  pTmpA     points to a temporary vector of dimension m.
+  @param[inout]  pTmpB     points to a temporary vector of dimension n.
+  @return        execution status
+                   - \ref ARM_MATH_SUCCESS       : Operation successful
+                   - \ref ARM_MATH_SIZE_MISMATCH : Matrix size check failed
+                   - \ref ARM_MATH_SINGULAR      : Input matrix is found to be singular (non-invertible)
+ */
+arm_status arm_mat_qr_f16(
+    const arm_matrix_instance_f16 * pSrc,
+    const float16_t threshold,
+    arm_matrix_instance_f16 * pOutR,
+    arm_matrix_instance_f16 * pOutQ,
+    float16_t * pOutTau,
+    float16_t *pTmpA,
+    float16_t *pTmpB
+    );
+
+/**
+  @brief         Householder transform of a half-precision floating-point vector.
+  @param[in]     pSrc        points to the input vector.
+  @param[in]     threshold   norm2 threshold.  
+  @param[in]     blockSize   dimension of the vector space.
+  @param[out]    pOut        points to the output vector.
+  @return        the scaling factor beta
+ */
+float16_t arm_householder_f16(
+    const float16_t * pSrc,
+    const float16_t threshold,
+    uint32_t    blockSize,
+    float16_t * pOut
+    );
 
 #endif /*defined(ARM_FLOAT16_SUPPORTED)*/
-#ifdef __cplusplus
+#ifdef   __cplusplus
 }
 #endif
 
diff --git a/Libraries/CMSIS/5.9.0/DSP/1.16.2/Include/dsp/matrix_utils.h b/Libraries/CMSIS/5.9.0/DSP/1.16.2/Include/dsp/matrix_utils.h
new file mode 100755
index 00000000000..79e7f8cf103
--- /dev/null
+++ b/Libraries/CMSIS/5.9.0/DSP/1.16.2/Include/dsp/matrix_utils.h
@@ -0,0 +1,640 @@
+/******************************************************************************
+ * @file     matrix_utils.h
+ * @brief    Public header file for CMSIS DSP Library
+ * @version  V1.11.0
+ * @date     30 May 2022
+ * Target Processor: Cortex-M and Cortex-A cores
+ ******************************************************************************/
+/*
+ * Copyright (c) 2010-2022 Arm Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+ 
+#ifndef MATRIX_UTILS_H_
+#define MATRIX_UTILS_H_
+
+#include "arm_math_types.h"
+#include "arm_math_memory.h"
+
+#include "dsp/none.h"
+#include "dsp/utils.h"
+
+#ifdef   __cplusplus
+extern "C"
+{
+#endif
+
+#define ELEM(A,ROW,COL) (&((A)->pData[(A)->numCols * (ROW) + (COL)])) /* address of element (ROW,COL) of A; outer parentheses keep the expansion a single operand */
+
+#define SCALE_COL_T(T,CAST,A,ROW,v,i)                                   \
+{                                                                       \
+  int32_t _w;                                                           \
+  T *data = (A)->pData;                                                 \
+  const int32_t _numCols = (A)->numCols;                                \
+  const int32_t nb = (A)->numRows - (ROW);                              \
+  /* Scale column i of A by v, from row ROW down to the last row. */    \
+  data += (i) + _numCols * (ROW);                                       \
+  /* Arguments are parenthesized so expression arguments expand safely. */ \
+  for(_w=0;_w < nb; _w++)                                               \
+  {                                                                     \
+     *data *= CAST (v);                                                 \
+     data += _numCols;  /* next row, same column */                     \
+  }                                                                     \
+}
+
+#define COPY_COL_T(T,A,ROW,COL,DST)                                     \
+{   /* Copy column COL of A, rows ROW..numRows-1, contiguously to DST */\
+    uint32_t _row;                                                      \
+    T *_pb = (DST);                                                     \
+    T *_pa = (A)->pData + (ROW) * (A)->numCols + (COL);                 \
+    for(_row = (ROW); _row < (A)->numRows; _row ++)                     \
+    {                                                                   \
+         *_pb++ = *_pa;                                                 \
+         _pa += (A)->numCols;  /* next row, same column */              \
+    }                                                                   \
+}
+
+#if defined(ARM_FLOAT16_SUPPORTED)
+#if defined(ARM_MATH_MVE_FLOAT16) && !defined(ARM_MATH_AUTOVECTORIZE)
+
+#define SWAP_ROWS_F16(A,COL,i,j)                          \
+  {                                                       \
+    int cnt = ((A)->numCols)-(COL);                       \
+    int32_t _w;                                           \
+    float16_t *data = (A)->pData;                         \
+    const int32_t _numCols = (A)->numCols;                \
+    /* Swap rows i and j of A from column COL onward. */  \
+    for(_w=(COL);_w < _numCols; _w+=8)                    \
+    {                                                     \
+       f16x8_t tmpa,tmpb;                                 \
+       mve_pred16_t p0 = vctp16q(cnt);                    \
+       /* p0 (built from cnt) predicates every access. */ \
+       tmpa=vldrhq_z_f16(&data[(i)*_numCols + _w],p0);    \
+       tmpb=vldrhq_z_f16(&data[(j)*_numCols + _w],p0);    \
+                                                          \
+       vstrhq_p(&data[(i)*_numCols + _w], tmpb, p0);      \
+       vstrhq_p(&data[(j)*_numCols + _w], tmpa, p0);      \
+                                                          \
+       cnt -= 8;                                          \
+    }                                                     \
+  }
+
+#define SCALE_ROW_F16(A,COL,v,i)                          \
+{ /* Multiply row i of A by v, from column COL onward */  \
+  int cnt = ((A)->numCols)-(COL);                         \
+  int32_t _w;                                             \
+  float16_t *data = (A)->pData;                           \
+  const int32_t _numCols = (A)->numCols;                  \
+                                                          \
+  for(_w=(COL);_w < _numCols; _w+=8)                      \
+  {                                                       \
+       f16x8_t tmpa;                                      \
+       mve_pred16_t p0 = vctp16q(cnt);                    \
+       tmpa = vldrhq_z_f16(&data[(i)*_numCols + _w],p0);  \
+       tmpa = vmulq_n_f16(tmpa,(_Float16)(v));            \
+       vstrhq_p(&data[(i)*_numCols + _w], tmpa, p0);      \
+       cnt -= 8;                                          \
+  }                                                       \
+                                                          \
+}
+
+#define MAC_ROW_F16(COL,A,i,v,B,j)                         \
+{ /* Row i of A += v * row j of B, from column COL onward */ \
+  int cnt = ((A)->numCols)-(COL);                          \
+  int32_t _w;                                              \
+  float16_t *dataA = (A)->pData;                           \
+  float16_t *dataB = (B)->pData;                           \
+  const int32_t _numCols = (A)->numCols;                   \
+  /* B is indexed with A's column count as its stride. */  \
+  for(_w=(COL);_w < _numCols; _w+=8)                       \
+  {                                                        \
+       f16x8_t tmpa,tmpb;                                  \
+       mve_pred16_t p0 = vctp16q(cnt);                     \
+       tmpa = vldrhq_z_f16(&dataA[(i)*_numCols + _w],p0);  \
+       tmpb = vldrhq_z_f16(&dataB[(j)*_numCols + _w],p0);  \
+       tmpa = vfmaq_n_f16(tmpa,tmpb,v);                    \
+       vstrhq_p(&dataA[(i)*_numCols + _w], tmpa, p0);      \
+       cnt -= 8;                                           \
+  }                                                        \
+                                                           \
+}
+
+#define MAS_ROW_F16(COL,A,i,v,B,j)                         \
+{ /* Row i of A -= v * row j of B, from column COL onward */ \
+  int cnt = ((A)->numCols)-(COL);                          \
+  int32_t _w;                                              \
+  float16_t *dataA = (A)->pData;                           \
+  float16_t *dataB = (B)->pData;                           \
+  const int32_t _numCols = (A)->numCols;                   \
+  f16x8_t vec=vdupq_n_f16(v);                              \
+  /* B is indexed with A's column count as its stride. */  \
+  for(_w=(COL);_w < _numCols; _w+=8)                       \
+  {                                                        \
+       f16x8_t tmpa,tmpb;                                  \
+       mve_pred16_t p0 = vctp16q(cnt);                     \
+       tmpa = vldrhq_z_f16(&dataA[(i)*_numCols + _w],p0);  \
+       tmpb = vldrhq_z_f16(&dataB[(j)*_numCols + _w],p0);  \
+       tmpa = vfmsq_f16(tmpa,tmpb,vec);                    \
+       vstrhq_p(&dataA[(i)*_numCols + _w], tmpa, p0);      \
+       cnt -= 8;                                           \
+  }                                                        \
+                                                           \
+}
+
+#else
+
+
+#define SWAP_ROWS_F16(A,COL,i,j)                          \
+{ /* Swap rows i and j of A from column COL onward */     \
+  int32_t _w;                                             \
+  float16_t *dataI = (A)->pData;                          \
+  float16_t *dataJ = (A)->pData;                          \
+  const int32_t _numCols = (A)->numCols;                  \
+  const int32_t nb = _numCols-(COL);                      \
+                                                          \
+  dataI += (i)*_numCols + (COL);                          \
+  dataJ += (j)*_numCols + (COL);                          \
+                                                          \
+  for(_w=0;_w < nb; _w++)                                 \
+  {                                                       \
+     float16_t tmp;                                       \
+     tmp = *dataI;                                        \
+     *dataI++ = *dataJ;                                   \
+     *dataJ++ = tmp;                                      \
+  }                                                       \
+}
+
+#define SCALE_ROW_F16(A,COL,v,i)                          \
+{ /* Multiply row i of A by v, from column COL onward */  \
+  int32_t _w;                                             \
+  float16_t *data = (A)->pData;                           \
+  const int32_t _numCols = (A)->numCols;                  \
+  const int32_t nb = _numCols-(COL);                      \
+                                                          \
+  data += (i)*_numCols + (COL);                           \
+                                                          \
+  for(_w=0;_w < nb; _w++)                                 \
+  {                                                       \
+     *data++ *= (_Float16)(v);                            \
+  }                                                       \
+}
+
+
+#define MAC_ROW_F16(COL,A,i,v,B,j)                         \
+{ /* Row i of A += v * row j of B, from column COL onward */ \
+  int32_t _w;                                              \
+  float16_t *dataA = (A)->pData;                           \
+  float16_t *dataB = (B)->pData;                           \
+  const int32_t _numCols = (A)->numCols;                   \
+  const int32_t nb = _numCols-(COL);                       \
+  /* B is indexed with A's column count as its stride. */  \
+  dataA += (i)*_numCols + (COL);                           \
+  dataB += (j)*_numCols + (COL);                           \
+                                                           \
+  for(_w=0;_w < nb; _w++)                                  \
+  {                                                        \
+     *dataA++ += (_Float16)(v) * (_Float16)*dataB++;       \
+  }                                                        \
+}
+
+#define MAS_ROW_F16(COL,A,i,v,B,j)                         \
+{ /* Row i of A -= v * row j of B, from column COL onward */ \
+  int32_t _w;                                              \
+  float16_t *dataA = (A)->pData;                           \
+  float16_t *dataB = (B)->pData;                           \
+  const int32_t _numCols = (A)->numCols;                   \
+  const int32_t nb = _numCols-(COL);                       \
+  /* B is indexed with A's column count as its stride. */  \
+  dataA += (i)*_numCols + (COL);                           \
+  dataB += (j)*_numCols + (COL);                           \
+                                                           \
+  for(_w=0;_w < nb; _w++)                                  \
+  {                                                        \
+     *dataA++ -= (_Float16)(v) * (_Float16)*dataB++;       \
+  }                                                        \
+}
+
+#endif /*defined(ARM_MATH_MVE_FLOAT16) && !defined(ARM_MATH_AUTOVECTORIZE)*/
+
+/* Column helpers with only a scalar version: they instantiate the generic COPY_COL_T / SCALE_COL_T macros for float16_t (SCALE_COL_F16 casts the scale factor to _Float16). */
+#define COPY_COL_F16(A,ROW,COL,DST) \
+  COPY_COL_T(float16_t,A,ROW,COL,DST)
+
+#define SCALE_COL_F16(A,ROW,v,i)        \
+  SCALE_COL_T(float16_t,(_Float16),A,ROW,v,i)
+  
+#endif /* defined(ARM_FLOAT16_SUPPORTED)*/
+
+#if defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE)
+
+#define SWAP_ROWS_F32(A,COL,i,j)                          \
+  {                                                       \
+    int cnt = ((A)->numCols)-(COL);                       \
+    float32_t *data = (A)->pData;                         \
+    const int32_t _numCols = (A)->numCols;                \
+    int32_t _w;                                           \
+    /* Swap rows i and j of A from column COL onward. */  \
+    for(_w=(COL);_w < _numCols; _w+=4)                    \
+    {                                                     \
+       f32x4_t tmpa,tmpb;                                 \
+       mve_pred16_t p0 = vctp32q(cnt);                    \
+       /* p0 (built from cnt) predicates every access. */ \
+       tmpa=vldrwq_z_f32(&data[(i)*_numCols + _w],p0);    \
+       tmpb=vldrwq_z_f32(&data[(j)*_numCols + _w],p0);    \
+                                                          \
+       vstrwq_p(&data[(i)*_numCols + _w], tmpb, p0);      \
+       vstrwq_p(&data[(j)*_numCols + _w], tmpa, p0);      \
+                                                          \
+       cnt -= 4;                                          \
+    }                                                     \
+  }
+
+#define MAC_ROW_F32(COL,A,i,v,B,j)                         \
+{ /* Row i of A += v * row j of B, from column COL onward */ \
+  int cnt = ((A)->numCols)-(COL);                          \
+  float32_t *dataA = (A)->pData;                           \
+  float32_t *dataB = (B)->pData;                           \
+  const int32_t _numCols = (A)->numCols;                   \
+  int32_t _w;                                              \
+  /* B is indexed with A's column count as its stride. */  \
+  for(_w=(COL);_w < _numCols; _w+=4)                       \
+  {                                                        \
+       f32x4_t tmpa,tmpb;                                  \
+       mve_pred16_t p0 = vctp32q(cnt);                     \
+       tmpa = vldrwq_z_f32(&dataA[(i)*_numCols + _w],p0);  \
+       tmpb = vldrwq_z_f32(&dataB[(j)*_numCols + _w],p0);  \
+       tmpa = vfmaq_n_f32(tmpa,tmpb,v);                    \
+       vstrwq_p(&dataA[(i)*_numCols + _w], tmpa, p0);      \
+       cnt -= 4;                                           \
+  }                                                        \
+                                                           \
+}
+
+#define MAS_ROW_F32(COL,A,i,v,B,j)                         \
+{ /* Row i of A -= v * row j of B, from column COL onward */ \
+  int cnt = ((A)->numCols)-(COL);                          \
+  float32_t *dataA = (A)->pData;                           \
+  float32_t *dataB = (B)->pData;                           \
+  const int32_t _numCols = (A)->numCols;                   \
+  int32_t _w;                                              \
+  f32x4_t vec=vdupq_n_f32(v);                              \
+  /* B is indexed with A's column count as its stride. */  \
+  for(_w=(COL);_w < _numCols; _w+=4)                       \
+  {                                                        \
+       f32x4_t tmpa,tmpb;                                  \
+       mve_pred16_t p0 = vctp32q(cnt);                     \
+       tmpa = vldrwq_z_f32(&dataA[(i)*_numCols + _w],p0);  \
+       tmpb = vldrwq_z_f32(&dataB[(j)*_numCols + _w],p0);  \
+       tmpa = vfmsq_f32(tmpa,tmpb,vec);                    \
+       vstrwq_p(&dataA[(i)*_numCols + _w], tmpa, p0);      \
+       cnt -= 4;                                           \
+  }                                                        \
+                                                           \
+}
+
+#define SCALE_ROW_F32(A,COL,v,i)                          \
+{ /* Multiply row i of A by v, from column COL onward */  \
+  int cnt = ((A)->numCols)-(COL);                         \
+  float32_t *data = (A)->pData;                           \
+  const int32_t _numCols = (A)->numCols;                  \
+  int32_t _w;                                             \
+                                                          \
+  for(_w=(COL);_w < _numCols; _w+=4)                      \
+  {                                                       \
+       f32x4_t tmpa;                                      \
+       mve_pred16_t p0 = vctp32q(cnt);                    \
+       tmpa = vldrwq_z_f32(&data[(i)*_numCols + _w],p0);  \
+       tmpa = vmulq_n_f32(tmpa,v);                        \
+       vstrwq_p(&data[(i)*_numCols + _w], tmpa, p0);      \
+       cnt -= 4;                                          \
+  }                                                       \
+                                                          \
+}
+
+#elif defined(ARM_MATH_NEON) && !defined(ARM_MATH_AUTOVECTORIZE)
+
+#define SWAP_ROWS_F32(A,COL,i,j)                          \
+{ /* Swap rows i and j of A from column COL onward */     \
+  int32_t _w;                                             \
+  float32_t *dataI = (A)->pData;                          \
+  float32_t *dataJ = (A)->pData;                          \
+  const int32_t _numCols = (A)->numCols;                  \
+  const int32_t nb = _numCols - (COL);                    \
+                                                          \
+  dataI += (i)*_numCols + (COL);                          \
+  dataJ += (j)*_numCols + (COL);                          \
+                                                          \
+  float32_t tmp;                                          \
+                                                          \
+  for(_w=0;_w < nb; _w++)                                 \
+  {                                                       \
+     tmp = *dataI;                                        \
+     *dataI++ = *dataJ;                                   \
+     *dataJ++ = tmp;                                      \
+  }                                                       \
+}
+/* MAC_ROW_F32 (NEON): row i of A += v * row j of B, for columns COL..numCols-1. */
+#define MAC_ROW_F32(COL,A,i,v,B,j)        \
+{                                         \
+  float32_t *dataA = (A)->pData;          \
+  float32_t *dataB = (B)->pData;          \
+  const int32_t _numCols = (A)->numCols;  \
+  const int32_t nb = _numCols - (COL);    \
+  int32_t nbElems;                        \
+  f32x4_t vec = vdupq_n_f32(v);           \
+                                          \
+  nbElems = nb >> 2; /* full 4-float vectors */ \
+                                          \
+  dataA += (i)*_numCols + (COL);          \
+  dataB += (j)*_numCols + (COL);          \
+                                          \
+  while(nbElems>0)                        \
+  {                                       \
+       f32x4_t tmpa,tmpb;                 \
+       tmpa = vld1q_f32(dataA);           \
+       tmpb = vld1q_f32(dataB);           \
+       tmpa = vmlaq_f32(tmpa,tmpb,vec);   \
+       vst1q_f32(dataA, tmpa);            \
+       nbElems--;                         \
+       dataA += 4;                        \
+       dataB += 4;                        \
+  }                                       \
+                                          \
+  nbElems = nb & 3; /* scalar tail */     \
+  while(nbElems > 0)                      \
+  {                                       \
+     *dataA++ += (v) * *dataB++;          \
+     nbElems--;                           \
+  }                                       \
+}
+/* MAS_ROW_F32 (NEON): row i of A -= v * row j of B, for columns COL..numCols-1. */
+#define MAS_ROW_F32(COL,A,i,v,B,j)        \
+{                                         \
+  float32_t *dataA = (A)->pData;          \
+  float32_t *dataB = (B)->pData;          \
+  const int32_t _numCols = (A)->numCols;  \
+  const int32_t nb = _numCols - (COL);    \
+  int32_t nbElems;                        \
+  f32x4_t vec = vdupq_n_f32(v);           \
+                                          \
+  nbElems = nb >> 2; /* full 4-float vectors */ \
+                                          \
+  dataA += (i)*_numCols + (COL);          \
+  dataB += (j)*_numCols + (COL);          \
+                                          \
+  while(nbElems>0)                        \
+  {                                       \
+       f32x4_t tmpa,tmpb;                 \
+       tmpa = vld1q_f32(dataA);           \
+       tmpb = vld1q_f32(dataB);           \
+       tmpa = vmlsq_f32(tmpa,tmpb,vec);   \
+       vst1q_f32(dataA, tmpa);            \
+       nbElems--;                         \
+       dataA += 4;                        \
+       dataB += 4;                        \
+  }                                       \
+                                          \
+  nbElems = nb & 3; /* scalar tail */     \
+  while(nbElems > 0)                      \
+  {                                       \
+     *dataA++ -= (v) * *dataB++;          \
+     nbElems--;                           \
+  }                                       \
+}
+/* SCALE_ROW_F32 (NEON): multiply row i of A by v for columns COL..numCols-1. */
+#define SCALE_ROW_F32(A,COL,v,i)          \
+{                                         \
+  float32_t *data = (A)->pData;           \
+  const int32_t _numCols = (A)->numCols;  \
+  const int32_t nb = _numCols - (COL);    \
+  int32_t nbElems;                        \
+  f32x4_t vec = vdupq_n_f32(v);           \
+                                          \
+  nbElems = nb >> 2; /* full 4-float vectors */ \
+                                          \
+  data += (i)*_numCols + (COL);           \
+  while(nbElems>0)                        \
+  {                                       \
+       f32x4_t tmpa;                      \
+       tmpa = vld1q_f32(data);            \
+       tmpa = vmulq_f32(tmpa,vec);        \
+       vst1q_f32(data, tmpa);             \
+       data += 4;                         \
+       nbElems --;                        \
+  }                                       \
+                                          \
+  nbElems = nb & 3; /* scalar tail */     \
+  while(nbElems > 0)                      \
+  {                                       \
+     *data++ *= (v);                      \
+     nbElems--;                           \
+  }                                       \
+                                          \
+}
+
+#else
+/* SWAP_ROWS_F32 (scalar): exchange rows i and j of A for columns COL..numCols-1. */
+#define SWAP_ROWS_F32(A,COL,i,j)          \
+{                                         \
+  int32_t _w;                             \
+  float32_t tmp;                          \
+  float32_t *dataI = (A)->pData;          \
+  float32_t *dataJ = (A)->pData;          \
+  const int32_t _numCols = (A)->numCols;  \
+  const int32_t nb = _numCols - (COL);    \
+                                          \
+  dataI += (i)*_numCols + (COL);          \
+  dataJ += (j)*_numCols + (COL);          \
+                                          \
+                                          \
+  for(_w=0;_w < nb; _w++)                 \
+  {                                       \
+     tmp = *dataI;                        \
+     *dataI++ = *dataJ;                   \
+     *dataJ++ = tmp;                      \
+  }                                       \
+}
+/* SCALE_ROW_F32 (scalar): multiply row i of A by v for columns COL..numCols-1. */
+#define SCALE_ROW_F32(A,COL,v,i)          \
+{                                         \
+  int32_t _w;                             \
+  float32_t *data = (A)->pData;           \
+  const int32_t _numCols = (A)->numCols;  \
+  const int32_t nb = _numCols - (COL);    \
+                                          \
+  data += (i)*_numCols + (COL);           \
+                                          \
+  for(_w=0;_w < nb; _w++)                 \
+  {                                       \
+     *data++ *= (v);                      \
+  }                                       \
+}
+
+/* MAC_ROW_F32 (scalar): row i of A += v * row j of B, for columns COL..numCols-1. */
+#define MAC_ROW_F32(COL,A,i,v,B,j)        \
+{                                         \
+  int32_t _w;                             \
+  float32_t *dataA = (A)->pData;          \
+  float32_t *dataB = (B)->pData;          \
+  const int32_t _numCols = (A)->numCols;  \
+  const int32_t nb = _numCols-(COL);      \
+                                          \
+  dataA = dataA + (i)*_numCols + (COL);   \
+  dataB = dataB + (j)*_numCols + (COL);   \
+                                          \
+  for(_w=0;_w < nb; _w++)                 \
+  {                                       \
+     *dataA++ += (v) * *dataB++;          \
+  }                                       \
+}
+/* MAS_ROW_F32 (scalar): row i of A -= v * row j of B, for columns COL..numCols-1. */
+#define MAS_ROW_F32(COL,A,i,v,B,j)        \
+{                                         \
+  int32_t _w;                             \
+  float32_t *dataA = (A)->pData;          \
+  float32_t *dataB = (B)->pData;          \
+  const int32_t _numCols = (A)->numCols;  \
+  const int32_t nb = _numCols-(COL);      \
+                                          \
+  dataA = dataA + (i)*_numCols + (COL);   \
+  dataB = dataB + (j)*_numCols + (COL);   \
+                                          \
+  for(_w=0;_w < nb; _w++)                 \
+  {                                       \
+     *dataA++ -= (v) * *dataB++;          \
+  }                                       \
+}
+
+#endif /* defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) */
+
+
+/* Functions with only a scalar version */
+/* COPY_COL_F32: float32_t instantiation of the generic COPY_COL_T macro (arguments forwarded unchanged). */
+#define COPY_COL_F32(A,ROW,COL,DST) \
+  COPY_COL_T(float32_t,A,ROW,COL,DST)
+/* COPY_COL_F64: float64_t instantiation of the generic COPY_COL_T macro (arguments forwarded unchanged). */
+#define COPY_COL_F64(A,ROW,COL,DST) \
+  COPY_COL_T(float64_t,A,ROW,COL,DST)
+/* SWAP_COLS_F32: swap columns i and j of A for rows COL..numCols-1 (row bound is numCols: presumably A is square - confirm). */
+#define SWAP_COLS_F32(A,COL,i,j)                       \
+{                                                      \
+  int32_t _w;                                          \
+  float32_t *data = (A)->pData;                        \
+  const int32_t _numCols = (A)->numCols;               \
+  for(_w=(COL);_w < _numCols; _w++)                    \
+  {                                                    \
+     float32_t tmp;                                    \
+     tmp = data[_w*_numCols + (i)];                    \
+     data[_w*_numCols + (i)] = data[_w*_numCols + (j)];\
+     data[_w*_numCols + (j)] = tmp;                    \
+  }                                                    \
+}
+/* SCALE_COL_F32: float32_t instantiation of SCALE_COL_T; the 2nd argument is deliberately empty (its meaning is defined by SCALE_COL_T). */
+#define SCALE_COL_F32(A,ROW,v,i)        \
+  SCALE_COL_T(float32_t,,A,ROW,v,i)
+/* SWAP_ROWS_F64: exchange rows i and j of A for columns COL..numCols-1 (double precision). */
+#define SWAP_ROWS_F64(A,COL,i,j)          \
+{                                         \
+  int32_t _w;                             \
+  float64_t *dataI = (A)->pData;          \
+  float64_t *dataJ = (A)->pData;          \
+  const int32_t _numCols = (A)->numCols;  \
+  const int32_t nb = _numCols-(COL);      \
+                                          \
+  dataI += (i)*_numCols + (COL);          \
+  dataJ += (j)*_numCols + (COL);          \
+                                          \
+  for(_w=0;_w < nb; _w++)                 \
+  {                                       \
+     float64_t tmp;                       \
+     tmp = *dataI;                        \
+     *dataI++ = *dataJ;                   \
+     *dataJ++ = tmp;                      \
+  }                                       \
+}
+/* SWAP_COLS_F64: swap columns i and j of A for rows COL..numCols-1 (row bound is numCols: presumably A is square - confirm). */
+#define SWAP_COLS_F64(A,COL,i,j)                       \
+{                                                      \
+  int32_t _w;                                          \
+  float64_t *data = (A)->pData;                        \
+  const int32_t _numCols = (A)->numCols;               \
+  for(_w=(COL);_w < _numCols; _w++)                    \
+  {                                                    \
+     float64_t tmp;                                    \
+     tmp = data[_w*_numCols + (i)];                    \
+     data[_w*_numCols + (i)] = data[_w*_numCols + (j)];\
+     data[_w*_numCols + (j)] = tmp;                    \
+  }                                                    \
+}
+/* SCALE_ROW_F64: multiply row i of A by v for columns COL..numCols-1 (double precision). */
+#define SCALE_ROW_F64(A,COL,v,i)          \
+{                                         \
+  int32_t _w;                             \
+  float64_t *data = (A)->pData;           \
+  const int32_t _numCols = (A)->numCols;  \
+  const int32_t nb = _numCols-(COL);      \
+                                          \
+  data += (i)*_numCols + (COL);           \
+                                          \
+  for(_w=0;_w < nb; _w++)                 \
+  {                                       \
+     *data++ *= (v);                      \
+  }                                       \
+}
+/* SCALE_COL_F64: float64_t instantiation of SCALE_COL_T; the 2nd argument is deliberately empty (its meaning is defined by SCALE_COL_T). */
+#define SCALE_COL_F64(A,ROW,v,i)        \
+  SCALE_COL_T(float64_t,,A,ROW,v,i)
+/* MAC_ROW_F64: row i of A += v * row j of B, for columns COL..numCols-1 (double precision). */
+#define MAC_ROW_F64(COL,A,i,v,B,j)        \
+{                                         \
+  int32_t _w;                             \
+  float64_t *dataA = (A)->pData;          \
+  float64_t *dataB = (B)->pData;          \
+  const int32_t _numCols = (A)->numCols;  \
+  const int32_t nb = _numCols-(COL);      \
+                                          \
+  dataA += (i)*_numCols + (COL);          \
+  dataB += (j)*_numCols + (COL);          \
+                                          \
+  for(_w=0;_w < nb; _w++)                 \
+  {                                       \
+     *dataA++ += (v) * *dataB++;          \
+  }                                       \
+}
+/* MAS_ROW_F64: row i of A -= v * row j of B, for columns COL..numCols-1 (double precision). */
+#define MAS_ROW_F64(COL,A,i,v,B,j)        \
+{                                         \
+  int32_t _w;                             \
+  float64_t *dataA = (A)->pData;          \
+  float64_t *dataB = (B)->pData;          \
+  const int32_t _numCols = (A)->numCols;  \
+  const int32_t nb = _numCols-(COL);      \
+                                          \
+  dataA += (i)*_numCols + (COL);          \
+  dataB += (j)*_numCols + (COL);          \
+                                          \
+  for(_w=0;_w < nb; _w++)                 \
+  {                                       \
+     *dataA++ -= (v) * *dataB++;          \
+  }                                       \
+}
+
+#ifdef   __cplusplus
+}
+#endif
+
+#endif /* ifndef _MATRIX_UTILS_H_ */
diff --git a/Libraries/CMSIS/5.9.0/DSP/1.16.2/Include/dsp/none.h b/Libraries/CMSIS/5.9.0/DSP/1.16.2/Include/dsp/none.h
new file mode 100755
index 00000000000..7551ee95ed8
--- /dev/null
+++ b/Libraries/CMSIS/5.9.0/DSP/1.16.2/Include/dsp/none.h
@@ -0,0 +1,576 @@
+/******************************************************************************
+ * @file     none.h
+ * @brief    Intrinsics when no DSP extension available
+ * @version  V1.9.0
+ * @date     20. July 2020
+ ******************************************************************************/
+/*
+ * Copyright (c) 2010-2020 Arm Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+
+The definitions in this file allow some versions of the CMSIS-DSP
+code to be reused when building for a core (M0 for instance) or a
+host where the DSP extension is not available.
+
+Ideally a pure C version should have been used instead.
+But those are not always available or use a restricted set
+of intrinsics.
+
+*/
+ 
+#ifndef NONE_H_
+#define NONE_H_
+
+#include "arm_math_types.h"
+
+#ifdef   __cplusplus
+extern "C"
+{
+#endif
+
+ 
+
+/*
+
+Normally these kinds of definitions are in a compiler file
+in Core or Core_A.
+
+But for MSVC compiler it is a bit special. The goal is very specific
+to CMSIS-DSP and only to allow the use of this library from other
+systems like Python or Matlab.
+
+MSVC is not going to be used to cross-compile to ARM. So, having a MSVC
+compiler file in Core or Core_A would not make sense.
+
+*/
+#if defined ( _MSC_VER ) || defined(__GNUC_PYTHON__) || defined(__APPLE_CC__)
+    __STATIC_FORCEINLINE uint8_t __CLZ(uint32_t data) /* count the leading zero bits of data */
+    {
+      if (data == 0U) { return 32U; } /* no bit set: all 32 bits are leading zeros */
+
+      uint32_t count = 0U;
+      uint32_t mask = 0x80000000U; /* start at the most significant bit */
+
+      while ((data & mask) == 0U) /* walk down until the first set bit */
+      {
+        count += 1U;
+        mask = mask >> 1U;
+      }
+      return count;
+    }
+  /* Signed saturate: clamp val to [-2^(sat-1), 2^(sat-1)-1]; val is returned unchanged if sat is not in 1..32. */
+  __STATIC_FORCEINLINE int32_t __SSAT(int32_t val, uint32_t sat)
+  {
+    if ((sat >= 1U) && (sat <= 32U))
+    {
+      const int32_t max = (int32_t)((1U << (sat - 1U)) - 1U);
+      const int32_t min = -1 - max ;
+      if (val > max)
+      {
+        return max;
+      }
+      else if (val < min)
+      {
+        return min;
+      }
+    }
+    return val;
+  }
+  /* Unsigned saturate: clamp val to [0, 2^sat - 1]; for sat > 31 val is returned as-is, cast to uint32_t. */
+  __STATIC_FORCEINLINE uint32_t __USAT(int32_t val, uint32_t sat)
+  {
+    if (sat <= 31U)
+    {
+      const uint32_t max = ((1U << sat) - 1U);
+      if (val > (int32_t)max)
+      {
+        return max;
+      }
+      else if (val < 0)
+      {
+        return 0U;
+      }
+    }
+    return (uint32_t)val;
+  }
+
+ /**
+  \brief   Rotate Right in unsigned value (32 bit)
+  \details Returns op1 rotated right by op2 bits. The rotation count is first reduced modulo 32.
+  \param [in]    op1  Value to rotate
+  \param [in]    op2  Number of Bits to rotate
+  \return               Rotated value
+ */
+__STATIC_FORCEINLINE uint32_t __ROR(uint32_t op1, uint32_t op2)
+{
+  op2 %= 32U; /* only the rotation count modulo the 32-bit width matters */
+  if (op2 == 0U) /* early out: the expression below would otherwise shift left by 32 */
+  {
+    return op1;
+  }
+  return (op1 >> op2) | (op1 << (32U - op2));
+}
+
+
+#endif
+
+/**
+   * @brief Clips Q63 to Q31 values: returns x if it fits in 32 signed bits, else 0x7FFFFFFF (x >= 0) or 0x80000000 (x < 0).
+   */
+  __STATIC_FORCEINLINE q31_t clip_q63_to_q31(
+  q63_t x)
+  {
+    return ((q31_t) (x >> 32) != ((q31_t) x >> 31)) ?
+      ((0x7FFFFFFF ^ ((q31_t) (x >> 63)))) : (q31_t) x;
+  }
+
+  /**
+   * @brief Clips Q63 to Q15 values: returns (q15_t)(x >> 15) when x fits in 32 signed bits, else 0x7FFF / 0x8000 by sign. NOTE(review): the range test is the 32-bit one - confirm intended.
+   */
+  __STATIC_FORCEINLINE q15_t clip_q63_to_q15(
+  q63_t x)
+  {
+    return ((q31_t) (x >> 32) != ((q31_t) x >> 31)) ?
+      ((0x7FFF ^ ((q15_t) (x >> 63)))) : (q15_t) (x >> 15);
+  }
+
+  /**
+   * @brief Clips Q31 to Q7 values. NOTE(review): the test compares x >> 24 with x >> 23 (a signed 24-bit range check); values inside it are cast, not saturated - confirm intended.
+   */
+  __STATIC_FORCEINLINE q7_t clip_q31_to_q7(
+  q31_t x)
+  {
+    return ((q31_t) (x >> 24) != ((q31_t) x >> 23)) ?
+      ((0x7F ^ ((q7_t) (x >> 31)))) : (q7_t) x;
+  }
+
+  /**
+   * @brief Clips Q31 to Q15 values: returns x if it fits in 16 signed bits, else 0x7FFF (x >= 0) or 0x8000 (x < 0).
+   */
+  __STATIC_FORCEINLINE q15_t clip_q31_to_q15(
+  q31_t x)
+  {
+    return ((q31_t) (x >> 16) != ((q31_t) x >> 15)) ?
+      ((0x7FFF ^ ((q15_t) (x >> 31)))) : (q15_t) x;
+  }
+
+  /**
+   * @brief Multiplies 32 X 64: returns ((low 32 bits of x) * y >> 32) + (x >> 32) * y as a q63_t. NOTE(review): original brief said "32 bit result in 2.30 format" - confirm.
+   */
+  __STATIC_FORCEINLINE q63_t mult32x64(
+  q63_t x,
+  q31_t y)
+  {
+    return ((((q63_t) (x & 0x00000000FFFFFFFF) * y) >> 32) +
+            (((q63_t) (x >> 32)                * y)      )  );
+  }
+
+/* SMMLAR: a = top 32 bits of ((a << 32) + x * y), rounded by adding 0x80000000 before the shift */
+#define multAcc_32x32_keep32_R(a, x, y) \
+    a = (q31_t) (((((q63_t) a) << 32) + ((q63_t) (x) * (y)) + 0x80000000LL ) >> 32)
+
+/* SMMLSR: a = top 32 bits of ((a << 32) - x * y), rounded by adding 0x80000000 before the shift */
+#define multSub_32x32_keep32_R(a, x, y) \
+    a = (q31_t) (((((q63_t) a) << 32) - ((q63_t) (x) * (y)) + 0x80000000LL ) >> 32)
+
+/* SMMULR: a = top 32 bits of x * y, rounded by adding 0x80000000 before the shift */
+#define mult_32x32_keep32_R(a, x, y) \
+    a = (q31_t) (((q63_t) (x) * (y) + 0x80000000LL ) >> 32)
+
+/* SMMLA: a += top 32 bits of x * y (truncated, no rounding) */
+#define multAcc_32x32_keep32(a, x, y) \
+    a += (q31_t) (((q63_t) (x) * (y)) >> 32)
+
+/* SMMLS: a -= top 32 bits of x * y (truncated, no rounding) */
+#define multSub_32x32_keep32(a, x, y) \
+    a -= (q31_t) (((q63_t) (x) * (y)) >> 32)
+
+/* SMMUL: a = top 32 bits of x * y (truncated, no rounding) */
+#define mult_32x32_keep32(a, x, y) \
+    a = (q31_t) (((q63_t) (x) * (y) ) >> 32)
+
+#ifndef ARM_MATH_DSP
+  /**
+   * @brief Pack two 16 bit values. __PKHBT: low half of ARG1 | high half of (ARG2 << ARG3). __PKHTB: high half of ARG1 | low half of (ARG2 >> ARG3).
+   */
+  #define __PKHBT(ARG1, ARG2, ARG3) ( (((int32_t)(ARG1) <<      0) & (int32_t)0x0000FFFF) | \
+                                      (((int32_t)(ARG2) << (ARG3)) & (int32_t)0xFFFF0000)  )
+  #define __PKHTB(ARG1, ARG2, ARG3) ( (((int32_t)(ARG1) <<      0) & (int32_t)0xFFFF0000) | \
+                                      (((int32_t)(ARG2) >> (ARG3)) & (int32_t)0x0000FFFF)  )
+#endif
+
+   /**
+   * @brief Pack four 8 bit values into one 32 bit word: v0 lands in bits 7..0 by default, in bits 31..24 when ARM_MATH_BIG_ENDIAN is defined.
+   */
+#ifndef ARM_MATH_BIG_ENDIAN
+  #define __PACKq7(v0,v1,v2,v3) ( (((int32_t)(v0) <<  0) & (int32_t)0x000000FF) | \
+                                  (((int32_t)(v1) <<  8) & (int32_t)0x0000FF00) | \
+                                  (((int32_t)(v2) << 16) & (int32_t)0x00FF0000) | \
+                                  (((int32_t)(v3) << 24) & (int32_t)0xFF000000)  )
+#else
+  #define __PACKq7(v0,v1,v2,v3) ( (((int32_t)(v3) <<  0) & (int32_t)0x000000FF) | \
+                                  (((int32_t)(v2) <<  8) & (int32_t)0x0000FF00) | \
+                                  (((int32_t)(v1) << 16) & (int32_t)0x00FF0000) | \
+                                  (((int32_t)(v0) << 24) & (int32_t)0xFF000000)  )
+#endif
+
+
+ 
+
+/*
+ * @brief C custom defined intrinsic functions
+ */
+#if !defined (ARM_MATH_DSP)
+
+
+  /*
+   * @brief C emulation of QADD8: adds the four signed 8-bit lanes of x and y, saturating each sum to 8 bits, and repacks them.
+   */
+  __STATIC_FORCEINLINE uint32_t __QADD8(
+  uint32_t x,
+  uint32_t y)
+  {
+    q31_t r, s, t, u;
+
+    r = __SSAT(((((q31_t)x << 24) >> 24) + (((q31_t)y << 24) >> 24)), 8) & (int32_t)0x000000FF;
+    s = __SSAT(((((q31_t)x << 16) >> 24) + (((q31_t)y << 16) >> 24)), 8) & (int32_t)0x000000FF;
+    t = __SSAT(((((q31_t)x <<  8) >> 24) + (((q31_t)y <<  8) >> 24)), 8) & (int32_t)0x000000FF;
+    u = __SSAT(((((q31_t)x      ) >> 24) + (((q31_t)y      ) >> 24)), 8) & (int32_t)0x000000FF;
+
+    return ((uint32_t)((u << 24) | (t << 16) | (s <<  8) | (r      )));
+  }
+
+
+  /*
+   * @brief C emulation of QSUB8: subtracts the four signed 8-bit lanes of y from those of x, saturating each difference to 8 bits.
+   */
+  __STATIC_FORCEINLINE uint32_t __QSUB8(
+  uint32_t x,
+  uint32_t y)
+  {
+    q31_t r, s, t, u;
+
+    r = __SSAT(((((q31_t)x << 24) >> 24) - (((q31_t)y << 24) >> 24)), 8) & (int32_t)0x000000FF;
+    s = __SSAT(((((q31_t)x << 16) >> 24) - (((q31_t)y << 16) >> 24)), 8) & (int32_t)0x000000FF;
+    t = __SSAT(((((q31_t)x <<  8) >> 24) - (((q31_t)y <<  8) >> 24)), 8) & (int32_t)0x000000FF;
+    u = __SSAT(((((q31_t)x      ) >> 24) - (((q31_t)y      ) >> 24)), 8) & (int32_t)0x000000FF;
+
+    return ((uint32_t)((u << 24) | (t << 16) | (s <<  8) | (r      )));
+  }
+
+
+  /*
+   * @brief C emulation of QADD16: adds the two signed 16-bit lanes of x and y, saturating each sum to 16 bits.
+   */
+  __STATIC_FORCEINLINE uint32_t __QADD16(
+  uint32_t x,
+  uint32_t y)
+  {
+/*  q31_t r,     s;  without initialisation 'arm_offset_q15 test' fails  but 'intrinsic' tests pass! for armCC */
+    q31_t r = 0, s = 0;
+
+    r = __SSAT(((((q31_t)x << 16) >> 16) + (((q31_t)y << 16) >> 16)), 16) & (int32_t)0x0000FFFF;
+    s = __SSAT(((((q31_t)x      ) >> 16) + (((q31_t)y      ) >> 16)), 16) & (int32_t)0x0000FFFF;
+
+    return ((uint32_t)((s << 16) | (r      )));
+  }
+
+
+  /*
+   * @brief C emulation of SHADD16: adds the two signed 16-bit lanes of x and y and halves each sum (>> 1), without saturation.
+   */
+  __STATIC_FORCEINLINE uint32_t __SHADD16(
+  uint32_t x,
+  uint32_t y)
+  {
+    q31_t r, s;
+
+    r = (((((q31_t)x << 16) >> 16) + (((q31_t)y << 16) >> 16)) >> 1) & (int32_t)0x0000FFFF;
+    s = (((((q31_t)x      ) >> 16) + (((q31_t)y      ) >> 16)) >> 1) & (int32_t)0x0000FFFF;
+
+    return ((uint32_t)((s << 16) | (r      )));
+  }
+
+
+  /*
+   * @brief C emulation of QSUB16: subtracts the two signed 16-bit lanes of y from those of x, saturating each difference to 16 bits.
+   */
+  __STATIC_FORCEINLINE uint32_t __QSUB16(
+  uint32_t x,
+  uint32_t y)
+  {
+    q31_t r, s;
+
+    r = __SSAT(((((q31_t)x << 16) >> 16) - (((q31_t)y << 16) >> 16)), 16) & (int32_t)0x0000FFFF;
+    s = __SSAT(((((q31_t)x      ) >> 16) - (((q31_t)y      ) >> 16)), 16) & (int32_t)0x0000FFFF;
+
+    return ((uint32_t)((s << 16) | (r      )));
+  }
+
+
+  /*
+   * @brief C emulation of SHSUB16: subtracts the two signed 16-bit lanes of y from those of x and halves each difference (>> 1), without saturation.
+   */
+  __STATIC_FORCEINLINE uint32_t __SHSUB16(
+  uint32_t x,
+  uint32_t y)
+  {
+    q31_t r, s;
+
+    r = (((((q31_t)x << 16) >> 16) - (((q31_t)y << 16) >> 16)) >> 1) & (int32_t)0x0000FFFF;
+    s = (((((q31_t)x      ) >> 16) - (((q31_t)y      ) >> 16)) >> 1) & (int32_t)0x0000FFFF;
+
+    return ((uint32_t)((s << 16) | (r      )));
+  }
+
+
+  /*
+   * @brief C emulation of QASX: low lane = sat16(x.low - y.high), high lane = sat16(x.high + y.low), on signed 16-bit halves.
+   */
+  __STATIC_FORCEINLINE uint32_t __QASX(
+  uint32_t x,
+  uint32_t y)
+  {
+    q31_t r, s;
+
+    r = __SSAT(((((q31_t)x << 16) >> 16) - (((q31_t)y      ) >> 16)), 16) & (int32_t)0x0000FFFF;
+    s = __SSAT(((((q31_t)x      ) >> 16) + (((q31_t)y << 16) >> 16)), 16) & (int32_t)0x0000FFFF;
+
+    return ((uint32_t)((s << 16) | (r      )));
+  }
+
+
+  /*
+   * @brief C emulation of SHASX: low lane = (x.low - y.high) >> 1, high lane = (x.high + y.low) >> 1, on signed 16-bit halves, no saturation.
+   */
+  __STATIC_FORCEINLINE uint32_t __SHASX(
+  uint32_t x,
+  uint32_t y)
+  {
+    q31_t r, s;
+
+    r = (((((q31_t)x << 16) >> 16) - (((q31_t)y      ) >> 16)) >> 1) & (int32_t)0x0000FFFF;
+    s = (((((q31_t)x      ) >> 16) + (((q31_t)y << 16) >> 16)) >> 1) & (int32_t)0x0000FFFF;
+
+    return ((uint32_t)((s << 16) | (r      )));
+  }
+
+
+  /*
+   * @brief C emulation of QSAX: low lane = sat16(x.low + y.high), high lane = sat16(x.high - y.low), on signed 16-bit halves.
+   */
+  __STATIC_FORCEINLINE uint32_t __QSAX(
+  uint32_t x,
+  uint32_t y)
+  {
+    q31_t r, s;
+
+    r = __SSAT(((((q31_t)x << 16) >> 16) + (((q31_t)y      ) >> 16)), 16) & (int32_t)0x0000FFFF;
+    s = __SSAT(((((q31_t)x      ) >> 16) - (((q31_t)y << 16) >> 16)), 16) & (int32_t)0x0000FFFF;
+
+    return ((uint32_t)((s << 16) | (r      )));
+  }
+
+
+  /*
+   * @brief C emulation of SHSAX: low lane = (x.low + y.high) >> 1, high lane = (x.high - y.low) >> 1, on signed 16-bit halves, no saturation.
+   */
+  __STATIC_FORCEINLINE uint32_t __SHSAX(
+  uint32_t x,
+  uint32_t y)
+  {
+    q31_t r, s;
+
+    r = (((((q31_t)x << 16) >> 16) + (((q31_t)y      ) >> 16)) >> 1) & (int32_t)0x0000FFFF;
+    s = (((((q31_t)x      ) >> 16) - (((q31_t)y << 16) >> 16)) >> 1) & (int32_t)0x0000FFFF;
+
+    return ((uint32_t)((s << 16) | (r      )));
+  }
+
+
+  /*
+   * @brief C emulation of SMUSDX: returns x.low * y.high - x.high * y.low, using the signed 16-bit halves of x and y.
+   */
+  __STATIC_FORCEINLINE uint32_t __SMUSDX(
+  uint32_t x,
+  uint32_t y)
+  {
+    return ((uint32_t)(((((q31_t)x << 16) >> 16) * (((q31_t)y      ) >> 16)) -
+                       ((((q31_t)x      ) >> 16) * (((q31_t)y << 16) >> 16))   ));
+  }
+
+  /*
+   * @brief C emulation of SMUADX: returns x.low * y.high + x.high * y.low, using the signed 16-bit halves of x and y.
+   */
+  __STATIC_FORCEINLINE uint32_t __SMUADX(
+  uint32_t x,
+  uint32_t y)
+  {
+    return ((uint32_t)(((((q31_t)x << 16) >> 16) * (((q31_t)y      ) >> 16)) +
+                       ((((q31_t)x      ) >> 16) * (((q31_t)y << 16) >> 16))   ));
+  }
+
+
+  /*
+   * @brief C emulation of QADD: 32-bit saturating add, computed as a 64-bit sum clipped with clip_q63_to_q31().
+   */
+  __STATIC_FORCEINLINE int32_t __QADD(
+  int32_t x,
+  int32_t y)
+  {
+    return ((int32_t)(clip_q63_to_q31((q63_t)x + (q31_t)y)));
+  }
+
+
+  /*
+   * @brief C emulation of QSUB: 32-bit saturating subtract (x - y), computed in 64 bits and clipped with clip_q63_to_q31().
+   */
+  __STATIC_FORCEINLINE int32_t __QSUB(
+  int32_t x,
+  int32_t y)
+  {
+    return ((int32_t)(clip_q63_to_q31((q63_t)x - (q31_t)y)));
+  }
+
+
+  /*
+   * @brief C emulation of SMLAD: returns x.low * y.low + x.high * y.high + sum, using the signed 16-bit halves of x and y.
+   */
+  __STATIC_FORCEINLINE uint32_t __SMLAD(
+  uint32_t x,
+  uint32_t y,
+  uint32_t sum)
+  {
+    return ((uint32_t)(((((q31_t)x << 16) >> 16) * (((q31_t)y << 16) >> 16)) +
+                       ((((q31_t)x      ) >> 16) * (((q31_t)y      ) >> 16)) +
+                       ( ((q31_t)sum    )                                  )   ));
+  }
+
+
+  /*
+   * @brief C emulation of SMLADX: returns x.low * y.high + x.high * y.low + sum, using the signed 16-bit halves of x and y.
+   */
+  __STATIC_FORCEINLINE uint32_t __SMLADX(
+  uint32_t x,
+  uint32_t y,
+  uint32_t sum)
+  {
+    return ((uint32_t)(((((q31_t)x << 16) >> 16) * (((q31_t)y      ) >> 16)) +
+                       ((((q31_t)x      ) >> 16) * (((q31_t)y << 16) >> 16)) +
+                       ( ((q31_t)sum    )                                  )   ));
+  }
+
+
+  /*
+   * @brief C emulation of SMLSDX: returns x.low * y.high - x.high * y.low + sum, using the signed 16-bit halves of x and y.
+   */
+  __STATIC_FORCEINLINE uint32_t __SMLSDX(
+  uint32_t x,
+  uint32_t y,
+  uint32_t sum)
+  {
+    return ((uint32_t)(((((q31_t)x << 16) >> 16) * (((q31_t)y      ) >> 16)) -
+                       ((((q31_t)x      ) >> 16) * (((q31_t)y << 16) >> 16)) +
+                       ( ((q31_t)sum    )                                  )   ));
+  }
+
+
+  /*
+   * @brief C emulation of SMLALD: returns x.low * y.low + x.high * y.high + sum, with a 64-bit accumulator (signed 16-bit halves of x and y).
+   */
+  __STATIC_FORCEINLINE uint64_t __SMLALD(
+  uint32_t x,
+  uint32_t y,
+  uint64_t sum)
+  {
+/*  return (sum + ((q15_t) (x >> 16) * (q15_t) (y >> 16)) + ((q15_t) x * (q15_t) y)); */
+    return ((uint64_t)(((((q31_t)x << 16) >> 16) * (((q31_t)y << 16) >> 16)) +
+                       ((((q31_t)x      ) >> 16) * (((q31_t)y      ) >> 16)) +
+                       ( ((q63_t)sum    )                                  )   ));
+  }
+
+
+  /*
+   * @brief C emulation of SMLALDX: returns x.low * y.high + x.high * y.low + sum, with a 64-bit accumulator (signed 16-bit halves of x and y).
+   */
+  __STATIC_FORCEINLINE uint64_t __SMLALDX(
+  uint32_t x,
+  uint32_t y,
+  uint64_t sum)
+  {
+/*  return (sum + ((q15_t) (x >> 16) * (q15_t) y)) + ((q15_t) x * (q15_t) (y >> 16)); */
+    return ((uint64_t)(((((q31_t)x << 16) >> 16) * (((q31_t)y      ) >> 16)) +
+                       ((((q31_t)x      ) >> 16) * (((q31_t)y << 16) >> 16)) +
+                       ( ((q63_t)sum    )                                  )   ));
+  }
+
+
+  /*
+   * @brief C emulation of SMUAD: returns x.low * y.low + x.high * y.high, using the signed 16-bit halves of x and y.
+   */
+  __STATIC_FORCEINLINE uint32_t __SMUAD(
+  uint32_t x,
+  uint32_t y)
+  {
+    return ((uint32_t)(((((q31_t)x << 16) >> 16) * (((q31_t)y << 16) >> 16)) +
+                       ((((q31_t)x      ) >> 16) * (((q31_t)y      ) >> 16))   ));
+  }
+
+
+  /*
+   * @brief C emulation of SMUSD: returns x.low * y.low - x.high * y.high, using the signed 16-bit halves of x and y.
+   */
+  __STATIC_FORCEINLINE uint32_t __SMUSD(
+  uint32_t x,
+  uint32_t y)
+  {
+    return ((uint32_t)(((((q31_t)x << 16) >> 16) * (((q31_t)y << 16) >> 16)) -
+                       ((((q31_t)x      ) >> 16) * (((q31_t)y      ) >> 16))   ));
+  }
+
+
+  /*
+   * @brief C emulation of SXTB16: sign-extends byte 0 of x into the low halfword and byte 2 of x into the high halfword.
+   */
+  __STATIC_FORCEINLINE uint32_t __SXTB16(
+  uint32_t x)
+  {
+    return ((uint32_t)(((((q31_t)x << 24) >> 24) & (q31_t)0x0000FFFF) |
+                       ((((q31_t)x <<  8) >>  8) & (q31_t)0xFFFF0000)  ));
+  }
+
+  /*
+   * @brief C emulation of SMMLA: returns sum plus the top 32 bits of the 64-bit product x * y.
+   */
+  __STATIC_FORCEINLINE int32_t __SMMLA(
+  int32_t x,
+  int32_t y,
+  int32_t sum)
+  {
+    return (sum + (int32_t) (((int64_t) x * y) >> 32));
+  }
+
+#endif /* !defined (ARM_MATH_DSP) */
+
+
+#ifdef   __cplusplus
+}
+#endif
+
+#endif /* ifndef NONE_H_ */
diff --git a/Libraries/CMSIS/5.9.0/DSP/Include/dsp/quaternion_math_functions.h b/Libraries/CMSIS/5.9.0/DSP/1.16.2/Include/dsp/quaternion_math_functions.h
old mode 100644
new mode 100755
similarity index 81%
rename from Libraries/CMSIS/5.9.0/DSP/Include/dsp/quaternion_math_functions.h
rename to Libraries/CMSIS/5.9.0/DSP/1.16.2/Include/dsp/quaternion_math_functions.h
index 33cc3b0c7a2..6c823a368b6
--- a/Libraries/CMSIS/5.9.0/DSP/Include/dsp/quaternion_math_functions.h
+++ b/Libraries/CMSIS/5.9.0/DSP/1.16.2/Include/dsp/quaternion_math_functions.h
@@ -24,8 +24,9 @@
  * limitations under the License.
  */
 
-#ifndef _QUATERNION_MATH_FUNCTIONS_H_
-#define _QUATERNION_MATH_FUNCTIONS_H_
+ 
+#ifndef QUATERNION_MATH_FUNCTIONS_H_
+#define QUATERNION_MATH_FUNCTIONS_H_
 
 #include "arm_math_types.h"
 #include "arm_math_memory.h"
@@ -33,8 +34,10 @@
 #include "dsp/none.h"
 #include "dsp/utils.h"
 
-#ifdef __cplusplus
-extern "C" {
+
+#ifdef   __cplusplus
+extern "C"
+{
 #endif
 
 /**
@@ -43,56 +46,61 @@ extern "C" {
  * rotation and quaternion representation.
  */
 
+
 /**
   @brief         Floating-point quaternion Norm.
   @param[in]     pInputQuaternions       points to the input vector of quaternions
   @param[out]    pNorms                  points to the output vector of norms
   @param[in]     nbQuaternions           number of quaternions in each vector
-  @return        none
  */
+void arm_quaternion_norm_f32(const float32_t *pInputQuaternions, 
+    float32_t *pNorms,
+    uint32_t nbQuaternions);
 
-void arm_quaternion_norm_f32(const float32_t *pInputQuaternions, float32_t *pNorms,
-                             uint32_t nbQuaternions);
 
 /**
   @brief         Floating-point quaternion inverse.
   @param[in]     pInputQuaternions            points to the input vector of quaternions
   @param[out]    pInverseQuaternions          points to the output vector of inverse quaternions
   @param[in]     nbQuaternions                number of quaternions in each vector
-  @return        none
  */
+void arm_quaternion_inverse_f32(const float32_t *pInputQuaternions, 
+    float32_t *pInverseQuaternions, 
+    uint32_t nbQuaternions);
 
-void arm_quaternion_inverse_f32(const float32_t *pInputQuaternions, float32_t *pInverseQuaternions,
-                                uint32_t nbQuaternions);
 
 /**
   @brief         Floating-point quaternion conjugates.
   @param[in]     pInputQuaternions            points to the input vector of quaternions
   @param[out]    pConjugateQuaternions        points to the output vector of conjugate quaternions
   @param[in]     nbQuaternions                number of quaternions in each vector
-  @return        none
  */
-void arm_quaternion_conjugate_f32(const float32_t *inputQuaternions,
-                                  float32_t *pConjugateQuaternions, uint32_t nbQuaternions);
+void arm_quaternion_conjugate_f32(const float32_t *inputQuaternions, 
+    float32_t *pConjugateQuaternions, 
+    uint32_t nbQuaternions);
+
 
 /**
   @brief         Floating-point normalization of quaternions.
   @param[in]     pInputQuaternions            points to the input vector of quaternions
   @param[out]    pNormalizedQuaternions       points to the output vector of normalized quaternions
   @param[in]     nbQuaternions                number of quaternions in each vector
-  @return        none
  */
-void arm_quaternion_normalize_f32(const float32_t *inputQuaternions,
-                                  float32_t *pNormalizedQuaternions, uint32_t nbQuaternions);
+void arm_quaternion_normalize_f32(const float32_t *inputQuaternions, 
+    float32_t *pNormalizedQuaternions, 
+    uint32_t nbQuaternions);
+
 
 /**
   @brief         Floating-point product of two quaternions.
   @param[in]     qa       First quaternion
   @param[in]     qb       Second quaternion
   @param[out]    r        Product of two quaternions
-  @return        none
  */
-void arm_quaternion_product_single_f32(const float32_t *qa, const float32_t *qb, float32_t *r);
+void arm_quaternion_product_single_f32(const float32_t *qa, 
+    const float32_t *qb, 
+    float32_t *r);
+
 
 /**
   @brief         Floating-point elementwise product two quaternions.
@@ -100,17 +108,18 @@ void arm_quaternion_product_single_f32(const float32_t *qa, const float32_t *qb,
   @param[in]     qb                  Second array of quaternions
   @param[out]    r                   Elementwise product of quaternions
   @param[in]     nbQuaternions       Number of quaternions in the array
-  @return        none
  */
-void arm_quaternion_product_f32(const float32_t *qa, const float32_t *qb, float32_t *r,
-                                uint32_t nbQuaternions);
+void arm_quaternion_product_f32(const float32_t *qa, 
+    const float32_t *qb, 
+    float32_t *r,
+    uint32_t nbQuaternions);
+
 
 /**
  * @brief Conversion of quaternion to equivalent rotation matrix.
  * @param[in]       pInputQuaternions points to an array of normalized quaternions
  * @param[out]      pOutputRotations points to an array of 3x3 rotations (in row order)
  * @param[in]       nbQuaternions in the array
- * @return none.
  *
  * <b>Format of rotation matrix</b>
  * \par
@@ -121,20 +130,23 @@ void arm_quaternion_product_f32(const float32_t *qa, const float32_t *qb, float3
  *
  * Rotation matrix is saved in row order : R00 R01 R02 R10 R11 R12 R20 R21 R22
  */
-void arm_quaternion2rotation_f32(const float32_t *pInputQuaternions, float32_t *pOutputRotations,
-                                 uint32_t nbQuaternions);
+void arm_quaternion2rotation_f32(const float32_t *pInputQuaternions, 
+    float32_t *pOutputRotations, 
+    uint32_t nbQuaternions);
+
 
 /**
  * @brief Conversion of a rotation matrix to equivalent quaternion.
  * @param[in]       pInputRotations points to an array 3x3 rotation matrix (in row order)
  * @param[out]      pOutputQuaternions points to an array of quaternions
  * @param[in]       nbQuaternions in the array
- * @return none.
 */
-void arm_rotation2quaternion_f32(const float32_t *pInputRotations, float32_t *pOutputQuaternions,
-                                 uint32_t nbQuaternions);
+void arm_rotation2quaternion_f32(const float32_t *pInputRotations, 
+    float32_t *pOutputQuaternions,  
+    uint32_t nbQuaternions);
+
 
-#ifdef __cplusplus
+#ifdef   __cplusplus
 }
 #endif
 
diff --git a/Libraries/CMSIS/5.9.0/DSP/Include/dsp/statistics_functions.h b/Libraries/CMSIS/5.9.0/DSP/1.16.2/Include/dsp/statistics_functions.h
old mode 100644
new mode 100755
similarity index 68%
rename from Libraries/CMSIS/5.9.0/DSP/Include/dsp/statistics_functions.h
rename to Libraries/CMSIS/5.9.0/DSP/1.16.2/Include/dsp/statistics_functions.h
index e6423265468..301aadd023c
--- a/Libraries/CMSIS/5.9.0/DSP/Include/dsp/statistics_functions.h
+++ b/Libraries/CMSIS/5.9.0/DSP/1.16.2/Include/dsp/statistics_functions.h
@@ -1,8 +1,8 @@
 /******************************************************************************
  * @file     statistics_functions.h
  * @brief    Public header file for CMSIS DSP Library
- * @version  V1.10.0
- * @date     08 July 2021
+ * @version  V1.10.1
+ * @date     14 July 2022
  * Target Processor: Cortex-M and Cortex-A cores
  ******************************************************************************/
 /*
@@ -23,8 +23,9 @@
  * limitations under the License.
  */
 
-#ifndef _STATISTICS_FUNCTIONS_H_
-#define _STATISTICS_FUNCTIONS_H_
+ 
+#ifndef STATISTICS_FUNCTIONS_H_
+#define STATISTICS_FUNCTIONS_H_
 
 #include "arm_math_types.h"
 #include "arm_math_memory.h"
@@ -35,10 +36,12 @@
 #include "dsp/basic_math_functions.h"
 #include "dsp/fast_math_functions.h"
 
-#ifdef __cplusplus
-extern "C" {
+#ifdef   __cplusplus
+extern "C"
+{
 #endif
 
+
 /**
  * @defgroup groupStats Statistics Functions
  */
@@ -67,6 +70,7 @@ extern "C" {
  *
  */
 
+
 float32_t arm_logsumexp_f32(const float32_t *in, uint32_t blockSize);
 
 /**
@@ -82,8 +86,11 @@ float32_t arm_logsumexp_f32(const float32_t *in, uint32_t blockSize);
  *
  */
 
-float32_t arm_logsumexp_dot_prod_f32(const float32_t *pSrcA, const float32_t *pSrcB,
-                                     uint32_t blockSize, float32_t *pTmpBuffer);
+
+float32_t arm_logsumexp_dot_prod_f32(const float32_t * pSrcA,
+  const float32_t * pSrcB,
+  uint32_t blockSize,
+  float32_t *pTmpBuffer);
 
 /**
  * @brief Entropy
@@ -94,7 +101,9 @@ float32_t arm_logsumexp_dot_prod_f32(const float32_t *pSrcA, const float32_t *pS
  *
  */
 
-float32_t arm_entropy_f32(const float32_t *pSrcA, uint32_t blockSize);
+
+float32_t arm_entropy_f32(const float32_t * pSrcA,uint32_t blockSize);
+
 
 /**
  * @brief Entropy
@@ -105,7 +114,9 @@ float32_t arm_entropy_f32(const float32_t *pSrcA, uint32_t blockSize);
  *
  */
 
-float64_t arm_entropy_f64(const float64_t *pSrcA, uint32_t blockSize);
+
+float64_t arm_entropy_f64(const float64_t * pSrcA, uint32_t blockSize);
+
 
 /**
  * @brief Kullback-Leibler
@@ -116,8 +127,10 @@ float64_t arm_entropy_f64(const float64_t *pSrcA, uint32_t blockSize);
  * @return Kullback-Leibler  Divergence D(A || B)
  *
  */
-float32_t arm_kullback_leibler_f32(const float32_t *pSrcA, const float32_t *pSrcB,
-                                   uint32_t blockSize);
+float32_t arm_kullback_leibler_f32(const float32_t * pSrcA
+  ,const float32_t * pSrcB
+  ,uint32_t blockSize);
+
 
 /**
  * @brief Kullback-Leibler
@@ -128,211 +141,314 @@ float32_t arm_kullback_leibler_f32(const float32_t *pSrcA, const float32_t *pSrc
  * @return Kullback-Leibler  Divergence D(A || B)
  *
  */
-float64_t arm_kullback_leibler_f64(const float64_t *pSrcA, const float64_t *pSrcB,
-                                   uint32_t blockSize);
+float64_t arm_kullback_leibler_f64(const float64_t * pSrcA, 
+                const float64_t * pSrcB, 
+                uint32_t blockSize);
 
-/**
+
+ /**
    * @brief  Sum of the squares of the elements of a Q31 vector.
    * @param[in]  pSrc       is input pointer
    * @param[in]  blockSize  is the number of samples to process
    * @param[out] pResult    is output value.
    */
-void arm_power_q31(const q31_t *pSrc, uint32_t blockSize, q63_t *pResult);
+  void arm_power_q31(
+  const q31_t * pSrc,
+        uint32_t blockSize,
+        q63_t * pResult);
 
-/**
+
+  /**
    * @brief  Sum of the squares of the elements of a floating-point vector.
    * @param[in]  pSrc       is input pointer
    * @param[in]  blockSize  is the number of samples to process
    * @param[out] pResult    is output value.
    */
-void arm_power_f32(const float32_t *pSrc, uint32_t blockSize, float32_t *pResult);
+  void arm_power_f32(
+  const float32_t * pSrc,
+        uint32_t blockSize,
+        float32_t * pResult);
 
-/**
+
+  /**
    * @brief  Sum of the squares of the elements of a floating-point vector.
    * @param[in]  pSrc       is input pointer
    * @param[in]  blockSize  is the number of samples to process
    * @param[out] pResult    is output value.
    */
-void arm_power_f64(const float64_t *pSrc, uint32_t blockSize, float64_t *pResult);
+  void arm_power_f64(
+  const float64_t * pSrc,
+        uint32_t blockSize,
+        float64_t * pResult);
 
-/**
+
+  /**
    * @brief  Sum of the squares of the elements of a Q15 vector.
    * @param[in]  pSrc       is input pointer
    * @param[in]  blockSize  is the number of samples to process
    * @param[out] pResult    is output value.
    */
-void arm_power_q15(const q15_t *pSrc, uint32_t blockSize, q63_t *pResult);
+  void arm_power_q15(
+  const q15_t * pSrc,
+        uint32_t blockSize,
+        q63_t * pResult);
 
-/**
+
+  /**
    * @brief  Sum of the squares of the elements of a Q7 vector.
    * @param[in]  pSrc       is input pointer
    * @param[in]  blockSize  is the number of samples to process
    * @param[out] pResult    is output value.
    */
-void arm_power_q7(const q7_t *pSrc, uint32_t blockSize, q31_t *pResult);
+  void arm_power_q7(
+  const q7_t * pSrc,
+        uint32_t blockSize,
+        q31_t * pResult);
 
-/**
+
+  /**
    * @brief  Mean value of a Q7 vector.
    * @param[in]  pSrc       is input pointer
    * @param[in]  blockSize  is the number of samples to process
    * @param[out] pResult    is output value.
    */
-void arm_mean_q7(const q7_t *pSrc, uint32_t blockSize, q7_t *pResult);
+  void arm_mean_q7(
+  const q7_t * pSrc,
+        uint32_t blockSize,
+        q7_t * pResult);
 
-/**
+
+  /**
    * @brief  Mean value of a Q15 vector.
    * @param[in]  pSrc       is input pointer
    * @param[in]  blockSize  is the number of samples to process
    * @param[out] pResult    is output value.
    */
-void arm_mean_q15(const q15_t *pSrc, uint32_t blockSize, q15_t *pResult);
+  void arm_mean_q15(
+  const q15_t * pSrc,
+        uint32_t blockSize,
+        q15_t * pResult);
 
-/**
+
+  /**
    * @brief  Mean value of a Q31 vector.
    * @param[in]  pSrc       is input pointer
    * @param[in]  blockSize  is the number of samples to process
    * @param[out] pResult    is output value.
    */
-void arm_mean_q31(const q31_t *pSrc, uint32_t blockSize, q31_t *pResult);
+  void arm_mean_q31(
+  const q31_t * pSrc,
+        uint32_t blockSize,
+        q31_t * pResult);
 
-/**
+
+  /**
    * @brief  Mean value of a floating-point vector.
    * @param[in]  pSrc       is input pointer
    * @param[in]  blockSize  is the number of samples to process
    * @param[out] pResult    is output value.
    */
-void arm_mean_f32(const float32_t *pSrc, uint32_t blockSize, float32_t *pResult);
+  void arm_mean_f32(
+  const float32_t * pSrc,
+        uint32_t blockSize,
+        float32_t * pResult);
 
-/**
+
+  /**
    * @brief  Mean value of a floating-point vector.
    * @param[in]  pSrc       is input pointer
    * @param[in]  blockSize  is the number of samples to process
    * @param[out] pResult    is output value.
    */
-void arm_mean_f64(const float64_t *pSrc, uint32_t blockSize, float64_t *pResult);
+  void arm_mean_f64(
+  const float64_t * pSrc,
+        uint32_t blockSize,
+        float64_t * pResult);
 
-/**
+
+  /**
    * @brief  Variance of the elements of a floating-point vector.
    * @param[in]  pSrc       is input pointer
    * @param[in]  blockSize  is the number of samples to process
    * @param[out] pResult    is output value.
    */
-void arm_var_f32(const float32_t *pSrc, uint32_t blockSize, float32_t *pResult);
+  void arm_var_f32(
+  const float32_t * pSrc,
+        uint32_t blockSize,
+        float32_t * pResult);
 
-/**
+
+  /**
    * @brief  Variance of the elements of a floating-point vector.
    * @param[in]  pSrc       is input pointer
    * @param[in]  blockSize  is the number of samples to process
    * @param[out] pResult    is output value.
    */
-void arm_var_f64(const float64_t *pSrc, uint32_t blockSize, float64_t *pResult);
+  void arm_var_f64(
+  const float64_t * pSrc,
+        uint32_t blockSize,
+        float64_t * pResult);
 
-/**
+
+  /**
    * @brief  Variance of the elements of a Q31 vector.
    * @param[in]  pSrc       is input pointer
    * @param[in]  blockSize  is the number of samples to process
    * @param[out] pResult    is output value.
    */
-void arm_var_q31(const q31_t *pSrc, uint32_t blockSize, q31_t *pResult);
+  void arm_var_q31(
+  const q31_t * pSrc,
+        uint32_t blockSize,
+        q31_t * pResult);
 
-/**
+
+  /**
    * @brief  Variance of the elements of a Q15 vector.
    * @param[in]  pSrc       is input pointer
    * @param[in]  blockSize  is the number of samples to process
    * @param[out] pResult    is output value.
    */
-void arm_var_q15(const q15_t *pSrc, uint32_t blockSize, q15_t *pResult);
+  void arm_var_q15(
+  const q15_t * pSrc,
+        uint32_t blockSize,
+        q15_t * pResult);
 
-/**
+
+  /**
    * @brief  Root Mean Square of the elements of a floating-point vector.
    * @param[in]  pSrc       is input pointer
    * @param[in]  blockSize  is the number of samples to process
    * @param[out] pResult    is output value.
    */
-void arm_rms_f32(const float32_t *pSrc, uint32_t blockSize, float32_t *pResult);
+  void arm_rms_f32(
+  const float32_t * pSrc,
+        uint32_t blockSize,
+        float32_t * pResult);
 
-/**
+
+  /**
    * @brief  Root Mean Square of the elements of a Q31 vector.
    * @param[in]  pSrc       is input pointer
    * @param[in]  blockSize  is the number of samples to process
    * @param[out] pResult    is output value.
    */
-void arm_rms_q31(const q31_t *pSrc, uint32_t blockSize, q31_t *pResult);
+  void arm_rms_q31(
+  const q31_t * pSrc,
+        uint32_t blockSize,
+        q31_t * pResult);
 
-/**
+
+  /**
    * @brief  Root Mean Square of the elements of a Q15 vector.
    * @param[in]  pSrc       is input pointer
    * @param[in]  blockSize  is the number of samples to process
    * @param[out] pResult    is output value.
    */
-void arm_rms_q15(const q15_t *pSrc, uint32_t blockSize, q15_t *pResult);
+  void arm_rms_q15(
+  const q15_t * pSrc,
+        uint32_t blockSize,
+        q15_t * pResult);
 
-/**
+
+  /**
    * @brief  Standard deviation of the elements of a floating-point vector.
    * @param[in]  pSrc       is input pointer
    * @param[in]  blockSize  is the number of samples to process
    * @param[out] pResult    is output value.
    */
-void arm_std_f32(const float32_t *pSrc, uint32_t blockSize, float32_t *pResult);
+  void arm_std_f32(
+  const float32_t * pSrc,
+        uint32_t blockSize,
+        float32_t * pResult);
 
-/**
+
+  /**
    * @brief  Standard deviation of the elements of a floating-point vector.
    * @param[in]  pSrc       is input pointer
    * @param[in]  blockSize  is the number of samples to process
    * @param[out] pResult    is output value.
    */
-void arm_std_f64(const float64_t *pSrc, uint32_t blockSize, float64_t *pResult);
+  void arm_std_f64(
+  const float64_t * pSrc,
+        uint32_t blockSize,
+        float64_t * pResult);
 
-/**
+
+  /**
    * @brief  Standard deviation of the elements of a Q31 vector.
    * @param[in]  pSrc       is input pointer
    * @param[in]  blockSize  is the number of samples to process
    * @param[out] pResult    is output value.
    */
-void arm_std_q31(const q31_t *pSrc, uint32_t blockSize, q31_t *pResult);
+  void arm_std_q31(
+  const q31_t * pSrc,
+        uint32_t blockSize,
+        q31_t * pResult);
 
-/**
+
+  /**
    * @brief  Standard deviation of the elements of a Q15 vector.
    * @param[in]  pSrc       is input pointer
    * @param[in]  blockSize  is the number of samples to process
    * @param[out] pResult    is output value.
    */
-void arm_std_q15(const q15_t *pSrc, uint32_t blockSize, q15_t *pResult);
+  void arm_std_q15(
+  const q15_t * pSrc,
+        uint32_t blockSize,
+        q15_t * pResult);
 
-/**
+
+  
+  /**
    * @brief  Minimum value of a Q7 vector.
    * @param[in]  pSrc       is input pointer
    * @param[in]  blockSize  is the number of samples to process
-   * @param[out] result     is output pointer
-   * @param[in]  index      is the array index of the minimum value in the input buffer.
+   * @param[out] pResult     is output pointer
+   * @param[out] pIndex      is the array index of the minimum value in the input buffer.
    */
-void arm_min_q7(const q7_t *pSrc, uint32_t blockSize, q7_t *result, uint32_t *index);
+  void arm_min_q7(
+  const q7_t * pSrc,
+        uint32_t blockSize,
+        q7_t * pResult,
+        uint32_t * pIndex);
 
-/**
+  /**
    * @brief  Minimum value of absolute values of a Q7 vector.
    * @param[in]  pSrc       is input pointer
    * @param[in]  blockSize  is the number of samples to process
-   * @param[out] result     is output pointer
-   * @param[in]  index      is the array index of the minimum value in the input buffer.
+   * @param[out] pResult    is output pointer
+   * @param[out] pIndex     is the array index of the minimum value in the input buffer.
    */
-void arm_absmin_q7(const q7_t *pSrc, uint32_t blockSize, q7_t *result, uint32_t *index);
+  void arm_absmin_q7(
+  const q7_t * pSrc,
+        uint32_t blockSize,
+        q7_t * pResult,
+        uint32_t * pIndex);
 
-/**
+    /**
    * @brief  Minimum value of absolute values of a Q7 vector.
    * @param[in]  pSrc       is input pointer
    * @param[in]  blockSize  is the number of samples to process
-   * @param[out] result     is output pointer
+   * @param[out] pResult    is output pointer
    */
-void arm_absmin_no_idx_q7(const q7_t *pSrc, uint32_t blockSize, q7_t *result);
+  void arm_absmin_no_idx_q7(
+  const q7_t * pSrc,
+        uint32_t blockSize,
+        q7_t * pResult);
 
-/**
+
+  /**
    * @brief  Minimum value of a Q15 vector.
    * @param[in]  pSrc       is input pointer
    * @param[in]  blockSize  is the number of samples to process
    * @param[out] pResult    is output pointer
    * @param[in]  pIndex     is the array index of the minimum value in the input buffer.
    */
-void arm_min_q15(const q15_t *pSrc, uint32_t blockSize, q15_t *pResult, uint32_t *pIndex);
+  void arm_min_q15(
+  const q15_t * pSrc,
+        uint32_t blockSize,
+        q15_t * pResult,
+        uint32_t * pIndex);
 
 /**
    * @brief  Minimum value of absolute values of a Q15 vector.
@@ -341,95 +457,137 @@ void arm_min_q15(const q15_t *pSrc, uint32_t blockSize, q15_t *pResult, uint32_t
    * @param[out] pResult    is output pointer
    * @param[in]  pIndex     is the array index of the minimum value in the input buffer.
    */
-void arm_absmin_q15(const q15_t *pSrc, uint32_t blockSize, q15_t *pResult, uint32_t *pIndex);
+  void arm_absmin_q15(
+  const q15_t * pSrc,
+        uint32_t blockSize,
+        q15_t * pResult,
+        uint32_t * pIndex);
 
-/**
+  /**
    * @brief  Minimum value of absolute values of a Q15 vector.
    * @param[in]  pSrc       is input pointer
    * @param[in]  blockSize  is the number of samples to process
    * @param[out] pResult    is output pointer
    */
-void arm_absmin_no_idx_q15(const q15_t *pSrc, uint32_t blockSize, q15_t *pResult);
+  void arm_absmin_no_idx_q15(
+  const q15_t * pSrc,
+        uint32_t blockSize,
+        q15_t * pResult);
 
-/**
+
+  /**
    * @brief  Minimum value of a Q31 vector.
    * @param[in]  pSrc       is input pointer
    * @param[in]  blockSize  is the number of samples to process
    * @param[out] pResult    is output pointer
    * @param[out] pIndex     is the array index of the minimum value in the input buffer.
    */
-void arm_min_q31(const q31_t *pSrc, uint32_t blockSize, q31_t *pResult, uint32_t *pIndex);
+  void arm_min_q31(
+  const q31_t * pSrc,
+        uint32_t blockSize,
+        q31_t * pResult,
+        uint32_t * pIndex);
 
-/**
+  /**
    * @brief  Minimum value of absolute values of a Q31 vector.
    * @param[in]  pSrc       is input pointer
    * @param[in]  blockSize  is the number of samples to process
    * @param[out] pResult    is output pointer
    * @param[out] pIndex     is the array index of the minimum value in the input buffer.
    */
-void arm_absmin_q31(const q31_t *pSrc, uint32_t blockSize, q31_t *pResult, uint32_t *pIndex);
+  void arm_absmin_q31(
+  const q31_t * pSrc,
+        uint32_t blockSize,
+        q31_t * pResult,
+        uint32_t * pIndex);
 
-/**
+ /**
    * @brief  Minimum value of absolute values of a Q31 vector.
    * @param[in]  pSrc       is input pointer
    * @param[in]  blockSize  is the number of samples to process
    * @param[out] pResult    is output pointer
    */
-void arm_absmin_no_idx_q31(const q31_t *pSrc, uint32_t blockSize, q31_t *pResult);
+  void arm_absmin_no_idx_q31(
+  const q31_t * pSrc,
+        uint32_t blockSize,
+        q31_t * pResult);
 
-/**
+
+  /**
    * @brief  Minimum value of a floating-point vector.
    * @param[in]  pSrc       is input pointer
    * @param[in]  blockSize  is the number of samples to process
    * @param[out] pResult    is output pointer
    * @param[out] pIndex     is the array index of the minimum value in the input buffer.
    */
-void arm_min_f32(const float32_t *pSrc, uint32_t blockSize, float32_t *pResult, uint32_t *pIndex);
+  void arm_min_f32(
+  const float32_t * pSrc,
+        uint32_t blockSize,
+        float32_t * pResult,
+        uint32_t * pIndex);
 
-/**
+  /**
    * @brief  Minimum value of absolute values of a floating-point vector.
    * @param[in]  pSrc       is input pointer
    * @param[in]  blockSize  is the number of samples to process
    * @param[out] pResult    is output pointer
    * @param[out] pIndex     is the array index of the minimum value in the input buffer.
    */
-void arm_absmin_f32(const float32_t *pSrc, uint32_t blockSize, float32_t *pResult,
-                    uint32_t *pIndex);
+  void arm_absmin_f32(
+  const float32_t * pSrc,
+        uint32_t blockSize,
+        float32_t * pResult,
+        uint32_t * pIndex);
 
-/**
+  /**
    * @brief  Minimum value of absolute values of a floating-point vector.
    * @param[in]  pSrc       is input pointer
    * @param[in]  blockSize  is the number of samples to process
    * @param[out] pResult    is output pointer
    */
-void arm_absmin_no_idx_f32(const float32_t *pSrc, uint32_t blockSize, float32_t *pResult);
+  void arm_absmin_no_idx_f32(
+  const float32_t * pSrc,
+        uint32_t blockSize,
+        float32_t * pResult);
 
-/**
+
+  /**
    * @brief  Minimum value of a floating-point vector.
    * @param[in]  pSrc       is input pointer
    * @param[in]  blockSize  is the number of samples to process
    * @param[out] pResult    is output pointer
    * @param[out] pIndex     is the array index of the minimum value in the input buffer.
    */
-void arm_min_f64(const float64_t *pSrc, uint32_t blockSize, float64_t *pResult, uint32_t *pIndex);
+  void arm_min_f64(
+  const float64_t * pSrc,
+        uint32_t blockSize,
+        float64_t * pResult,
+        uint32_t * pIndex);
 
-/**
+  /**
    * @brief  Minimum value of absolute values of a floating-point vector.
    * @param[in]  pSrc       is input pointer
    * @param[in]  blockSize  is the number of samples to process
    * @param[out] pResult    is output pointer
    * @param[out] pIndex     is the array index of the minimum value in the input buffer.
    */
-void arm_absmin_f64(const float64_t *pSrc, uint32_t blockSize, float64_t *pResult,
-                    uint32_t *pIndex);
+  void arm_absmin_f64(
+  const float64_t * pSrc,
+        uint32_t blockSize,
+        float64_t * pResult,
+        uint32_t * pIndex);
 
-/**
+  /**
    * @brief  Minimum value of absolute values of a floating-point vector.
    * @param[in]  pSrc       is input pointer
    * @param[in]  blockSize  is the number of samples to process
    * @param[out] pResult    is output pointer
    */
-void arm_absmin_no_idx_f64(const float64_t *pSrc, uint32_t blockSize, float64_t *pResult);
+  void arm_absmin_no_idx_f64(
+  const float64_t * pSrc,
+        uint32_t blockSize,
+        float64_t * pResult);
+
 
 /**
  * @brief Maximum value of a Q7 vector.
@@ -438,7 +596,11 @@ void arm_absmin_no_idx_f64(const float64_t *pSrc, uint32_t blockSize, float64_t
  * @param[out] pResult    maximum value returned here
  * @param[out] pIndex     index of maximum value returned here
  */
-void arm_max_q7(const q7_t *pSrc, uint32_t blockSize, q7_t *pResult, uint32_t *pIndex);
+  void arm_max_q7(
+  const q7_t * pSrc,
+        uint32_t blockSize,
+        q7_t * pResult,
+        uint32_t * pIndex);
 
 /**
  * @brief Maximum value of absolute values of a Q7 vector.
@@ -447,7 +609,11 @@ void arm_max_q7(const q7_t *pSrc, uint32_t blockSize, q7_t *pResult, uint32_t *p
  * @param[out] pResult    maximum value returned here
  * @param[out] pIndex     index of maximum value returned here
  */
-void arm_absmax_q7(const q7_t *pSrc, uint32_t blockSize, q7_t *pResult, uint32_t *pIndex);
+  void arm_absmax_q7(
+  const q7_t * pSrc,
+        uint32_t blockSize,
+        q7_t * pResult,
+        uint32_t * pIndex);
 
 /**
  * @brief Maximum value of absolute values of a Q7 vector.
@@ -455,7 +621,11 @@ void arm_absmax_q7(const q7_t *pSrc, uint32_t blockSize, q7_t *pResult, uint32_t
  * @param[in]  blockSize  length of the input vector
  * @param[out] pResult    maximum value returned here
  */
-void arm_absmax_no_idx_q7(const q7_t *pSrc, uint32_t blockSize, q7_t *pResult);
+  void arm_absmax_no_idx_q7(
+  const q7_t * pSrc,
+        uint32_t blockSize,
+        q7_t * pResult);
+
 
 /**
  * @brief Maximum value of a Q15 vector.
@@ -464,7 +634,11 @@ void arm_absmax_no_idx_q7(const q7_t *pSrc, uint32_t blockSize, q7_t *pResult);
  * @param[out] pResult    maximum value returned here
  * @param[out] pIndex     index of maximum value returned here
  */
-void arm_max_q15(const q15_t *pSrc, uint32_t blockSize, q15_t *pResult, uint32_t *pIndex);
+  void arm_max_q15(
+  const q15_t * pSrc,
+        uint32_t blockSize,
+        q15_t * pResult,
+        uint32_t * pIndex);
 
 /**
  * @brief Maximum value of absolute values of a Q15 vector.
@@ -473,15 +647,22 @@ void arm_max_q15(const q15_t *pSrc, uint32_t blockSize, q15_t *pResult, uint32_t
  * @param[out] pResult    maximum value returned here
  * @param[out] pIndex     index of maximum value returned here
  */
-void arm_absmax_q15(const q15_t *pSrc, uint32_t blockSize, q15_t *pResult, uint32_t *pIndex);
+  void arm_absmax_q15(
+  const q15_t * pSrc,
+        uint32_t blockSize,
+        q15_t * pResult,
+        uint32_t * pIndex);
 
-/**
+  /**
  * @brief Maximum value of absolute values of a Q15 vector.
  * @param[in]  pSrc       points to the input buffer
  * @param[in]  blockSize  length of the input vector
  * @param[out] pResult    maximum value returned here
  */
-void arm_absmax_no_idx_q15(const q15_t *pSrc, uint32_t blockSize, q15_t *pResult);
+  void arm_absmax_no_idx_q15(
+  const q15_t * pSrc,
+        uint32_t blockSize,
+        q15_t * pResult);
 
 /**
  * @brief Maximum value of a Q31 vector.
@@ -490,7 +671,11 @@ void arm_absmax_no_idx_q15(const q15_t *pSrc, uint32_t blockSize, q15_t *pResult
  * @param[out] pResult    maximum value returned here
  * @param[out] pIndex     index of maximum value returned here
  */
-void arm_max_q31(const q31_t *pSrc, uint32_t blockSize, q31_t *pResult, uint32_t *pIndex);
+  void arm_max_q31(
+  const q31_t * pSrc,
+        uint32_t blockSize,
+        q31_t * pResult,
+        uint32_t * pIndex);
 
 /**
  * @brief Maximum value of absolute values of a Q31 vector.
@@ -499,15 +684,22 @@ void arm_max_q31(const q31_t *pSrc, uint32_t blockSize, q31_t *pResult, uint32_t
  * @param[out] pResult    maximum value returned here
  * @param[out] pIndex     index of maximum value returned here
  */
-void arm_absmax_q31(const q31_t *pSrc, uint32_t blockSize, q31_t *pResult, uint32_t *pIndex);
+  void arm_absmax_q31(
+  const q31_t * pSrc,
+        uint32_t blockSize,
+        q31_t * pResult,
+        uint32_t * pIndex);
 
-/**
+ /**
  * @brief Maximum value of absolute values of a Q31 vector.
  * @param[in]  pSrc       points to the input buffer
  * @param[in]  blockSize  length of the input vector
  * @param[out] pResult    maximum value returned here
  */
-void arm_absmax_no_idx_q31(const q31_t *pSrc, uint32_t blockSize, q31_t *pResult);
+  void arm_absmax_no_idx_q31(
+  const q31_t * pSrc,
+        uint32_t blockSize,
+        q31_t * pResult);
 
 /**
  * @brief Maximum value of a floating-point vector.
@@ -516,7 +708,11 @@ void arm_absmax_no_idx_q31(const q31_t *pSrc, uint32_t blockSize, q31_t *pResult
  * @param[out] pResult    maximum value returned here
  * @param[out] pIndex     index of maximum value returned here
  */
-void arm_max_f32(const float32_t *pSrc, uint32_t blockSize, float32_t *pResult, uint32_t *pIndex);
+  void arm_max_f32(
+  const float32_t * pSrc,
+        uint32_t blockSize,
+        float32_t * pResult,
+        uint32_t * pIndex);
 
 /**
  * @brief Maximum value of absolute values of a floating-point vector.
@@ -525,16 +721,22 @@ void arm_max_f32(const float32_t *pSrc, uint32_t blockSize, float32_t *pResult,
  * @param[out] pResult    maximum value returned here
  * @param[out] pIndex     index of maximum value returned here
  */
-void arm_absmax_f32(const float32_t *pSrc, uint32_t blockSize, float32_t *pResult,
-                    uint32_t *pIndex);
+  void arm_absmax_f32(
+  const float32_t * pSrc,
+        uint32_t blockSize,
+        float32_t * pResult,
+        uint32_t * pIndex);
 
-/**
+ /**
  * @brief Maximum value of absolute values of a floating-point vector.
  * @param[in]  pSrc       points to the input buffer
  * @param[in]  blockSize  length of the input vector
  * @param[out] pResult    maximum value returned here
  */
-void arm_absmax_no_idx_f32(const float32_t *pSrc, uint32_t blockSize, float32_t *pResult);
+  void arm_absmax_no_idx_f32(
+  const float32_t * pSrc,
+        uint32_t blockSize,
+        float32_t * pResult);
 
 /**
  * @brief Maximum value of a floating-point vector.
@@ -543,7 +745,11 @@ void arm_absmax_no_idx_f32(const float32_t *pSrc, uint32_t blockSize, float32_t
  * @param[out] pResult    maximum value returned here
  * @param[out] pIndex     index of maximum value returned here
  */
-void arm_max_f64(const float64_t *pSrc, uint32_t blockSize, float64_t *pResult, uint32_t *pIndex);
+  void arm_max_f64(
+  const float64_t * pSrc,
+        uint32_t blockSize,
+        float64_t * pResult,
+        uint32_t * pIndex);
 
 /**
  * @brief Maximum value of absolute values of a floating-point vector.
@@ -552,8 +758,11 @@ void arm_max_f64(const float64_t *pSrc, uint32_t blockSize, float64_t *pResult,
  * @param[out] pResult    maximum value returned here
  * @param[out] pIndex     index of maximum value returned here
  */
-void arm_absmax_f64(const float64_t *pSrc, uint32_t blockSize, float64_t *pResult,
-                    uint32_t *pIndex);
+  void arm_absmax_f64(
+  const float64_t * pSrc,
+        uint32_t blockSize,
+        float64_t * pResult,
+        uint32_t * pIndex);
 
 /**
  * @brief Maximum value of absolute values of a floating-point vector.
@@ -561,97 +770,120 @@ void arm_absmax_f64(const float64_t *pSrc, uint32_t blockSize, float64_t *pResul
  * @param[in]  blockSize  length of the input vector
  * @param[out] pResult    maximum value returned here
  */
-void arm_absmax_no_idx_f64(const float64_t *pSrc, uint32_t blockSize, float64_t *pResult);
+  void arm_absmax_no_idx_f64(
+  const float64_t * pSrc,
+        uint32_t blockSize,
+        float64_t * pResult);
 
-/**
+  /**
     @brief         Maximum value of a floating-point vector.
     @param[in]     pSrc       points to the input vector
     @param[in]     blockSize  number of samples in input vector
     @param[out]    pResult    maximum value returned here
-    @return        none
    */
-void arm_max_no_idx_f32(const float32_t *pSrc, uint32_t blockSize, float32_t *pResult);
+  void arm_max_no_idx_f32(
+      const float32_t *pSrc,
+      uint32_t   blockSize,
+      float32_t *pResult);
 
-/**
+  /**
     @brief         Minimum value of a floating-point vector.
     @param[in]     pSrc       points to the input vector
     @param[in]     blockSize  number of samples in input vector
     @param[out]    pResult    minimum value returned here
-    @return        none
    */
-void arm_min_no_idx_f32(const float32_t *pSrc, uint32_t blockSize, float32_t *pResult);
+  void arm_min_no_idx_f32(
+      const float32_t *pSrc,
+      uint32_t   blockSize,
+      float32_t *pResult);
 
-/**
+  /**
     @brief         Maximum value of a floating-point vector.
     @param[in]     pSrc       points to the input vector
     @param[in]     blockSize  number of samples in input vector
     @param[out]    pResult    maximum value returned here
-    @return        none
    */
-void arm_max_no_idx_f64(const float64_t *pSrc, uint32_t blockSize, float64_t *pResult);
+  void arm_max_no_idx_f64(
+      const float64_t *pSrc,
+      uint32_t   blockSize,
+      float64_t *pResult);
 
-/**
+  /**
     @brief         Maximum value of a q31 vector.
     @param[in]     pSrc       points to the input vector
     @param[in]     blockSize  number of samples in input vector
     @param[out]    pResult    maximum value returned here
-    @return        none
    */
-void arm_max_no_idx_q31(const q31_t *pSrc, uint32_t blockSize, q31_t *pResult);
+  void arm_max_no_idx_q31(
+      const q31_t *pSrc,
+      uint32_t   blockSize,
+      q31_t *pResult);
 
-/**
+  /**
     @brief         Maximum value of a q15 vector.
     @param[in]     pSrc       points to the input vector
     @param[in]     blockSize  number of samples in input vector
     @param[out]    pResult    maximum value returned here
-    @return        none
    */
-void arm_max_no_idx_q15(const q15_t *pSrc, uint32_t blockSize, q15_t *pResult);
+  void arm_max_no_idx_q15(
+      const q15_t *pSrc,
+      uint32_t   blockSize,
+      q15_t *pResult);
 
-/**
+  /**
     @brief         Maximum value of a q7 vector.
     @param[in]     pSrc       points to the input vector
     @param[in]     blockSize  number of samples in input vector
     @param[out]    pResult    maximum value returned here
-    @return        none
    */
-void arm_max_no_idx_q7(const q7_t *pSrc, uint32_t blockSize, q7_t *pResult);
+  void arm_max_no_idx_q7(
+      const q7_t *pSrc,
+      uint32_t   blockSize,
+      q7_t *pResult);
 
-/**
+  /**
     @brief         Minimum value of a floating-point vector.
     @param[in]     pSrc       points to the input vector
     @param[in]     blockSize  number of samples in input vector
     @param[out]    pResult    minimum value returned here
-    @return        none
    */
-void arm_min_no_idx_f64(const float64_t *pSrc, uint32_t blockSize, float64_t *pResult);
+  void arm_min_no_idx_f64(
+      const float64_t *pSrc,
+      uint32_t   blockSize,
+      float64_t *pResult);
 
 /**
     @brief         Minimum value of a q31 vector.
     @param[in]     pSrc       points to the input vector
     @param[in]     blockSize  number of samples in input vector
     @param[out]    pResult    minimum value returned here
-    @return        none
    */
-void arm_min_no_idx_q31(const q31_t *pSrc, uint32_t blockSize, q31_t *pResult);
+  void arm_min_no_idx_q31(
+      const q31_t *pSrc,
+      uint32_t   blockSize,
+      q31_t *pResult);
 
-/**
+  /**
     @brief         Minimum value of a q15 vector.
     @param[in]     pSrc       points to the input vector
     @param[in]     blockSize  number of samples in input vector
     @param[out]    pResult    minimum value returned here
-    @return        none
    */
-void arm_min_no_idx_q15(const q15_t *pSrc, uint32_t blockSize, q15_t *pResult);
+  void arm_min_no_idx_q15(
+      const q15_t *pSrc,
+      uint32_t   blockSize,
+      q15_t *pResult);
 
-/**
+  /**
     @brief         Minimum value of a q7 vector.
     @param[in]     pSrc       points to the input vector
     @param[in]     blockSize  number of samples in input vector
     @param[out]    pResult    minimum value returned here
-    @return        none
    */
-void arm_min_no_idx_q7(const q7_t *pSrc, uint32_t blockSize, q7_t *pResult);
+void arm_min_no_idx_q7(
+     const q7_t *pSrc,
+      uint32_t   blockSize,
+      q7_t *pResult);
 
 /**
   @brief         Mean square error between two Q7 vectors.
@@ -659,10 +891,13 @@ void arm_min_no_idx_q7(const q7_t *pSrc, uint32_t blockSize, q7_t *pResult);
   @param[in]     pSrcB       points to the second input vector
   @param[in]     blockSize  number of samples in input vector
   @param[out]    pResult    mean square error
-  @return        none 
 */
-
-void arm_mse_q7(const q7_t *pSrcA, const q7_t *pSrcB, uint32_t blockSize, q7_t *pResult);
+  
+void arm_mse_q7(
+  const q7_t * pSrcA,
+  const q7_t * pSrcB,
+        uint32_t blockSize,
+        q7_t * pResult);
 
 /**
   @brief         Mean square error between two Q15 vectors.
@@ -670,10 +905,13 @@ void arm_mse_q7(const q7_t *pSrcA, const q7_t *pSrcB, uint32_t blockSize, q7_t *
   @param[in]     pSrcB       points to the second input vector
   @param[in]     blockSize  number of samples in input vector
   @param[out]    pResult    mean square error
-  @return        none 
 */
-
-void arm_mse_q15(const q15_t *pSrcA, const q15_t *pSrcB, uint32_t blockSize, q15_t *pResult);
+  
+void arm_mse_q15(
+  const q15_t * pSrcA,
+  const q15_t * pSrcB,
+        uint32_t blockSize,
+        q15_t * pResult);
 
 /**
   @brief         Mean square error between two Q31 vectors.
@@ -681,10 +919,13 @@ void arm_mse_q15(const q15_t *pSrcA, const q15_t *pSrcB, uint32_t blockSize, q15
   @param[in]     pSrcB       points to the second input vector
   @param[in]     blockSize  number of samples in input vector
   @param[out]    pResult    mean square error
-  @return        none 
 */
-
-void arm_mse_q31(const q31_t *pSrcA, const q31_t *pSrcB, uint32_t blockSize, q31_t *pResult);
+  
+void arm_mse_q31(
+  const q31_t * pSrcA,
+  const q31_t * pSrcB,
+        uint32_t blockSize,
+        q31_t * pResult);
 
 /**
   @brief         Mean square error between two single precision float vectors.
@@ -692,11 +933,13 @@ void arm_mse_q31(const q31_t *pSrcA, const q31_t *pSrcB, uint32_t blockSize, q31
   @param[in]     pSrcB       points to the second input vector
   @param[in]     blockSize  number of samples in input vector
   @param[out]    pResult    mean square error
-  @return        none 
 */
-
-void arm_mse_f32(const float32_t *pSrcA, const float32_t *pSrcB, uint32_t blockSize,
-                 float32_t *pResult);
+  
+void arm_mse_f32(
+  const float32_t * pSrcA,
+  const float32_t * pSrcB,
+        uint32_t blockSize,
+        float32_t * pResult);
 
 /**
   @brief         Mean square error between two double precision float vectors.
@@ -704,13 +947,41 @@ void arm_mse_f32(const float32_t *pSrcA, const float32_t *pSrcB, uint32_t blockS
   @param[in]     pSrcB       points to the second input vector
   @param[in]     blockSize  number of samples in input vector
   @param[out]    pResult    mean square error
-  @return        none 
 */
+  
+void arm_mse_f64(
+  const float64_t * pSrcA,
+  const float64_t * pSrcB,
+        uint32_t blockSize,
+        float64_t * pResult);
+
+
+/**
+ * @brief  Accumulation value of a floating-point vector.
+ * @param[in]  pSrc       is input pointer
+ * @param[in]  blockSize  is the number of samples to process
+ * @param[out] pResult    is output value.
+ */
+
+void arm_accumulate_f32(
+const float32_t * pSrc,
+      uint32_t blockSize,
+      float32_t * pResult);
+
+/**
+ * @brief  Accumulation value of a double-precision floating-point vector.
+ * @param[in]  pSrc       is input pointer
+ * @param[in]  blockSize  is the number of samples to process
+ * @param[out] pResult    is output value.
+ */
+
+void arm_accumulate_f64(
+const float64_t * pSrc,
+      uint32_t blockSize,
+      float64_t * pResult);
 
-void arm_mse_f64(const float64_t *pSrcA, const float64_t *pSrcB, uint32_t blockSize,
-                 float64_t *pResult);
 
-#ifdef __cplusplus
+#ifdef   __cplusplus
 }
 #endif
 
diff --git a/Libraries/CMSIS/5.9.0/DSP/Include/dsp/statistics_functions_f16.h b/Libraries/CMSIS/5.9.0/DSP/1.16.2/Include/dsp/statistics_functions_f16.h
old mode 100644
new mode 100755
similarity index 70%
rename from Libraries/CMSIS/5.9.0/DSP/Include/dsp/statistics_functions_f16.h
rename to Libraries/CMSIS/5.9.0/DSP/1.16.2/Include/dsp/statistics_functions_f16.h
index 9898dd20144..746e8df39b8
--- a/Libraries/CMSIS/5.9.0/DSP/Include/dsp/statistics_functions_f16.h
+++ b/Libraries/CMSIS/5.9.0/DSP/1.16.2/Include/dsp/statistics_functions_f16.h
@@ -1,8 +1,8 @@
 /******************************************************************************
  * @file     statistics_functions_f16.h
  * @brief    Public header file for CMSIS DSP Library
- * @version  V1.10.0
- * @date     08 July 2021
+ * @version  V1.10.1
+ * @date     14 July 2022
  * Target Processor: Cortex-M and Cortex-A cores
  ******************************************************************************/
 /*
@@ -23,8 +23,9 @@
  * limitations under the License.
  */
 
-#ifndef _STATISTICS_FUNCTIONS_F16_H_
-#define _STATISTICS_FUNCTIONS_F16_H_
+ 
+#ifndef STATISTICS_FUNCTIONS_F16_H_
+#define STATISTICS_FUNCTIONS_F16_H_
 
 #include "arm_math_types_f16.h"
 #include "arm_math_memory.h"
@@ -35,70 +36,93 @@
 #include "dsp/basic_math_functions_f16.h"
 #include "dsp/fast_math_functions_f16.h"
 
-#ifdef __cplusplus
-extern "C" {
+#ifdef   __cplusplus
+extern "C"
+{
 #endif
 
 #if defined(ARM_FLOAT16_SUPPORTED)
 
-/**
+ /**
    * @brief  Sum of the squares of the elements of a floating-point vector.
    * @param[in]  pSrc       is input pointer
    * @param[in]  blockSize  is the number of samples to process
    * @param[out] pResult    is output value.
    */
-void arm_power_f16(const float16_t *pSrc, uint32_t blockSize, float16_t *pResult);
+  void arm_power_f16(
+  const float16_t * pSrc,
+        uint32_t blockSize,
+        float16_t * pResult);
 
-/**
+ /**
    * @brief  Mean value of a floating-point vector.
    * @param[in]  pSrc       is input pointer
    * @param[in]  blockSize  is the number of samples to process
    * @param[out] pResult    is output value.
    */
-void arm_mean_f16(const float16_t *pSrc, uint32_t blockSize, float16_t *pResult);
+  void arm_mean_f16(
+  const float16_t * pSrc,
+        uint32_t blockSize,
+        float16_t * pResult);
 
-/**
+  /**
    * @brief  Variance of the elements of a floating-point vector.
    * @param[in]  pSrc       is input pointer
    * @param[in]  blockSize  is the number of samples to process
    * @param[out] pResult    is output value.
    */
-void arm_var_f16(const float16_t *pSrc, uint32_t blockSize, float16_t *pResult);
+  void arm_var_f16(
+  const float16_t * pSrc,
+        uint32_t blockSize,
+        float16_t * pResult);
 
-/**
+ /**
    * @brief  Root Mean Square of the elements of a floating-point vector.
    * @param[in]  pSrc       is input pointer
    * @param[in]  blockSize  is the number of samples to process
    * @param[out] pResult    is output value.
    */
-void arm_rms_f16(const float16_t *pSrc, uint32_t blockSize, float16_t *pResult);
+  void arm_rms_f16(
+  const float16_t * pSrc,
+        uint32_t blockSize,
+        float16_t * pResult);
 
-/**
+ /**
    * @brief  Standard deviation of the elements of a floating-point vector.
    * @param[in]  pSrc       is input pointer
    * @param[in]  blockSize  is the number of samples to process
    * @param[out] pResult    is output value.
    */
-void arm_std_f16(const float16_t *pSrc, uint32_t blockSize, float16_t *pResult);
+  void arm_std_f16(
+  const float16_t * pSrc,
+        uint32_t blockSize,
+        float16_t * pResult);
 
-/**
+ /**
    * @brief  Minimum value of a floating-point vector.
    * @param[in]  pSrc       is input pointer
    * @param[in]  blockSize  is the number of samples to process
    * @param[out] pResult    is output pointer
    * @param[out] pIndex     is the array index of the minimum value in the input buffer.
    */
-void arm_min_f16(const float16_t *pSrc, uint32_t blockSize, float16_t *pResult, uint32_t *pIndex);
+  void arm_min_f16(
+  const float16_t * pSrc,
+        uint32_t blockSize,
+        float16_t * pResult,
+        uint32_t * pIndex);
 
-/**
+ /**
    * @brief  Minimum value of absolute values of a floating-point vector.
    * @param[in]  pSrc       is input pointer
    * @param[in]  blockSize  is the number of samples to process
    * @param[out] pResult    is output pointer
    * @param[out] pIndex     is the array index of the minimum value in the input buffer.
    */
-void arm_absmin_f16(const float16_t *pSrc, uint32_t blockSize, float16_t *pResult,
-                    uint32_t *pIndex);
+  void arm_absmin_f16(
+  const float16_t * pSrc,
+        uint32_t blockSize,
+        float16_t * pResult,
+        uint32_t * pIndex);
 
 /**
  * @brief Maximum value of a floating-point vector.
@@ -107,7 +131,11 @@ void arm_absmin_f16(const float16_t *pSrc, uint32_t blockSize, float16_t *pResul
  * @param[out] pResult    maximum value returned here
  * @param[out] pIndex     index of maximum value returned here
  */
-void arm_max_f16(const float16_t *pSrc, uint32_t blockSize, float16_t *pResult, uint32_t *pIndex);
+  void arm_max_f16(
+  const float16_t * pSrc,
+        uint32_t blockSize,
+        float16_t * pResult,
+        uint32_t * pIndex);
 
 /**
  * @brief Maximum value of absolute values of a floating-point vector.
@@ -116,16 +144,22 @@ void arm_max_f16(const float16_t *pSrc, uint32_t blockSize, float16_t *pResult,
  * @param[out] pResult    maximum value returned here
  * @param[out] pIndex     index of maximum value returned here
  */
-void arm_absmax_f16(const float16_t *pSrc, uint32_t blockSize, float16_t *pResult,
-                    uint32_t *pIndex);
+  void arm_absmax_f16(
+  const float16_t * pSrc,
+        uint32_t blockSize,
+        float16_t * pResult,
+        uint32_t * pIndex);
 
-/**
+    /**
    * @brief  Minimum value of absolute values of a floating-point vector.
    * @param[in]  pSrc       is input pointer
    * @param[in]  blockSize  is the number of samples to process
    * @param[out] pResult    is output pointer
    */
-void arm_absmin_no_idx_f16(const float16_t *pSrc, uint32_t blockSize, float16_t *pResult);
+  void arm_absmin_no_idx_f16(
+  const float16_t * pSrc,
+        uint32_t blockSize,
+        float16_t * pResult);
 
 /**
  * @brief Maximum value of a floating-point vector.
@@ -133,7 +167,11 @@ void arm_absmin_no_idx_f16(const float16_t *pSrc, uint32_t blockSize, float16_t
  * @param[in]  blockSize  length of the input vector
  * @param[out] pResult    maximum value returned here
  */
-void arm_absmax_no_idx_f16(const float16_t *pSrc, uint32_t blockSize, float16_t *pResult);
+  void arm_absmax_no_idx_f16(
+  const float16_t * pSrc,
+        uint32_t blockSize,
+        float16_t * pResult);
+
 
 /**
  * @brief Entropy
@@ -141,13 +179,13 @@ void arm_absmax_no_idx_f16(const float16_t *pSrc, uint32_t blockSize, float16_t
  * @param[in]  pSrcA        Array of input values.
  * @param[in]  blockSize    Number of samples in the input array.
  * @return     Entropy      -Sum(p ln p)
- *
  */
+float16_t arm_entropy_f16(const float16_t * pSrcA,uint32_t blockSize);
 
-float16_t arm_entropy_f16(const float16_t *pSrcA, uint32_t blockSize);
 
 float16_t arm_logsumexp_f16(const float16_t *in, uint32_t blockSize);
 
+
 /**
  * @brief Dot product with log arithmetic
  *
@@ -158,11 +196,12 @@ float16_t arm_logsumexp_f16(const float16_t *in, uint32_t blockSize);
  * @param[in]       blockSize number of samples in each vector
  * @param[in]       pTmpBuffer temporary buffer of length blockSize
  * @return The log of the dot product .
- *
  */
+float16_t arm_logsumexp_dot_prod_f16(const float16_t * pSrcA,
+  const float16_t * pSrcB,
+  uint32_t blockSize,
+  float16_t *pTmpBuffer);
 
-float16_t arm_logsumexp_dot_prod_f16(const float16_t *pSrcA, const float16_t *pSrcB,
-                                     uint32_t blockSize, float16_t *pTmpBuffer);
 
 /**
  * @brief Kullback-Leibler
@@ -171,28 +210,35 @@ float16_t arm_logsumexp_dot_prod_f16(const float16_t *pSrcA, const float16_t *pS
  * @param[in]  pSrcB         Pointer to an array of input values for probability distribution B.
  * @param[in]  blockSize     Number of samples in the input array.
  * @return Kullback-Leibler  Divergence D(A || B)
- *
  */
-float16_t arm_kullback_leibler_f16(const float16_t *pSrcA, const float16_t *pSrcB,
-                                   uint32_t blockSize);
+float16_t arm_kullback_leibler_f16(const float16_t * pSrcA
+  ,const float16_t * pSrcB
+  ,uint32_t blockSize);
+
 
 /**
     @brief         Maximum value of a floating-point vector.
     @param[in]     pSrc       points to the input vector
     @param[in]     blockSize  number of samples in input vector
     @param[out]    pResult    maximum value returned here
-    @return        none
    */
-void arm_max_no_idx_f16(const float16_t *pSrc, uint32_t blockSize, float16_t *pResult);
+  void arm_max_no_idx_f16(
+      const float16_t *pSrc,
+      uint32_t   blockSize,
+      float16_t *pResult);
+
 
 /**
     @brief         Minimum value of a floating-point vector.
     @param[in]     pSrc       points to the input vector
     @param[in]     blockSize  number of samples in input vector
     @param[out]    pResult    minimum value returned here
-    @return        none
    */
-void arm_min_no_idx_f16(const float16_t *pSrc, uint32_t blockSize, float16_t *pResult);
+  void arm_min_no_idx_f16(
+      const float16_t *pSrc,
+      uint32_t   blockSize,
+      float16_t *pResult);
+
 
 /**
   @brief         Mean square error between two half precision float vectors.
@@ -200,14 +246,28 @@ void arm_min_no_idx_f16(const float16_t *pSrc, uint32_t blockSize, float16_t *pR
   @param[in]     pSrcB       points to the second input vector
   @param[in]     blockSize  number of samples in input vector
   @param[out]    pResult    mean square error
-  @return        none 
 */
+void arm_mse_f16(
+  const float16_t * pSrcA,
+  const float16_t * pSrcB,
+        uint32_t blockSize,
+        float16_t * pResult);
+
+
+/**
+  * @brief  Sum of the values of a half-precision floating-point vector.
+  * @param[in]  pSrc       is input pointer
+  * @param[in]  blockSize  is the number of samples to process
+  * @param[out] pResult    is output value.
+  */
+ void arm_accumulate_f16(
+ const float16_t * pSrc,
+       uint32_t blockSize,
+       float16_t * pResult);
 
-void arm_mse_f16(const float16_t *pSrcA, const float16_t *pSrcB, uint32_t blockSize,
-                 float16_t *pResult);
 
 #endif /*defined(ARM_FLOAT16_SUPPORTED)*/
-#ifdef __cplusplus
+#ifdef   __cplusplus
 }
 #endif
 
diff --git a/Libraries/CMSIS/5.9.0/DSP/Include/dsp/support_functions.h b/Libraries/CMSIS/5.9.0/DSP/1.16.2/Include/dsp/support_functions.h
old mode 100644
new mode 100755
similarity index 52%
rename from Libraries/CMSIS/5.9.0/DSP/Include/dsp/support_functions.h
rename to Libraries/CMSIS/5.9.0/DSP/1.16.2/Include/dsp/support_functions.h
index 3109e7338d2..928cf403f15
--- a/Libraries/CMSIS/5.9.0/DSP/Include/dsp/support_functions.h
+++ b/Libraries/CMSIS/5.9.0/DSP/1.16.2/Include/dsp/support_functions.h
@@ -1,8 +1,8 @@
 /******************************************************************************
  * @file     support_functions.h
  * @brief    Public header file for CMSIS DSP Library
- * @version  V1.10.0
- * @date     08 July 2021
+ * @version  V1.10.1
+ * @date     18 August 2022
  * Target Processor: Cortex-M and Cortex-A cores
  ******************************************************************************/
 /*
@@ -23,8 +23,9 @@
  * limitations under the License.
  */
 
-#ifndef _SUPPORT_FUNCTIONS_H_
-#define _SUPPORT_FUNCTIONS_H_
+ 
+#ifndef SUPPORT_FUNCTIONS_H_
+#define SUPPORT_FUNCTIONS_H_
 
 #include "arm_math_types.h"
 #include "arm_math_memory.h"
@@ -32,277 +33,485 @@
 #include "dsp/none.h"
 #include "dsp/utils.h"
 
-#ifdef __cplusplus
-extern "C" {
+#ifdef   __cplusplus
+extern "C"
+{
 #endif
 
 /**
  * @defgroup groupSupport Support Functions
  */
 
+
+/**
+   * @brief Converts the elements of the 64 bit floating-point vector to floating-point vector.
+   * @param[in]  pSrc       points to the floating-point 64 input vector
+   * @param[out] pDst       points to the floating-point output vector
+   * @param[in]  blockSize  length of the input vector
+   */
+  void arm_f64_to_float(
+  const float64_t * pSrc,
+        float32_t * pDst,
+        uint32_t blockSize);
+
+/**
+   * @brief Converts the elements of the 64 bit floating-point vector to Q31 vector.
+   * @param[in]  pSrc       points to the floating-point 64 input vector
+   * @param[out] pDst       points to the Q31 output vector
+   * @param[in]  blockSize  length of the input vector
+   */
+  void arm_f64_to_q31(
+  const float64_t * pSrc,
+        q31_t * pDst,
+        uint32_t blockSize);
+
+/**
+   * @brief Converts the elements of the 64 bit floating-point vector to Q15 vector.
+   * @param[in]  pSrc       points to the floating-point 64 input vector
+   * @param[out] pDst       points to the Q15 output vector
+   * @param[in]  blockSize  length of the input vector
+   */
+  void arm_f64_to_q15(
+  const float64_t * pSrc,
+        q15_t * pDst,
+        uint32_t blockSize);
+
+/**
+   * @brief Converts the elements of the 64 bit floating-point vector to Q7 vector.
+   * @param[in]  pSrc       points to the floating-point 64 input vector
+   * @param[out] pDst       points to the Q7 output vector
+   * @param[in]  blockSize  length of the input vector
+   */
+  void arm_f64_to_q7(
+  const float64_t * pSrc,
+        q7_t * pDst,
+        uint32_t blockSize);
+
+
+
+/**
+   * @brief Converts the elements of the floating-point vector to a 64 bit floating-point vector.
+   * @param[in]  pSrc       points to the floating-point input vector
+   * @param[out] pDst       points to the 64 bit floating-point output vector
+   * @param[in]  blockSize  length of the input vector
+   */
+  void arm_float_to_f64(
+  const float32_t * pSrc,
+        float64_t * pDst,
+        uint32_t blockSize);
+
 /**
    * @brief Converts the elements of the floating-point vector to Q31 vector.
    * @param[in]  pSrc       points to the floating-point input vector
    * @param[out] pDst       points to the Q31 output vector
    * @param[in]  blockSize  length of the input vector
    */
-void arm_float_to_q31(const float32_t *pSrc, q31_t *pDst, uint32_t blockSize);
+  void arm_float_to_q31(
+  const float32_t * pSrc,
+        q31_t * pDst,
+        uint32_t blockSize);
 
-/**
+
+  /**
    * @brief Converts the elements of the floating-point vector to Q15 vector.
    * @param[in]  pSrc       points to the floating-point input vector
    * @param[out] pDst       points to the Q15 output vector
    * @param[in]  blockSize  length of the input vector
    */
-void arm_float_to_q15(const float32_t *pSrc, q15_t *pDst, uint32_t blockSize);
+  void arm_float_to_q15(
+  const float32_t * pSrc,
+        q15_t * pDst,
+        uint32_t blockSize);
 
-/**
+
+  /**
    * @brief Converts the elements of the floating-point vector to Q7 vector.
    * @param[in]  pSrc       points to the floating-point input vector
    * @param[out] pDst       points to the Q7 output vector
    * @param[in]  blockSize  length of the input vector
    */
-void arm_float_to_q7(const float32_t *pSrc, q7_t *pDst, uint32_t blockSize);
+  void arm_float_to_q7(
+  const float32_t * pSrc,
+        q7_t * pDst,
+        uint32_t blockSize);
 
 /**
+ * @brief  Converts the elements of the Q31 vector to 64 bit floating-point vector.
+ * @param[in]  pSrc       is input pointer
+ * @param[out] pDst       is output pointer
+ * @param[in]  blockSize  is the number of samples to process
+ */
+void arm_q31_to_f64(
+const q31_t * pSrc,
+      float64_t * pDst,
+      uint32_t blockSize);
+
+  /**
    * @brief  Converts the elements of the Q31 vector to floating-point vector.
    * @param[in]  pSrc       is input pointer
    * @param[out] pDst       is output pointer
    * @param[in]  blockSize  is the number of samples to process
    */
-void arm_q31_to_float(const q31_t *pSrc, float32_t *pDst, uint32_t blockSize);
+  void arm_q31_to_float(
+  const q31_t * pSrc,
+        float32_t * pDst,
+        uint32_t blockSize);
 
-/**
+
+  /**
    * @brief  Converts the elements of the Q31 vector to Q15 vector.
    * @param[in]  pSrc       is input pointer
    * @param[out] pDst       is output pointer
    * @param[in]  blockSize  is the number of samples to process
    */
-void arm_q31_to_q15(const q31_t *pSrc, q15_t *pDst, uint32_t blockSize);
+  void arm_q31_to_q15(
+  const q31_t * pSrc,
+        q15_t * pDst,
+        uint32_t blockSize);
 
-/**
+
+  /**
    * @brief  Converts the elements of the Q31 vector to Q7 vector.
    * @param[in]  pSrc       is input pointer
    * @param[out] pDst       is output pointer
    * @param[in]  blockSize  is the number of samples to process
    */
-void arm_q31_to_q7(const q31_t *pSrc, q7_t *pDst, uint32_t blockSize);
+  void arm_q31_to_q7(
+  const q31_t * pSrc,
+        q7_t * pDst,
+        uint32_t blockSize);
 
 /**
+ * @brief  Converts the elements of the Q15 vector to 64 bit floating-point vector.
+ * @param[in]  pSrc       is input pointer
+ * @param[out] pDst       is output pointer
+ * @param[in]  blockSize  is the number of samples to process
+ */
+void arm_q15_to_f64(
+const q15_t * pSrc,
+      float64_t * pDst,
+      uint32_t blockSize);
+
+  /**
    * @brief  Converts the elements of the Q15 vector to floating-point vector.
    * @param[in]  pSrc       is input pointer
    * @param[out] pDst       is output pointer
    * @param[in]  blockSize  is the number of samples to process
    */
-void arm_q15_to_float(const q15_t *pSrc, float32_t *pDst, uint32_t blockSize);
+  void arm_q15_to_float(
+  const q15_t * pSrc,
+        float32_t * pDst,
+        uint32_t blockSize);
 
-/**
+
+  /**
    * @brief  Converts the elements of the Q15 vector to Q31 vector.
    * @param[in]  pSrc       is input pointer
    * @param[out] pDst       is output pointer
    * @param[in]  blockSize  is the number of samples to process
    */
-void arm_q15_to_q31(const q15_t *pSrc, q31_t *pDst, uint32_t blockSize);
+  void arm_q15_to_q31(
+  const q15_t * pSrc,
+        q31_t * pDst,
+        uint32_t blockSize);
 
-/**
+
+  /**
    * @brief  Converts the elements of the Q15 vector to Q7 vector.
    * @param[in]  pSrc       is input pointer
    * @param[out] pDst       is output pointer
    * @param[in]  blockSize  is the number of samples to process
    */
-void arm_q15_to_q7(const q15_t *pSrc, q7_t *pDst, uint32_t blockSize);
+  void arm_q15_to_q7(
+  const q15_t * pSrc,
+        q7_t * pDst,
+        uint32_t blockSize);
 
 /**
+ * @brief  Converts the elements of the Q7 vector to 64 bit floating-point vector.
+ * @param[in]  pSrc       is input pointer
+ * @param[out] pDst       is output pointer
+ * @param[in]  blockSize  is the number of samples to process
+ */
+void arm_q7_to_f64(
+const q7_t * pSrc,
+      float64_t * pDst,
+      uint32_t blockSize);
+
+  /**
    * @brief  Converts the elements of the Q7 vector to floating-point vector.
    * @param[in]  pSrc       is input pointer
    * @param[out] pDst       is output pointer
    * @param[in]  blockSize  is the number of samples to process
    */
-void arm_q7_to_float(const q7_t *pSrc, float32_t *pDst, uint32_t blockSize);
+  void arm_q7_to_float(
+  const q7_t * pSrc,
+        float32_t * pDst,
+        uint32_t blockSize);
 
-/**
+
+  /**
    * @brief  Converts the elements of the Q7 vector to Q31 vector.
    * @param[in]  pSrc       input pointer
    * @param[out] pDst       output pointer
    * @param[in]  blockSize  number of samples to process
    */
-void arm_q7_to_q31(const q7_t *pSrc, q31_t *pDst, uint32_t blockSize);
+  void arm_q7_to_q31(
+  const q7_t * pSrc,
+        q31_t * pDst,
+        uint32_t blockSize);
 
-/**
+
+  /**
    * @brief  Converts the elements of the Q7 vector to Q15 vector.
    * @param[in]  pSrc       input pointer
    * @param[out] pDst       output pointer
    * @param[in]  blockSize  number of samples to process
    */
-void arm_q7_to_q15(const q7_t *pSrc, q15_t *pDst, uint32_t blockSize);
+  void arm_q7_to_q15(
+  const q7_t * pSrc,
+        q15_t * pDst,
+        uint32_t blockSize);
 
-/**
+
+
+
+  
+  /**
    * @brief Struct for specifying sorting algorithm
    */
-typedef enum {
-    ARM_SORT_BITONIC = 0,
-    /**< Bitonic sort   */
-    ARM_SORT_BUBBLE = 1,
-    /**< Bubble sort    */
-    ARM_SORT_HEAP = 2,
-    /**< Heap sort      */
+  typedef enum
+  {
+    ARM_SORT_BITONIC   = 0,
+             /**< Bitonic sort   */
+    ARM_SORT_BUBBLE    = 1,
+             /**< Bubble sort    */
+    ARM_SORT_HEAP      = 2,
+             /**< Heap sort      */
     ARM_SORT_INSERTION = 3,
-    /**< Insertion sort */
-    ARM_SORT_QUICK = 4,
-    /**< Quick sort     */
+             /**< Insertion sort */
+    ARM_SORT_QUICK     = 4,
+             /**< Quick sort     */
     ARM_SORT_SELECTION = 5
-    /**< Selection sort */
-} arm_sort_alg;
+             /**< Selection sort */
+  } arm_sort_alg;
 
-/**
+  /**
    * @brief Struct for specifying sorting algorithm
    */
-typedef enum {
+  typedef enum
+  {
     ARM_SORT_DESCENDING = 0,
-    /**< Descending order (9 to 0) */
+             /**< Descending order (9 to 0) */
     ARM_SORT_ASCENDING = 1
-    /**< Ascending order (0 to 9) */
-} arm_sort_dir;
+             /**< Ascending order (0 to 9) */
+  } arm_sort_dir;
 
-/**
+  /**
    * @brief Instance structure for the sorting algorithms.
    */
-typedef struct {
-    arm_sort_alg alg; /**< Sorting algorithm selected */
-    arm_sort_dir dir; /**< Sorting order (direction)  */
-} arm_sort_instance_f32;
+  typedef struct            
+  {
+    arm_sort_alg alg;        /**< Sorting algorithm selected */
+    arm_sort_dir dir;        /**< Sorting order (direction)  */
+  } arm_sort_instance_f32;  
 
-/**
+  /**
    * @param[in]  S          points to an instance of the sorting structure.
    * @param[in]  pSrc       points to the block of input data.
    * @param[out] pDst       points to the block of output data.
    * @param[in]  blockSize  number of samples to process.
    */
-void arm_sort_f32(const arm_sort_instance_f32 *S, float32_t *pSrc, float32_t *pDst,
-                  uint32_t blockSize);
+  void arm_sort_f32(
+    const arm_sort_instance_f32 * S, 
+          float32_t * pSrc, 
+          float32_t * pDst, 
+          uint32_t blockSize);
 
-/**
+  /**
    * @param[in,out]  S            points to an instance of the sorting structure.
    * @param[in]      alg          Selected algorithm.
    * @param[in]      dir          Sorting order.
    */
-void arm_sort_init_f32(arm_sort_instance_f32 *S, arm_sort_alg alg, arm_sort_dir dir);
+  void arm_sort_init_f32(
+    arm_sort_instance_f32 * S, 
+    arm_sort_alg alg, 
+    arm_sort_dir dir); 
 
-/**
+  /**
    * @brief Instance structure for the sorting algorithms.
    */
-typedef struct {
-    arm_sort_dir dir; /**< Sorting order (direction)  */
-    float32_t *buffer; /**< Working buffer */
-} arm_merge_sort_instance_f32;
+  typedef struct            
+  {
+    arm_sort_dir dir;        /**< Sorting order (direction)  */
+    float32_t * buffer;      /**< Working buffer */
+  } arm_merge_sort_instance_f32;  
 
-/**
+  /**
    * @param[in]      S          points to an instance of the sorting structure.
    * @param[in,out]  pSrc       points to the block of input data.
    * @param[out]     pDst       points to the block of output data
    * @param[in]      blockSize  number of samples to process.
    */
-void arm_merge_sort_f32(const arm_merge_sort_instance_f32 *S, float32_t *pSrc, float32_t *pDst,
-                        uint32_t blockSize);
+  void arm_merge_sort_f32(
+    const arm_merge_sort_instance_f32 * S,
+          float32_t *pSrc,
+          float32_t *pDst,
+          uint32_t blockSize);
 
-/**
+  /**
    * @param[in,out]  S            points to an instance of the sorting structure.
    * @param[in]      dir          Sorting order.
    * @param[in]      buffer       Working buffer.
    */
-void arm_merge_sort_init_f32(arm_merge_sort_instance_f32 *S, arm_sort_dir dir, float32_t *buffer);
-
-/**
+  void arm_merge_sort_init_f32(
+    arm_merge_sort_instance_f32 * S,
+    arm_sort_dir dir,
+    float32_t * buffer);
+
+ 
+ 
+  /**
    * @brief  Copies the elements of a floating-point vector.
    * @param[in]  pSrc       input pointer
    * @param[out] pDst       output pointer
    * @param[in]  blockSize  number of samples to process
    */
-void arm_copy_f32(const float32_t *pSrc, float32_t *pDst, uint32_t blockSize);
-
-/**
+  void arm_copy_f32(
+  const float32_t * pSrc,
+        float32_t * pDst,
+        uint32_t blockSize);
+
+ 
+ 
+  /**
    * @brief  Copies the elements of a floating-point vector.
    * @param[in]  pSrc       input pointer
    * @param[out] pDst       output pointer
    * @param[in]  blockSize  number of samples to process
    */
-void arm_copy_f64(const float64_t *pSrc, float64_t *pDst, uint32_t blockSize);
+  void arm_copy_f64(
+  const float64_t * pSrc,
+        float64_t * pDst,
+        uint32_t blockSize);
 
-/**
+
+
+  /**
    * @brief  Copies the elements of a Q7 vector.
    * @param[in]  pSrc       input pointer
    * @param[out] pDst       output pointer
    * @param[in]  blockSize  number of samples to process
    */
-void arm_copy_q7(const q7_t *pSrc, q7_t *pDst, uint32_t blockSize);
+  void arm_copy_q7(
+  const q7_t * pSrc,
+        q7_t * pDst,
+        uint32_t blockSize);
 
-/**
+
+  /**
    * @brief  Copies the elements of a Q15 vector.
    * @param[in]  pSrc       input pointer
    * @param[out] pDst       output pointer
    * @param[in]  blockSize  number of samples to process
    */
-void arm_copy_q15(const q15_t *pSrc, q15_t *pDst, uint32_t blockSize);
+  void arm_copy_q15(
+  const q15_t * pSrc,
+        q15_t * pDst,
+        uint32_t blockSize);
 
-/**
+
+  /**
    * @brief  Copies the elements of a Q31 vector.
    * @param[in]  pSrc       input pointer
    * @param[out] pDst       output pointer
    * @param[in]  blockSize  number of samples to process
    */
-void arm_copy_q31(const q31_t *pSrc, q31_t *pDst, uint32_t blockSize);
+  void arm_copy_q31(
+  const q31_t * pSrc,
+        q31_t * pDst,
+        uint32_t blockSize);
 
-/**
+
+  /**
    * @brief  Fills a constant value into a floating-point vector.
    * @param[in]  value      input value to be filled
    * @param[out] pDst       output pointer
    * @param[in]  blockSize  number of samples to process
    */
-void arm_fill_f32(float32_t value, float32_t *pDst, uint32_t blockSize);
+  void arm_fill_f32(
+        float32_t value,
+        float32_t * pDst,
+        uint32_t blockSize);
 
-/**
+
+  /**
    * @brief  Fills a constant value into a floating-point vector.
    * @param[in]  value      input value to be filled
    * @param[out] pDst       output pointer
    * @param[in]  blockSize  number of samples to process
    */
-void arm_fill_f64(float64_t value, float64_t *pDst, uint32_t blockSize);
+  void arm_fill_f64(
+        float64_t value,
+        float64_t * pDst,
+        uint32_t blockSize);
 
-/**
+
+  /**
    * @brief  Fills a constant value into a Q7 vector.
    * @param[in]  value      input value to be filled
    * @param[out] pDst       output pointer
    * @param[in]  blockSize  number of samples to process
    */
-void arm_fill_q7(q7_t value, q7_t *pDst, uint32_t blockSize);
+  void arm_fill_q7(
+        q7_t value,
+        q7_t * pDst,
+        uint32_t blockSize);
 
-/**
+
+  /**
    * @brief  Fills a constant value into a Q15 vector.
    * @param[in]  value      input value to be filled
    * @param[out] pDst       output pointer
    * @param[in]  blockSize  number of samples to process
    */
-void arm_fill_q15(q15_t value, q15_t *pDst, uint32_t blockSize);
+  void arm_fill_q15(
+        q15_t value,
+        q15_t * pDst,
+        uint32_t blockSize);
 
-/**
+
+  /**
    * @brief  Fills a constant value into a Q31 vector.
    * @param[in]  value      input value to be filled
    * @param[out] pDst       output pointer
    * @param[in]  blockSize  number of samples to process
    */
-void arm_fill_q31(q31_t value, q31_t *pDst, uint32_t blockSize);
+  void arm_fill_q31(
+        q31_t value,
+        q31_t * pDst,
+        uint32_t blockSize);
+
+
+
+
+
+
 
 /**
- * @brief Weighted sum
+ * @brief Weighted average
  *
  *
  * @param[in]    *in           Array of input values.
  * @param[in]    *weigths      Weights
  * @param[in]    blockSize     Number of samples in the input array.
- * @return Weighted sum
+ * @return Weighted average
  *
  */
-float32_t arm_weighted_sum_f32(const float32_t *in, const float32_t *weigths, uint32_t blockSize);
+float32_t arm_weighted_average_f32(const float32_t *in
+  , const float32_t *weigths
+  , uint32_t blockSize);
+
 
 /**
  * @brief Barycenter
@@ -313,13 +522,17 @@ float32_t arm_weighted_sum_f32(const float32_t *in, const float32_t *weigths, ui
  * @param[out]   out        Barycenter
  * @param[in]    nbVectors  Number of vectors
  * @param[in]    vecDim     Dimension of space (vector dimension)
- * @return       None
  *
  */
-void arm_barycenter_f32(const float32_t *in, const float32_t *weights, float32_t *out,
-                        uint32_t nbVectors, uint32_t vecDim);
+void arm_barycenter_f32(const float32_t *in
+  , const float32_t *weights
+  , float32_t *out
+  , uint32_t nbVectors
+  , uint32_t vecDim);
+
+
 
-#ifdef __cplusplus
+#ifdef   __cplusplus
 }
 #endif
 
diff --git a/Libraries/CMSIS/5.9.0/DSP/Include/dsp/support_functions_f16.h b/Libraries/CMSIS/5.9.0/DSP/1.16.2/Include/dsp/support_functions_f16.h
old mode 100644
new mode 100755
similarity index 70%
rename from Libraries/CMSIS/5.9.0/DSP/Include/dsp/support_functions_f16.h
rename to Libraries/CMSIS/5.9.0/DSP/1.16.2/Include/dsp/support_functions_f16.h
index 1af398b219a..ab0c1ad7a90
--- a/Libraries/CMSIS/5.9.0/DSP/Include/dsp/support_functions_f16.h
+++ b/Libraries/CMSIS/5.9.0/DSP/1.16.2/Include/dsp/support_functions_f16.h
@@ -1,8 +1,8 @@
 /******************************************************************************
  * @file     support_functions_f16.h
  * @brief    Public header file for CMSIS DSP Library
- * @version  V1.10.0
- * @date     08 July 2021
+ * @version  V1.10.1
+ * @date     18 August 2022
  * Target Processor: Cortex-M and Cortex-A cores
  ******************************************************************************/
 /*
@@ -23,8 +23,9 @@
  * limitations under the License.
  */
 
-#ifndef _SUPPORT_FUNCTIONS_F16_H_
-#define _SUPPORT_FUNCTIONS_F16_H_
+ 
+#ifndef SUPPORT_FUNCTIONS_F16_H_
+#define SUPPORT_FUNCTIONS_F16_H_
 
 #include "arm_math_types_f16.h"
 #include "arm_math_memory.h"
@@ -32,27 +33,30 @@
 #include "dsp/none.h"
 #include "dsp/utils.h"
 
-#ifdef __cplusplus
-extern "C" {
+#ifdef   __cplusplus
+extern "C"
+{
 #endif
 
 #if defined(ARM_FLOAT16_SUPPORTED)
 
-/**
+  /**
    * @brief  Copies the elements of a floating-point vector.
    * @param[in]  pSrc       input pointer
    * @param[out] pDst       output pointer
    * @param[in]  blockSize  number of samples to process
    */
-void arm_copy_f16(const float16_t *pSrc, float16_t *pDst, uint32_t blockSize);
+void arm_copy_f16(const float16_t * pSrc, float16_t * pDst, uint32_t blockSize);
 
-/**
+
+  /**
    * @brief  Fills a constant value into a floating-point vector.
    * @param[in]  value      input value to be filled
    * @param[out] pDst       output pointer
    * @param[in]  blockSize  number of samples to process
    */
-void arm_fill_f16(float16_t value, float16_t *pDst, uint32_t blockSize);
+void arm_fill_f16(float16_t value, float16_t * pDst, uint32_t blockSize);
+
 
 /**
    * @brief Converts the elements of the floating-point vector to Q31 vector.
@@ -60,7 +64,8 @@ void arm_fill_f16(float16_t value, float16_t *pDst, uint32_t blockSize);
    * @param[out] pDst       points to the q15 output vector
    * @param[in]  blockSize  length of the input vector
    */
-void arm_f16_to_q15(const float16_t *pSrc, q15_t *pDst, uint32_t blockSize);
+void arm_f16_to_q15(const float16_t * pSrc, q15_t * pDst, uint32_t blockSize);
+
 
 /**
    * @brief Converts the elements of the floating-point vector to Q31 vector.
@@ -68,7 +73,26 @@ void arm_f16_to_q15(const float16_t *pSrc, q15_t *pDst, uint32_t blockSize);
    * @param[out] pDst       points to the f16 output vector
    * @param[in]  blockSize  length of the input vector
    */
-void arm_q15_to_f16(const q15_t *pSrc, float16_t *pDst, uint32_t blockSize);
+void arm_q15_to_f16(const q15_t * pSrc, float16_t * pDst, uint32_t blockSize);
+
+
+/**
+   * @brief Converts the elements of a 64-bit floating-point vector to a 16-bit floating-point vector.
+   * @param[in]  pSrc       points to the f64 input vector
+   * @param[out] pDst       points to the f16 output vector
+   * @param[in]  blockSize  length of the input vector
+   */
+void arm_f64_to_f16(const float64_t * pSrc, float16_t * pDst, uint32_t blockSize);
+
+
+/**
+   * @brief Converts the elements of a 16-bit floating-point vector to a 64-bit floating-point vector.
+   * @param[in]  pSrc       points to the f16 input vector
+   * @param[out] pDst       points to the f64 output vector
+   * @param[in]  blockSize  length of the input vector
+   */
+void arm_f16_to_f64(const float16_t * pSrc, float64_t * pDst, uint32_t blockSize);
+
 
 /**
    * @brief Converts the elements of the floating-point vector to Q31 vector.
@@ -76,7 +100,8 @@ void arm_q15_to_f16(const q15_t *pSrc, float16_t *pDst, uint32_t blockSize);
    * @param[out] pDst       points to the f16 output vector
    * @param[in]  blockSize  length of the input vector
    */
-void arm_float_to_f16(const float32_t *pSrc, float16_t *pDst, uint32_t blockSize);
+void arm_float_to_f16(const float32_t * pSrc, float16_t * pDst, uint32_t blockSize);
+
 
 /**
    * @brief Converts the elements of the floating-point vector to Q31 vector.
@@ -84,34 +109,35 @@ void arm_float_to_f16(const float32_t *pSrc, float16_t *pDst, uint32_t blockSize
    * @param[out] pDst       points to the f32 output vector
    * @param[in]  blockSize  length of the input vector
    */
-void arm_f16_to_float(const float16_t *pSrc, float32_t *pDst, uint32_t blockSize);
+void arm_f16_to_float(const float16_t * pSrc, float32_t * pDst, uint32_t blockSize);
+
 
 /**
- * @brief Weighted sum
- *
- *
+ * @brief Weighted average
  * @param[in]    *in           Array of input values.
  * @param[in]    *weigths      Weights
  * @param[in]    blockSize     Number of samples in the input array.
- * @return Weighted sum
- *
+ * @return Weighted average
  */
-float16_t arm_weighted_sum_f16(const float16_t *in, const float16_t *weigths, uint32_t blockSize);
+float16_t arm_weighted_average_f16(const float16_t *in
+  , const float16_t *weigths
+  , uint32_t blockSize);
+
 
 /**
  * @brief Barycenter
- *
- *
  * @param[in]    in         List of vectors
  * @param[in]    weights    Weights of the vectors
  * @param[out]   out        Barycenter
  * @param[in]    nbVectors  Number of vectors
  * @param[in]    vecDim     Dimension of space (vector dimension)
- * @return       None
- *
  */
-void arm_barycenter_f16(const float16_t *in, const float16_t *weights, float16_t *out,
-                        uint32_t nbVectors, uint32_t vecDim);
+void arm_barycenter_f16(const float16_t *in
+  , const float16_t *weights
+  , float16_t *out
+  , uint32_t nbVectors
+  , uint32_t vecDim);
+
 
 /**
   @ingroup groupSupport
@@ -140,9 +166,9 @@ void arm_barycenter_f16(const float16_t *in, const float16_t *weights, float16_t
    */
 __STATIC_INLINE int16_t arm_typecast_s16_f16(float16_t x)
 {
-    int16_t res;
-    res = *(int16_t *)memcpy((char *)&res, (char *)&x, sizeof(float16_t));
-    return (res);
+   int16_t res;
+   res=*(int16_t*)memcpy((char*)&res,(char*)&x,sizeof(float16_t));
+   return(res);
 }
 
 /**
@@ -159,17 +185,19 @@ __STATIC_INLINE int16_t arm_typecast_s16_f16(float16_t x)
    */
 __STATIC_INLINE float16_t arm_typecast_f16_s16(int16_t x)
 {
-    float16_t res;
-    res = *(float16_t *)memcpy((char *)&res, (char *)&x, sizeof(int16_t));
-    return (res);
+   float16_t res;
+   res=*(float16_t*)memcpy((char*)&res,(char*)&x,sizeof(int16_t));
+   return(res);
 }
 
+
 /**
   @} end of typecast group
  */
 
+
 #endif /*defined(ARM_FLOAT16_SUPPORTED)*/
-#ifdef __cplusplus
+#ifdef   __cplusplus
 }
 #endif
 
diff --git a/Libraries/CMSIS/5.9.0/DSP/Include/dsp/svm_defines.h b/Libraries/CMSIS/5.9.0/DSP/1.16.2/Include/dsp/svm_defines.h
old mode 100644
new mode 100755
similarity index 84%
rename from Libraries/CMSIS/5.9.0/DSP/Include/dsp/svm_defines.h
rename to Libraries/CMSIS/5.9.0/DSP/1.16.2/Include/dsp/svm_defines.h
index 3aa2de03be5..185a8a902c9
--- a/Libraries/CMSIS/5.9.0/DSP/Include/dsp/svm_defines.h
+++ b/Libraries/CMSIS/5.9.0/DSP/1.16.2/Include/dsp/svm_defines.h
@@ -24,21 +24,23 @@
  * limitations under the License.
  */
 
-#ifndef _SVM_DEFINES_H_
-#define _SVM_DEFINES_H_
+ 
+#ifndef SVM_DEFINES_H_
+#define SVM_DEFINES_H_
 
 /**
  * @brief Struct for specifying SVM Kernel
  */
-typedef enum {
+typedef enum
+{
     ARM_ML_KERNEL_LINEAR = 0,
-    /**< Linear kernel */
+             /**< Linear kernel */
     ARM_ML_KERNEL_POLYNOMIAL = 1,
-    /**< Polynomial kernel */
+             /**< Polynomial kernel */
     ARM_ML_KERNEL_RBF = 2,
-    /**< Radial Basis Function kernel */
+             /**< Radial Basis Function kernel */
     ARM_ML_KERNEL_SIGMOID = 3
-    /**< Sigmoid kernel */
+             /**< Sigmoid kernel */
 } arm_ml_kernel_type;
 
 #endif
diff --git a/Libraries/CMSIS/5.9.0/DSP/Include/dsp/svm_functions.h b/Libraries/CMSIS/5.9.0/DSP/1.16.2/Include/dsp/svm_functions.h
old mode 100644
new mode 100755
similarity index 62%
rename from Libraries/CMSIS/5.9.0/DSP/Include/dsp/svm_functions.h
rename to Libraries/CMSIS/5.9.0/DSP/1.16.2/Include/dsp/svm_functions.h
index 91149b4a46d..cb00cd4f1b3
--- a/Libraries/CMSIS/5.9.0/DSP/Include/dsp/svm_functions.h
+++ b/Libraries/CMSIS/5.9.0/DSP/1.16.2/Include/dsp/svm_functions.h
@@ -23,8 +23,9 @@
  * limitations under the License.
  */
 
-#ifndef _SVM_FUNCTIONS_H_
-#define _SVM_FUNCTIONS_H_
+ 
+#ifndef SVM_FUNCTIONS_H_
+#define SVM_FUNCTIONS_H_
 
 #include "arm_math_types.h"
 #include "arm_math_memory.h"
@@ -33,8 +34,9 @@
 #include "dsp/utils.h"
 #include "dsp/svm_defines.h"
 
-#ifdef __cplusplus
-extern "C" {
+#ifdef   __cplusplus
+extern "C"
+{
 #endif
 
 #define STEP(x) (x) <= 0 ? 0 : 1
@@ -58,73 +60,82 @@ extern "C" {
  * @param[in]    x           value
  * @param[in]    nb          integer exponent >= 1
  * @return x^nb
- *
  */
 __STATIC_INLINE float32_t arm_exponent_f32(float32_t x, int32_t nb)
 {
     float32_t r = x;
-    nb--;
-    while (nb > 0) {
+    nb --;
+    while(nb > 0)
+    {
         r = r * x;
         nb--;
     }
-    return (r);
+    return(r);
 }
 
+
 /**
  * @brief Instance structure for linear SVM prediction function.
  */
-typedef struct {
-    uint32_t nbOfSupportVectors; /**< Number of support vectors */
-    uint32_t vectorDimension; /**< Dimension of vector space */
-    float32_t intercept; /**< Intercept */
-    const float32_t *dualCoefficients; /**< Dual coefficients */
-    const float32_t *supportVectors; /**< Support vectors */
-    const int32_t *classes; /**< The two SVM classes */
+typedef struct
+{
+  uint32_t        nbOfSupportVectors;     /**< Number of support vectors */
+  uint32_t        vectorDimension;        /**< Dimension of vector space */
+  float32_t       intercept;              /**< Intercept */
+  const float32_t *dualCoefficients;      /**< Dual coefficients */
+  const float32_t *supportVectors;        /**< Support vectors */
+  const int32_t   *classes;               /**< The two SVM classes */
 } arm_svm_linear_instance_f32;
 
+
 /**
  * @brief Instance structure for polynomial SVM prediction function.
  */
-typedef struct {
-    uint32_t nbOfSupportVectors; /**< Number of support vectors */
-    uint32_t vectorDimension; /**< Dimension of vector space */
-    float32_t intercept; /**< Intercept */
-    const float32_t *dualCoefficients; /**< Dual coefficients */
-    const float32_t *supportVectors; /**< Support vectors */
-    const int32_t *classes; /**< The two SVM classes */
-    int32_t degree; /**< Polynomial degree */
-    float32_t coef0; /**< Polynomial constant */
-    float32_t gamma; /**< Gamma factor */
+typedef struct
+{
+  uint32_t        nbOfSupportVectors;     /**< Number of support vectors */
+  uint32_t        vectorDimension;        /**< Dimension of vector space */
+  float32_t       intercept;              /**< Intercept */
+  const float32_t *dualCoefficients;      /**< Dual coefficients */
+  const float32_t *supportVectors;        /**< Support vectors */
+  const int32_t   *classes;               /**< The two SVM classes */
+  int32_t         degree;                 /**< Polynomial degree */
+  float32_t       coef0;                  /**< Polynomial constant */
+  float32_t       gamma;                  /**< Gamma factor */
 } arm_svm_polynomial_instance_f32;
 
+
 /**
  * @brief Instance structure for rbf SVM prediction function.
  */
-typedef struct {
-    uint32_t nbOfSupportVectors; /**< Number of support vectors */
-    uint32_t vectorDimension; /**< Dimension of vector space */
-    float32_t intercept; /**< Intercept */
-    const float32_t *dualCoefficients; /**< Dual coefficients */
-    const float32_t *supportVectors; /**< Support vectors */
-    const int32_t *classes; /**< The two SVM classes */
-    float32_t gamma; /**< Gamma factor */
+typedef struct
+{
+  uint32_t        nbOfSupportVectors;     /**< Number of support vectors */
+  uint32_t        vectorDimension;        /**< Dimension of vector space */
+  float32_t       intercept;              /**< Intercept */
+  const float32_t *dualCoefficients;      /**< Dual coefficients */
+  const float32_t *supportVectors;        /**< Support vectors */
+  const int32_t   *classes;               /**< The two SVM classes */
+  float32_t       gamma;                  /**< Gamma factor */
 } arm_svm_rbf_instance_f32;
 
+
 /**
  * @brief Instance structure for sigmoid SVM prediction function.
  */
-typedef struct {
-    uint32_t nbOfSupportVectors; /**< Number of support vectors */
-    uint32_t vectorDimension; /**< Dimension of vector space */
-    float32_t intercept; /**< Intercept */
-    const float32_t *dualCoefficients; /**< Dual coefficients */
-    const float32_t *supportVectors; /**< Support vectors */
-    const int32_t *classes; /**< The two SVM classes */
-    float32_t coef0; /**< Independent constant */
-    float32_t gamma; /**< Gamma factor */
+typedef struct
+{
+  uint32_t        nbOfSupportVectors;     /**< Number of support vectors */
+  uint32_t        vectorDimension;        /**< Dimension of vector space */
+  float32_t       intercept;              /**< Intercept */
+  const float32_t *dualCoefficients;      /**< Dual coefficients */
+  const float32_t *supportVectors;        /**< Support vectors */
+  const int32_t   *classes;               /**< The two SVM classes */
+  float32_t       coef0;                  /**< Independent constant */
+  float32_t       gamma;                  /**< Gamma factor */
 } arm_svm_sigmoid_instance_f32;
 
+
 /**
  * @brief        SVM linear instance init function
  * @param[in]    S                      Parameters for SVM functions
@@ -134,26 +145,26 @@ typedef struct {
  * @param[in]    dualCoefficients       Array of dual coefficients
  * @param[in]    supportVectors         Array of support vectors
  * @param[in]    classes                Array of 2 classes ID
- * @return none.
- *
  */
+void arm_svm_linear_init_f32(arm_svm_linear_instance_f32 *S, 
+  uint32_t nbOfSupportVectors,
+  uint32_t vectorDimension,
+  float32_t intercept,
+  const float32_t *dualCoefficients,
+  const float32_t *supportVectors,
+  const int32_t  *classes);
 
-void arm_svm_linear_init_f32(arm_svm_linear_instance_f32 *S, uint32_t nbOfSupportVectors,
-                             uint32_t vectorDimension, float32_t intercept,
-                             const float32_t *dualCoefficients, const float32_t *supportVectors,
-                             const int32_t *classes);
 
 /**
  * @brief SVM linear prediction
  * @param[in]    S          Pointer to an instance of the linear SVM structure.
  * @param[in]    in         Pointer to input vector
  * @param[out]   pResult    Decision value
- * @return none.
- *
  */
+void arm_svm_linear_predict_f32(const arm_svm_linear_instance_f32 *S, 
+   const float32_t * in, 
+   int32_t * pResult);
 
-void arm_svm_linear_predict_f32(const arm_svm_linear_instance_f32 *S, const float32_t *in,
-                                int32_t *pResult);
 
 /**
  * @brief        SVM polynomial instance init function
@@ -167,26 +178,30 @@ void arm_svm_linear_predict_f32(const arm_svm_linear_instance_f32 *S, const floa
  * @param[in]    degree                 Polynomial degree
  * @param[in]    coef0                  coeff0 (scikit-learn terminology)
  * @param[in]    gamma                  gamma (scikit-learn terminology)
- * @return none.
- *
  */
+void arm_svm_polynomial_init_f32(arm_svm_polynomial_instance_f32 *S, 
+  uint32_t nbOfSupportVectors,
+  uint32_t vectorDimension,
+  float32_t intercept,
+  const float32_t *dualCoefficients,
+  const float32_t *supportVectors,
+  const int32_t   *classes,
+  int32_t      degree,
+  float32_t coef0,
+  float32_t gamma
+  );
 
-void arm_svm_polynomial_init_f32(arm_svm_polynomial_instance_f32 *S, uint32_t nbOfSupportVectors,
-                                 uint32_t vectorDimension, float32_t intercept,
-                                 const float32_t *dualCoefficients, const float32_t *supportVectors,
-                                 const int32_t *classes, int32_t degree, float32_t coef0,
-                                 float32_t gamma);
 
 /**
  * @brief SVM polynomial prediction
  * @param[in]    S          Pointer to an instance of the polynomial SVM structure.
  * @param[in]    in         Pointer to input vector
  * @param[out]   pResult    Decision value
- * @return none.
- *
  */
-void arm_svm_polynomial_predict_f32(const arm_svm_polynomial_instance_f32 *S, const float32_t *in,
-                                    int32_t *pResult);
+void arm_svm_polynomial_predict_f32(const arm_svm_polynomial_instance_f32 *S, 
+   const float32_t * in, 
+   int32_t * pResult);
+
 
 /**
  * @brief        SVM radial basis function instance init function
@@ -198,25 +213,28 @@ void arm_svm_polynomial_predict_f32(const arm_svm_polynomial_instance_f32 *S, co
  * @param[in]    supportVectors         Array of support vectors
  * @param[in]    classes                Array of 2 classes ID
  * @param[in]    gamma                  gamma (scikit-learn terminology)
- * @return none.
- *
  */
+void arm_svm_rbf_init_f32(arm_svm_rbf_instance_f32 *S, 
+  uint32_t nbOfSupportVectors,
+  uint32_t vectorDimension,
+  float32_t intercept,
+  const float32_t *dualCoefficients,
+  const float32_t *supportVectors,
+  const int32_t   *classes,
+  float32_t gamma
+  );
 
-void arm_svm_rbf_init_f32(arm_svm_rbf_instance_f32 *S, uint32_t nbOfSupportVectors,
-                          uint32_t vectorDimension, float32_t intercept,
-                          const float32_t *dualCoefficients, const float32_t *supportVectors,
-                          const int32_t *classes, float32_t gamma);
 
 /**
  * @brief SVM rbf prediction
  * @param[in]    S         Pointer to an instance of the rbf SVM structure.
  * @param[in]    in        Pointer to input vector
  * @param[out]   pResult   decision value
- * @return none.
- *
  */
-void arm_svm_rbf_predict_f32(const arm_svm_rbf_instance_f32 *S, const float32_t *in,
-                             int32_t *pResult);
+void arm_svm_rbf_predict_f32(const arm_svm_rbf_instance_f32 *S, 
+   const float32_t * in, 
+   int32_t * pResult);
+
 
 /**
  * @brief        SVM sigmoid instance init function
@@ -229,27 +247,33 @@ void arm_svm_rbf_predict_f32(const arm_svm_rbf_instance_f32 *S, const float32_t
  * @param[in]    classes                Array of 2 classes ID
  * @param[in]    coef0                  coeff0 (scikit-learn terminology)
  * @param[in]    gamma                  gamma (scikit-learn terminology)
- * @return none.
- *
  */
+void arm_svm_sigmoid_init_f32(arm_svm_sigmoid_instance_f32 *S, 
+  uint32_t nbOfSupportVectors,
+  uint32_t vectorDimension,
+  float32_t intercept,
+  const float32_t *dualCoefficients,
+  const float32_t *supportVectors,
+  const int32_t   *classes,
+  float32_t coef0,
+  float32_t gamma
+  );
 
-void arm_svm_sigmoid_init_f32(arm_svm_sigmoid_instance_f32 *S, uint32_t nbOfSupportVectors,
-                              uint32_t vectorDimension, float32_t intercept,
-                              const float32_t *dualCoefficients, const float32_t *supportVectors,
-                              const int32_t *classes, float32_t coef0, float32_t gamma);
 
 /**
  * @brief SVM sigmoid prediction
  * @param[in]    S        Pointer to an instance of the rbf SVM structure.
  * @param[in]    in       Pointer to input vector
  * @param[out]   pResult  Decision value
- * @return none.
- *
  */
-void arm_svm_sigmoid_predict_f32(const arm_svm_sigmoid_instance_f32 *S, const float32_t *in,
-                                 int32_t *pResult);
+void arm_svm_sigmoid_predict_f32(const arm_svm_sigmoid_instance_f32 *S, 
+   const float32_t * in, 
+   int32_t * pResult);
+
+
+
 
-#ifdef __cplusplus
+#ifdef   __cplusplus
 }
 #endif
 
diff --git a/Libraries/CMSIS/5.9.0/DSP/Include/dsp/svm_functions_f16.h b/Libraries/CMSIS/5.9.0/DSP/1.16.2/Include/dsp/svm_functions_f16.h
old mode 100644
new mode 100755
similarity index 61%
rename from Libraries/CMSIS/5.9.0/DSP/Include/dsp/svm_functions_f16.h
rename to Libraries/CMSIS/5.9.0/DSP/1.16.2/Include/dsp/svm_functions_f16.h
index 6c916d86065..5f757a0a10d
--- a/Libraries/CMSIS/5.9.0/DSP/Include/dsp/svm_functions_f16.h
+++ b/Libraries/CMSIS/5.9.0/DSP/1.16.2/Include/dsp/svm_functions_f16.h
@@ -23,8 +23,9 @@
  * limitations under the License.
  */
 
-#ifndef _SVM_FUNCTIONS_F16_H_
-#define _SVM_FUNCTIONS_F16_H_
+ 
+#ifndef SVM_FUNCTIONS_F16_H_
+#define SVM_FUNCTIONS_F16_H_
 
 #include "arm_math_types_f16.h"
 #include "arm_math_memory.h"
@@ -33,8 +34,10 @@
 #include "dsp/utils.h"
 #include "dsp/svm_defines.h"
 
-#ifdef __cplusplus
-extern "C" {
+
+#ifdef   __cplusplus
+extern "C"
+{
 #endif
 
 #if defined(ARM_FLOAT16_SUPPORTED)
@@ -55,60 +58,70 @@ extern "C" {
  * 
  */
 
+
+
 /**
  * @brief Instance structure for linear SVM prediction function.
  */
-typedef struct {
-    uint32_t nbOfSupportVectors; /**< Number of support vectors */
-    uint32_t vectorDimension; /**< Dimension of vector space */
-    float16_t intercept; /**< Intercept */
-    const float16_t *dualCoefficients; /**< Dual coefficients */
-    const float16_t *supportVectors; /**< Support vectors */
-    const int32_t *classes; /**< The two SVM classes */
+typedef struct
+{
+  uint32_t        nbOfSupportVectors;     /**< Number of support vectors */
+  uint32_t        vectorDimension;        /**< Dimension of vector space */
+  float16_t       intercept;              /**< Intercept */
+  const float16_t *dualCoefficients;      /**< Dual coefficients */
+  const float16_t *supportVectors;        /**< Support vectors */
+  const int32_t   *classes;               /**< The two SVM classes */
 } arm_svm_linear_instance_f16;
 
+
 /**
  * @brief Instance structure for polynomial SVM prediction function.
  */
-typedef struct {
-    uint32_t nbOfSupportVectors; /**< Number of support vectors */
-    uint32_t vectorDimension; /**< Dimension of vector space */
-    float16_t intercept; /**< Intercept */
-    const float16_t *dualCoefficients; /**< Dual coefficients */
-    const float16_t *supportVectors; /**< Support vectors */
-    const int32_t *classes; /**< The two SVM classes */
-    int32_t degree; /**< Polynomial degree */
-    float16_t coef0; /**< Polynomial constant */
-    float16_t gamma; /**< Gamma factor */
+typedef struct
+{
+  uint32_t        nbOfSupportVectors;     /**< Number of support vectors */
+  uint32_t        vectorDimension;        /**< Dimension of vector space */
+  float16_t       intercept;              /**< Intercept */
+  const float16_t *dualCoefficients;      /**< Dual coefficients */
+  const float16_t *supportVectors;        /**< Support vectors */
+  const int32_t   *classes;               /**< The two SVM classes */
+  int32_t         degree;                 /**< Polynomial degree */
+  float16_t       coef0;                  /**< Polynomial constant */
+  float16_t       gamma;                  /**< Gamma factor */
 } arm_svm_polynomial_instance_f16;
 
+
 /**
  * @brief Instance structure for rbf SVM prediction function.
  */
-typedef struct {
-    uint32_t nbOfSupportVectors; /**< Number of support vectors */
-    uint32_t vectorDimension; /**< Dimension of vector space */
-    float16_t intercept; /**< Intercept */
-    const float16_t *dualCoefficients; /**< Dual coefficients */
-    const float16_t *supportVectors; /**< Support vectors */
-    const int32_t *classes; /**< The two SVM classes */
-    float16_t gamma; /**< Gamma factor */
+typedef struct
+{
+  uint32_t        nbOfSupportVectors;     /**< Number of support vectors */
+  uint32_t        vectorDimension;        /**< Dimension of vector space */
+  float16_t       intercept;              /**< Intercept */
+  const float16_t *dualCoefficients;      /**< Dual coefficients */
+  const float16_t *supportVectors;        /**< Support vectors */
+  const int32_t   *classes;               /**< The two SVM classes */
+  float16_t       gamma;                  /**< Gamma factor */
 } arm_svm_rbf_instance_f16;
 
+
 /**
  * @brief Instance structure for sigmoid SVM prediction function.
  */
-typedef struct {
-    uint32_t nbOfSupportVectors; /**< Number of support vectors */
-    uint32_t vectorDimension; /**< Dimension of vector space */
-    float16_t intercept; /**< Intercept */
-    const float16_t *dualCoefficients; /**< Dual coefficients */
-    const float16_t *supportVectors; /**< Support vectors */
-    const int32_t *classes; /**< The two SVM classes */
-    float16_t coef0; /**< Independent constant */
-    float16_t gamma; /**< Gamma factor */
+typedef struct
+{
+  uint32_t        nbOfSupportVectors;     /**< Number of support vectors */
+  uint32_t        vectorDimension;        /**< Dimension of vector space */
+  float16_t       intercept;              /**< Intercept */
+  const float16_t *dualCoefficients;      /**< Dual coefficients */
+  const float16_t *supportVectors;        /**< Support vectors */
+  const int32_t   *classes;               /**< The two SVM classes */
+  float16_t       coef0;                  /**< Independent constant */
+  float16_t       gamma;                  /**< Gamma factor */
 } arm_svm_sigmoid_instance_f16;
 
+
 /**
  * @brief        SVM linear instance init function
  * @param[in]    S                      Parameters for SVM functions
@@ -118,26 +131,25 @@ typedef struct {
  * @param[in]    dualCoefficients       Array of dual coefficients
  * @param[in]    supportVectors         Array of support vectors
  * @param[in]    classes                Array of 2 classes ID
- * @return none.
- *
  */
-
-void arm_svm_linear_init_f16(arm_svm_linear_instance_f16 *S, uint32_t nbOfSupportVectors,
-                             uint32_t vectorDimension, float16_t intercept,
-                             const float16_t *dualCoefficients, const float16_t *supportVectors,
-                             const int32_t *classes);
+void arm_svm_linear_init_f16(arm_svm_linear_instance_f16 *S, 
+  uint32_t nbOfSupportVectors,
+  uint32_t vectorDimension,
+  float16_t intercept,
+  const float16_t *dualCoefficients,
+  const float16_t *supportVectors,
+  const int32_t  *classes);
 
 /**
  * @brief SVM linear prediction
  * @param[in]    S          Pointer to an instance of the linear SVM structure.
  * @param[in]    in         Pointer to input vector
  * @param[out]   pResult    Decision value
- * @return none.
- *
  */
+void arm_svm_linear_predict_f16(const arm_svm_linear_instance_f16 *S, 
+   const float16_t * in, 
+   int32_t * pResult);
 
-void arm_svm_linear_predict_f16(const arm_svm_linear_instance_f16 *S, const float16_t *in,
-                                int32_t *pResult);
 
 /**
  * @brief        SVM polynomial instance init function
@@ -151,26 +163,30 @@ void arm_svm_linear_predict_f16(const arm_svm_linear_instance_f16 *S, const floa
  * @param[in]    degree                 Polynomial degree
  * @param[in]    coef0                  coeff0 (scikit-learn terminology)
  * @param[in]    gamma                  gamma (scikit-learn terminology)
- * @return none.
- *
  */
+void arm_svm_polynomial_init_f16(arm_svm_polynomial_instance_f16 *S, 
+  uint32_t nbOfSupportVectors,
+  uint32_t vectorDimension,
+  float16_t intercept,
+  const float16_t *dualCoefficients,
+  const float16_t *supportVectors,
+  const int32_t   *classes,
+  int32_t      degree,
+  float16_t coef0,
+  float16_t gamma
+  );
 
-void arm_svm_polynomial_init_f16(arm_svm_polynomial_instance_f16 *S, uint32_t nbOfSupportVectors,
-                                 uint32_t vectorDimension, float16_t intercept,
-                                 const float16_t *dualCoefficients, const float16_t *supportVectors,
-                                 const int32_t *classes, int32_t degree, float16_t coef0,
-                                 float16_t gamma);
 
 /**
  * @brief SVM polynomial prediction
  * @param[in]    S          Pointer to an instance of the polynomial SVM structure.
  * @param[in]    in         Pointer to input vector
  * @param[out]   pResult    Decision value
- * @return none.
- *
  */
-void arm_svm_polynomial_predict_f16(const arm_svm_polynomial_instance_f16 *S, const float16_t *in,
-                                    int32_t *pResult);
+void arm_svm_polynomial_predict_f16(const arm_svm_polynomial_instance_f16 *S, 
+   const float16_t * in, 
+   int32_t * pResult);
+
 
 /**
  * @brief        SVM radial basis function instance init function
@@ -182,25 +198,28 @@ void arm_svm_polynomial_predict_f16(const arm_svm_polynomial_instance_f16 *S, co
  * @param[in]    supportVectors         Array of support vectors
  * @param[in]    classes                Array of 2 classes ID
  * @param[in]    gamma                  gamma (scikit-learn terminology)
- * @return none.
- *
  */
+void arm_svm_rbf_init_f16(arm_svm_rbf_instance_f16 *S, 
+  uint32_t nbOfSupportVectors,
+  uint32_t vectorDimension,
+  float16_t intercept,
+  const float16_t *dualCoefficients,
+  const float16_t *supportVectors,
+  const int32_t   *classes,
+  float16_t gamma
+  );
 
-void arm_svm_rbf_init_f16(arm_svm_rbf_instance_f16 *S, uint32_t nbOfSupportVectors,
-                          uint32_t vectorDimension, float16_t intercept,
-                          const float16_t *dualCoefficients, const float16_t *supportVectors,
-                          const int32_t *classes, float16_t gamma);
 
 /**
  * @brief SVM rbf prediction
  * @param[in]    S         Pointer to an instance of the rbf SVM structure.
  * @param[in]    in        Pointer to input vector
  * @param[out]   pResult   decision value
- * @return none.
- *
  */
-void arm_svm_rbf_predict_f16(const arm_svm_rbf_instance_f16 *S, const float16_t *in,
-                             int32_t *pResult);
+void arm_svm_rbf_predict_f16(const arm_svm_rbf_instance_f16 *S, 
+   const float16_t * in, 
+   int32_t * pResult);
+
 
 /**
  * @brief        SVM sigmoid instance init function
@@ -213,28 +232,33 @@ void arm_svm_rbf_predict_f16(const arm_svm_rbf_instance_f16 *S, const float16_t
  * @param[in]    classes                Array of 2 classes ID
  * @param[in]    coef0                  coeff0 (scikit-learn terminology)
  * @param[in]    gamma                  gamma (scikit-learn terminology)
- * @return none.
- *
  */
+void arm_svm_sigmoid_init_f16(arm_svm_sigmoid_instance_f16 *S, 
+  uint32_t nbOfSupportVectors,
+  uint32_t vectorDimension,
+  float16_t intercept,
+  const float16_t *dualCoefficients,
+  const float16_t *supportVectors,
+  const int32_t   *classes,
+  float16_t coef0,
+  float16_t gamma
+  );
 
-void arm_svm_sigmoid_init_f16(arm_svm_sigmoid_instance_f16 *S, uint32_t nbOfSupportVectors,
-                              uint32_t vectorDimension, float16_t intercept,
-                              const float16_t *dualCoefficients, const float16_t *supportVectors,
-                              const int32_t *classes, float16_t coef0, float16_t gamma);
 
 /**
  * @brief SVM sigmoid prediction
  * @param[in]    S        Pointer to an instance of the rbf SVM structure.
  * @param[in]    in       Pointer to input vector
  * @param[out]   pResult  Decision value
- * @return none.
- *
  */
-void arm_svm_sigmoid_predict_f16(const arm_svm_sigmoid_instance_f16 *S, const float16_t *in,
-                                 int32_t *pResult);
+void arm_svm_sigmoid_predict_f16(const arm_svm_sigmoid_instance_f16 *S, 
+   const float16_t * in, 
+   int32_t * pResult);
+
+
 
 #endif /*defined(ARM_FLOAT16_SUPPORTED)*/
-#ifdef __cplusplus
+#ifdef   __cplusplus
 }
 #endif
 
diff --git a/Libraries/CMSIS/5.9.0/DSP/1.16.2/Include/dsp/transform_functions.h b/Libraries/CMSIS/5.9.0/DSP/1.16.2/Include/dsp/transform_functions.h
new file mode 100755
index 00000000000..b2c13d9dfcf
--- /dev/null
+++ b/Libraries/CMSIS/5.9.0/DSP/1.16.2/Include/dsp/transform_functions.h
@@ -0,0 +1,1151 @@
+/******************************************************************************
+ * @file     transform_functions.h
+ * @brief    Public header file for CMSIS DSP Library
+ * @version  V1.10.0
+ * @date     08 July 2021
+ * Target Processor: Cortex-M and Cortex-A cores
+ ******************************************************************************/
+/*
+ * Copyright (c) 2010-2020 Arm Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+ 
+#ifndef TRANSFORM_FUNCTIONS_H_
+#define TRANSFORM_FUNCTIONS_H_
+
+#include "arm_math_types.h"
+#include "arm_math_memory.h"
+
+#include "dsp/none.h"
+#include "dsp/utils.h"
+
+#include "dsp/basic_math_functions.h"
+#include "dsp/complex_math_functions.h"
+
+#ifdef   __cplusplus
+extern "C"
+{
+#endif
+
+
+/**
+ * @defgroup groupTransforms Transform Functions
+ */
+
+
+  /**
+   * @brief Instance structure for the Radix-2 Q15 CFFT/CIFFT function.
+   */
+  typedef struct
+  {
+          uint16_t fftLen;                 /**< length of the FFT. */
+          uint8_t ifftFlag;                /**< flag that selects forward (ifftFlag=0) or inverse (ifftFlag=1) transform. */
+          uint8_t bitReverseFlag;          /**< flag that enables (bitReverseFlag=1) or disables (bitReverseFlag=0) bit reversal of output. */
+    const q15_t *pTwiddle;                 /**< points to the Sin twiddle factor table. */
+    const uint16_t *pBitRevTable;          /**< points to the bit reversal table. */
+          uint16_t twidCoefModifier;       /**< twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table. */
+          uint16_t bitRevFactor;           /**< bit reversal modifier that supports different size FFTs with the same bit reversal table. */
+  } arm_cfft_radix2_instance_q15;
+
+/* Deprecated */
+  arm_status arm_cfft_radix2_init_q15(
+        arm_cfft_radix2_instance_q15 * S,
+        uint16_t fftLen,
+        uint8_t ifftFlag,
+        uint8_t bitReverseFlag);
+
+/* Deprecated */
+  void arm_cfft_radix2_q15(
+  const arm_cfft_radix2_instance_q15 * S,
+        q15_t * pSrc);
+
+
+  /**
+   * @brief Instance structure for the Radix-4 Q15 CFFT/CIFFT function.
+   */
+  typedef struct
+  {
+          uint16_t fftLen;                 /**< length of the FFT. */
+          uint8_t ifftFlag;                /**< flag that selects forward (ifftFlag=0) or inverse (ifftFlag=1) transform. */
+          uint8_t bitReverseFlag;          /**< flag that enables (bitReverseFlag=1) or disables (bitReverseFlag=0) bit reversal of output. */
+    const q15_t *pTwiddle;                 /**< points to the twiddle factor table. */
+    const uint16_t *pBitRevTable;          /**< points to the bit reversal table. */
+          uint16_t twidCoefModifier;       /**< twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table. */
+          uint16_t bitRevFactor;           /**< bit reversal modifier that supports different size FFTs with the same bit reversal table. */
+  } arm_cfft_radix4_instance_q15;
+
+/* Deprecated */
+  arm_status arm_cfft_radix4_init_q15(
+        arm_cfft_radix4_instance_q15 * S,
+        uint16_t fftLen,
+        uint8_t ifftFlag,
+        uint8_t bitReverseFlag);
+
+/* Deprecated */
+  void arm_cfft_radix4_q15(
+  const arm_cfft_radix4_instance_q15 * S,
+        q15_t * pSrc);
+
+  /**
+   * @brief Instance structure for the Radix-2 Q31 CFFT/CIFFT function.
+   */
+  typedef struct
+  {
+          uint16_t fftLen;                 /**< length of the FFT. */
+          uint8_t ifftFlag;                /**< flag that selects forward (ifftFlag=0) or inverse (ifftFlag=1) transform. */
+          uint8_t bitReverseFlag;          /**< flag that enables (bitReverseFlag=1) or disables (bitReverseFlag=0) bit reversal of output. */
+    const q31_t *pTwiddle;                 /**< points to the Twiddle factor table. */
+    const uint16_t *pBitRevTable;          /**< points to the bit reversal table. */
+          uint16_t twidCoefModifier;       /**< twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table. */
+          uint16_t bitRevFactor;           /**< bit reversal modifier that supports different size FFTs with the same bit reversal table. */
+  } arm_cfft_radix2_instance_q31;
+
+/* Deprecated */
+  arm_status arm_cfft_radix2_init_q31(
+        arm_cfft_radix2_instance_q31 * S,
+        uint16_t fftLen,
+        uint8_t ifftFlag,
+        uint8_t bitReverseFlag);
+
+/* Deprecated */
+  void arm_cfft_radix2_q31(
+  const arm_cfft_radix2_instance_q31 * S,
+        q31_t * pSrc);
+
+  /**
+   * @brief Instance structure for the Radix-4 Q31 CFFT/CIFFT function.
+   */
+  typedef struct
+  {
+          uint16_t fftLen;                 /**< length of the FFT. */
+          uint8_t ifftFlag;                /**< flag that selects forward (ifftFlag=0) or inverse (ifftFlag=1) transform. */
+          uint8_t bitReverseFlag;          /**< flag that enables (bitReverseFlag=1) or disables (bitReverseFlag=0) bit reversal of output. */
+    const q31_t *pTwiddle;                 /**< points to the twiddle factor table. */
+    const uint16_t *pBitRevTable;          /**< points to the bit reversal table. */
+          uint16_t twidCoefModifier;       /**< twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table. */
+          uint16_t bitRevFactor;           /**< bit reversal modifier that supports different size FFTs with the same bit reversal table. */
+  } arm_cfft_radix4_instance_q31;
+
+/* Deprecated */
+  void arm_cfft_radix4_q31(
+  const arm_cfft_radix4_instance_q31 * S,
+        q31_t * pSrc);
+
+/* Deprecated */
+  arm_status arm_cfft_radix4_init_q31(
+        arm_cfft_radix4_instance_q31 * S,
+        uint16_t fftLen,
+        uint8_t ifftFlag,
+        uint8_t bitReverseFlag);
+
+  /**
+   * @brief Instance structure for the Radix-2 floating-point CFFT/CIFFT function.
+   */
+  typedef struct
+  {
+          uint16_t fftLen;                   /**< length of the FFT. */
+          uint8_t ifftFlag;                  /**< flag that selects forward (ifftFlag=0) or inverse (ifftFlag=1) transform. */
+          uint8_t bitReverseFlag;            /**< flag that enables (bitReverseFlag=1) or disables (bitReverseFlag=0) bit reversal of output. */
+    const float32_t *pTwiddle;               /**< points to the Twiddle factor table. */
+    const uint16_t *pBitRevTable;            /**< points to the bit reversal table. */
+          uint16_t twidCoefModifier;         /**< twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table. */
+          uint16_t bitRevFactor;             /**< bit reversal modifier that supports different size FFTs with the same bit reversal table. */
+          float32_t onebyfftLen;             /**< value of 1/fftLen. */
+  } arm_cfft_radix2_instance_f32;
+
+
+/* Deprecated */
+  arm_status arm_cfft_radix2_init_f32(
+        arm_cfft_radix2_instance_f32 * S,
+        uint16_t fftLen,
+        uint8_t ifftFlag,
+        uint8_t bitReverseFlag);
+
+/* Deprecated */
+  void arm_cfft_radix2_f32(
+  const arm_cfft_radix2_instance_f32 * S,
+        float32_t * pSrc);
+
+  /**
+   * @brief Instance structure for the Radix-4 floating-point CFFT/CIFFT function.
+   */
+  typedef struct
+  {
+          uint16_t fftLen;                   /**< length of the FFT. */
+          uint8_t ifftFlag;                  /**< flag that selects forward (ifftFlag=0) or inverse (ifftFlag=1) transform. */
+          uint8_t bitReverseFlag;            /**< flag that enables (bitReverseFlag=1) or disables (bitReverseFlag=0) bit reversal of output. */
+    const float32_t *pTwiddle;               /**< points to the Twiddle factor table. */
+    const uint16_t *pBitRevTable;            /**< points to the bit reversal table. */
+          uint16_t twidCoefModifier;         /**< twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table. */
+          uint16_t bitRevFactor;             /**< bit reversal modifier that supports different size FFTs with the same bit reversal table. */
+          float32_t onebyfftLen;             /**< value of 1/fftLen. */
+  } arm_cfft_radix4_instance_f32;
+
+
+
+/* Deprecated */
+  arm_status arm_cfft_radix4_init_f32(
+        arm_cfft_radix4_instance_f32 * S,
+        uint16_t fftLen,
+        uint8_t ifftFlag,
+        uint8_t bitReverseFlag);
+
+/* Deprecated */
+  void arm_cfft_radix4_f32(
+  const arm_cfft_radix4_instance_f32 * S,
+        float32_t * pSrc);
+
+  /**
+   * @brief Instance structure for the Q15 CFFT/CIFFT function.
+   */
+  typedef struct
+  {
+          uint16_t fftLen;                   /**< length of the FFT. */
+    const q15_t *pTwiddle;             /**< points to the Twiddle factor table. */
+    const uint16_t *pBitRevTable;      /**< points to the bit reversal table. */
+          uint16_t bitRevLength;             /**< bit reversal table length. */
+#if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE)
+   const uint32_t *rearranged_twiddle_tab_stride1_arr;        /**< Per stage reordered twiddle pointer (offset 1) */
+   const uint32_t *rearranged_twiddle_tab_stride2_arr;        /**< Per stage reordered twiddle pointer (offset 2) */
+   const uint32_t *rearranged_twiddle_tab_stride3_arr;        /**< Per stage reordered twiddle pointer (offset 3) */
+   const q15_t *rearranged_twiddle_stride1; /**< reordered twiddle offset 1 storage */
+   const q15_t *rearranged_twiddle_stride2; /**< reordered twiddle offset 2 storage */
+   const q15_t *rearranged_twiddle_stride3; /**< reordered twiddle offset 3 storage */
+#endif
+  } arm_cfft_instance_q15;
+
+arm_status arm_cfft_init_4096_q15(arm_cfft_instance_q15 * S);
+arm_status arm_cfft_init_2048_q15(arm_cfft_instance_q15 * S);
+arm_status arm_cfft_init_1024_q15(arm_cfft_instance_q15 * S);
+arm_status arm_cfft_init_512_q15(arm_cfft_instance_q15 * S);
+arm_status arm_cfft_init_256_q15(arm_cfft_instance_q15 * S);
+arm_status arm_cfft_init_128_q15(arm_cfft_instance_q15 * S);
+arm_status arm_cfft_init_64_q15(arm_cfft_instance_q15 * S);
+arm_status arm_cfft_init_32_q15(arm_cfft_instance_q15 * S);
+arm_status arm_cfft_init_16_q15(arm_cfft_instance_q15 * S);
+
+arm_status arm_cfft_init_q15(
+  arm_cfft_instance_q15 * S,
+  uint16_t fftLen);
+
+void arm_cfft_q15(
+    const arm_cfft_instance_q15 * S,
+          q15_t * p1,
+          uint8_t ifftFlag,
+          uint8_t bitReverseFlag);
+
+  /**
+   * @brief Instance structure for the Q31 CFFT/CIFFT function.
+   */
+  typedef struct
+  {
+          uint16_t fftLen;                   /**< length of the FFT. */
+    const q31_t *pTwiddle;             /**< points to the Twiddle factor table. */
+    const uint16_t *pBitRevTable;      /**< points to the bit reversal table. */
+          uint16_t bitRevLength;             /**< bit reversal table length. */
+#if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE)
+   const uint32_t *rearranged_twiddle_tab_stride1_arr;        /**< Per stage reordered twiddle pointer (offset 1) */
+   const uint32_t *rearranged_twiddle_tab_stride2_arr;        /**< Per stage reordered twiddle pointer (offset 2) */
+   const uint32_t *rearranged_twiddle_tab_stride3_arr;        /**< Per stage reordered twiddle pointer (offset 3) */
+   const q31_t *rearranged_twiddle_stride1; /**< reordered twiddle offset 1 storage */
+   const q31_t *rearranged_twiddle_stride2; /**< reordered twiddle offset 2 storage */
+   const q31_t *rearranged_twiddle_stride3; /**< reordered twiddle offset 3 storage */
+#endif
+  } arm_cfft_instance_q31;
+
+arm_status arm_cfft_init_4096_q31(arm_cfft_instance_q31 * S);
+arm_status arm_cfft_init_2048_q31(arm_cfft_instance_q31 * S);
+arm_status arm_cfft_init_1024_q31(arm_cfft_instance_q31 * S);
+arm_status arm_cfft_init_512_q31(arm_cfft_instance_q31 * S);
+arm_status arm_cfft_init_256_q31(arm_cfft_instance_q31 * S);
+arm_status arm_cfft_init_128_q31(arm_cfft_instance_q31 * S);
+arm_status arm_cfft_init_64_q31(arm_cfft_instance_q31 * S);
+arm_status arm_cfft_init_32_q31(arm_cfft_instance_q31 * S);
+arm_status arm_cfft_init_16_q31(arm_cfft_instance_q31 * S);
+
+arm_status arm_cfft_init_q31(
+  arm_cfft_instance_q31 * S,
+  uint16_t fftLen);
+
+void arm_cfft_q31(
+    const arm_cfft_instance_q31 * S,
+          q31_t * p1,
+          uint8_t ifftFlag,
+          uint8_t bitReverseFlag);
+
+  /**
+   * @brief Instance structure for the floating-point CFFT/CIFFT function.
+   */
+  typedef struct
+  {
+          uint16_t fftLen;                   /**< length of the FFT. */
+    const float32_t *pTwiddle;         /**< points to the Twiddle factor table. */
+    const uint16_t *pBitRevTable;      /**< points to the bit reversal table. */
+          uint16_t bitRevLength;             /**< bit reversal table length. */
+#if defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE)
+   const uint32_t *rearranged_twiddle_tab_stride1_arr;        /**< Per stage reordered twiddle pointer (offset 1) */
+   const uint32_t *rearranged_twiddle_tab_stride2_arr;        /**< Per stage reordered twiddle pointer (offset 2) */
+   const uint32_t *rearranged_twiddle_tab_stride3_arr;        /**< Per stage reordered twiddle pointer (offset 3) */
+   const float32_t *rearranged_twiddle_stride1; /**< reordered twiddle offset 1 storage */
+   const float32_t *rearranged_twiddle_stride2; /**< reordered twiddle offset 2 storage */
+   const float32_t *rearranged_twiddle_stride3; /**< reordered twiddle offset 3 storage */
+#endif
+  } arm_cfft_instance_f32;
+
+
+arm_status arm_cfft_init_4096_f32(arm_cfft_instance_f32 * S);
+arm_status arm_cfft_init_2048_f32(arm_cfft_instance_f32 * S);
+arm_status arm_cfft_init_1024_f32(arm_cfft_instance_f32 * S);
+arm_status arm_cfft_init_512_f32(arm_cfft_instance_f32 * S);
+arm_status arm_cfft_init_256_f32(arm_cfft_instance_f32 * S);
+arm_status arm_cfft_init_128_f32(arm_cfft_instance_f32 * S);
+arm_status arm_cfft_init_64_f32(arm_cfft_instance_f32 * S);
+arm_status arm_cfft_init_32_f32(arm_cfft_instance_f32 * S);
+arm_status arm_cfft_init_16_f32(arm_cfft_instance_f32 * S);
+
+  arm_status arm_cfft_init_f32(
+  arm_cfft_instance_f32 * S,
+  uint16_t fftLen);
+
+  void arm_cfft_f32(
+  const arm_cfft_instance_f32 * S,
+        float32_t * p1,
+        uint8_t ifftFlag,
+        uint8_t bitReverseFlag);
+
+
+  /**
+   * @brief Instance structure for the Double Precision Floating-point CFFT/CIFFT function.
+   */
+  typedef struct
+  {
+          uint16_t fftLen;                   /**< length of the FFT. */
+    const float64_t *pTwiddle;         /**< points to the Twiddle factor table. */
+    const uint16_t *pBitRevTable;      /**< points to the bit reversal table. */
+          uint16_t bitRevLength;             /**< bit reversal table length. */
+  } arm_cfft_instance_f64;
+
+arm_status arm_cfft_init_4096_f64(arm_cfft_instance_f64 * S);
+arm_status arm_cfft_init_2048_f64(arm_cfft_instance_f64 * S);
+arm_status arm_cfft_init_1024_f64(arm_cfft_instance_f64 * S);
+arm_status arm_cfft_init_512_f64(arm_cfft_instance_f64 * S);
+arm_status arm_cfft_init_256_f64(arm_cfft_instance_f64 * S);
+arm_status arm_cfft_init_128_f64(arm_cfft_instance_f64 * S);
+arm_status arm_cfft_init_64_f64(arm_cfft_instance_f64 * S);
+arm_status arm_cfft_init_32_f64(arm_cfft_instance_f64 * S);
+arm_status arm_cfft_init_16_f64(arm_cfft_instance_f64 * S);
+
+  arm_status arm_cfft_init_f64(
+  arm_cfft_instance_f64 * S,
+  uint16_t fftLen);
+  
+  void arm_cfft_f64(
+  const arm_cfft_instance_f64 * S,
+        float64_t * p1,
+        uint8_t ifftFlag,
+        uint8_t bitReverseFlag);
+
+  /**
+   * @brief Instance structure for the Q15 RFFT/RIFFT function.
+   */
+  typedef struct
+  {
+          uint32_t fftLenReal;                      /**< length of the real FFT. */
+          uint8_t ifftFlagR;                        /**< flag that selects forward (ifftFlagR=0) or inverse (ifftFlagR=1) transform. */
+          uint8_t bitReverseFlagR;                  /**< flag that enables (bitReverseFlagR=1) or disables (bitReverseFlagR=0) bit reversal of output. */
+          uint32_t twidCoefRModifier;               /**< twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table. */
+    const q15_t *pTwiddleAReal;                     /**< points to the real twiddle factor table. */
+    const q15_t *pTwiddleBReal;                     /**< points to the imag twiddle factor table. */
+#if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE)
+    arm_cfft_instance_q15 cfftInst;           /**< complex FFT instance, embedded by value in this configuration. */
+#else
+    const arm_cfft_instance_q15 *pCfft;       /**< points to the complex FFT instance. */
+#endif
+  } arm_rfft_instance_q15;
+
+arm_status arm_rfft_init_32_q15(
+        arm_rfft_instance_q15 * S,
+        uint32_t ifftFlagR,
+        uint32_t bitReverseFlag);
+
+arm_status arm_rfft_init_64_q15(
+        arm_rfft_instance_q15 * S,
+        uint32_t ifftFlagR,
+        uint32_t bitReverseFlag);
+
+arm_status arm_rfft_init_128_q15(
+        arm_rfft_instance_q15 * S,
+        uint32_t ifftFlagR,
+        uint32_t bitReverseFlag);
+
+arm_status arm_rfft_init_256_q15(
+        arm_rfft_instance_q15 * S,
+        uint32_t ifftFlagR,
+        uint32_t bitReverseFlag);
+
+arm_status arm_rfft_init_512_q15(
+        arm_rfft_instance_q15 * S,
+        uint32_t ifftFlagR,
+        uint32_t bitReverseFlag);
+
+arm_status arm_rfft_init_1024_q15(
+        arm_rfft_instance_q15 * S,
+        uint32_t ifftFlagR,
+        uint32_t bitReverseFlag);
+
+arm_status arm_rfft_init_2048_q15(
+        arm_rfft_instance_q15 * S,
+        uint32_t ifftFlagR,
+        uint32_t bitReverseFlag);
+
+arm_status arm_rfft_init_4096_q15(
+        arm_rfft_instance_q15 * S,
+        uint32_t ifftFlagR,
+        uint32_t bitReverseFlag);
+
+arm_status arm_rfft_init_8192_q15(
+        arm_rfft_instance_q15 * S,
+        uint32_t ifftFlagR,
+        uint32_t bitReverseFlag);
+
+  arm_status arm_rfft_init_q15(
+        arm_rfft_instance_q15 * S,
+        uint32_t fftLenReal,
+        uint32_t ifftFlagR,
+        uint32_t bitReverseFlag);
+
+  void arm_rfft_q15(
+  const arm_rfft_instance_q15 * S,
+        q15_t * pSrc,
+        q15_t * pDst);
+
+  /**
+   * @brief Instance structure for the Q31 RFFT/RIFFT function.
+   */
+  typedef struct
+  {
+          uint32_t fftLenReal;                        /**< length of the real FFT. */
+          uint8_t ifftFlagR;                          /**< flag that selects forward (ifftFlagR=0) or inverse (ifftFlagR=1) transform. */
+          uint8_t bitReverseFlagR;                    /**< flag that enables (bitReverseFlagR=1) or disables (bitReverseFlagR=0) bit reversal of output. */
+          uint32_t twidCoefRModifier;                 /**< twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table. */
+    const q31_t *pTwiddleAReal;                       /**< points to the real twiddle factor table. */
+    const q31_t *pTwiddleBReal;                       /**< points to the imag twiddle factor table. */
+#if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE)
+    arm_cfft_instance_q31 cfftInst;             /**< complex FFT instance, embedded by value in this configuration. */
+#else
+    const arm_cfft_instance_q31 *pCfft;         /**< points to the complex FFT instance. */
+#endif
+  } arm_rfft_instance_q31;
+
+  arm_status arm_rfft_init_32_q31(
+        arm_rfft_instance_q31 * S,
+        uint32_t ifftFlagR,
+        uint32_t bitReverseFlag);
+
+  arm_status arm_rfft_init_64_q31(
+        arm_rfft_instance_q31 * S,
+        uint32_t ifftFlagR,
+        uint32_t bitReverseFlag);
+
+  arm_status arm_rfft_init_128_q31(
+        arm_rfft_instance_q31 * S,
+        uint32_t ifftFlagR,
+        uint32_t bitReverseFlag);
+
+  arm_status arm_rfft_init_256_q31(
+        arm_rfft_instance_q31 * S,
+        uint32_t ifftFlagR,
+        uint32_t bitReverseFlag);
+
+  arm_status arm_rfft_init_512_q31(
+        arm_rfft_instance_q31 * S,
+        uint32_t ifftFlagR,
+        uint32_t bitReverseFlag);
+
+  arm_status arm_rfft_init_1024_q31(
+        arm_rfft_instance_q31 * S,
+        uint32_t ifftFlagR,
+        uint32_t bitReverseFlag);
+
+  arm_status arm_rfft_init_2048_q31(
+        arm_rfft_instance_q31 * S,
+        uint32_t ifftFlagR,
+        uint32_t bitReverseFlag);
+
+  arm_status arm_rfft_init_4096_q31(
+        arm_rfft_instance_q31 * S,
+        uint32_t ifftFlagR,
+        uint32_t bitReverseFlag);
+
+  arm_status arm_rfft_init_8192_q31(
+        arm_rfft_instance_q31 * S,
+        uint32_t ifftFlagR,
+        uint32_t bitReverseFlag);
+
+  arm_status arm_rfft_init_q31(
+        arm_rfft_instance_q31 * S,
+        uint32_t fftLenReal,
+        uint32_t ifftFlagR,
+        uint32_t bitReverseFlag);
+
+  void arm_rfft_q31(
+  const arm_rfft_instance_q31 * S,
+        q31_t * pSrc,
+        q31_t * pDst);
+
+  /**
+   * @brief Instance structure for the floating-point RFFT/RIFFT function.
+   */
+  typedef struct
+  {
+          uint32_t fftLenReal;                        /**< length of the real FFT. */
+          uint16_t fftLenBy2;                         /**< length of the complex FFT. */
+          uint8_t ifftFlagR;                          /**< flag that selects forward (ifftFlagR=0) or inverse (ifftFlagR=1) transform. */
+          uint8_t bitReverseFlagR;                    /**< flag that enables (bitReverseFlagR=1) or disables (bitReverseFlagR=0) bit reversal of output. */
+          uint32_t twidCoefRModifier;                     /**< twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table. */
+    const float32_t *pTwiddleAReal;                   /**< points to the real twiddle factor table. */
+    const float32_t *pTwiddleBReal;                   /**< points to the imag twiddle factor table. */
+          arm_cfft_radix4_instance_f32 *pCfft;        /**< points to the complex FFT instance. */
+  } arm_rfft_instance_f32;
+
+  arm_status arm_rfft_init_f32(
+        arm_rfft_instance_f32 * S,
+        arm_cfft_radix4_instance_f32 * S_CFFT,
+        uint32_t fftLenReal,
+        uint32_t ifftFlagR,
+        uint32_t bitReverseFlag);
+
+  void arm_rfft_f32(
+  const arm_rfft_instance_f32 * S,
+        float32_t * pSrc,
+        float32_t * pDst);
+
+  /**
+   * @brief Instance structure for the Double Precision Floating-point RFFT/RIFFT function.
+   */
+typedef struct
+  {
+          arm_cfft_instance_f64 Sint;      /**< Internal CFFT structure. */
+          uint16_t fftLenRFFT;             /**< length of the real sequence */
+    const float64_t * pTwiddleRFFT;        /**< Twiddle factors real stage  */
+  } arm_rfft_fast_instance_f64 ;
+
+arm_status arm_rfft_fast_init_32_f64( arm_rfft_fast_instance_f64 * S );
+arm_status arm_rfft_fast_init_64_f64( arm_rfft_fast_instance_f64 * S );
+arm_status arm_rfft_fast_init_128_f64( arm_rfft_fast_instance_f64 * S );
+arm_status arm_rfft_fast_init_256_f64( arm_rfft_fast_instance_f64 * S );
+arm_status arm_rfft_fast_init_512_f64( arm_rfft_fast_instance_f64 * S );
+arm_status arm_rfft_fast_init_1024_f64( arm_rfft_fast_instance_f64 * S );
+arm_status arm_rfft_fast_init_2048_f64( arm_rfft_fast_instance_f64 * S );
+arm_status arm_rfft_fast_init_4096_f64( arm_rfft_fast_instance_f64 * S );
+
+arm_status arm_rfft_fast_init_f64 (
+         arm_rfft_fast_instance_f64 * S,
+         uint16_t fftLen);
+
+
+void arm_rfft_fast_f64(
+    arm_rfft_fast_instance_f64 * S,
+    float64_t * p, float64_t * pOut,
+    uint8_t ifftFlag);
+
+
+  /**
+   * @brief Instance structure for the floating-point fast RFFT/RIFFT function (arm_rfft_fast_f32).
+   */
+typedef struct
+  {
+          arm_cfft_instance_f32 Sint;      /**< Internal CFFT structure. */
+          uint16_t fftLenRFFT;             /**< length of the real sequence */
+    const float32_t * pTwiddleRFFT;        /**< Twiddle factors real stage  */
+  } arm_rfft_fast_instance_f32 ;
+
+arm_status arm_rfft_fast_init_32_f32( arm_rfft_fast_instance_f32 * S );
+arm_status arm_rfft_fast_init_64_f32( arm_rfft_fast_instance_f32 * S );
+arm_status arm_rfft_fast_init_128_f32( arm_rfft_fast_instance_f32 * S );
+arm_status arm_rfft_fast_init_256_f32( arm_rfft_fast_instance_f32 * S );
+arm_status arm_rfft_fast_init_512_f32( arm_rfft_fast_instance_f32 * S );
+arm_status arm_rfft_fast_init_1024_f32( arm_rfft_fast_instance_f32 * S );
+arm_status arm_rfft_fast_init_2048_f32( arm_rfft_fast_instance_f32 * S );
+arm_status arm_rfft_fast_init_4096_f32( arm_rfft_fast_instance_f32 * S );
+
+arm_status arm_rfft_fast_init_f32 (
+         arm_rfft_fast_instance_f32 * S,
+         uint16_t fftLen);
+
+
+  void arm_rfft_fast_f32(
+        const arm_rfft_fast_instance_f32 * S,
+        float32_t * p, float32_t * pOut,
+        uint8_t ifftFlag);
+
+  /**
+   * @brief Instance structure for the floating-point DCT4/IDCT4 function.
+   */
+  typedef struct
+  {
+          uint16_t N;                          /**< length of the DCT4. */
+          uint16_t Nby2;                       /**< half of the length of the DCT4. */
+          float32_t normalize;                 /**< normalizing factor. */
+    const float32_t *pTwiddle;                 /**< points to the twiddle factor table. */
+    const float32_t *pCosFactor;               /**< points to the cosFactor table. */
+          arm_rfft_instance_f32 *pRfft;        /**< points to the real FFT instance. */
+          arm_cfft_radix4_instance_f32 *pCfft; /**< points to the complex FFT instance. */
+  } arm_dct4_instance_f32;
+
+
+  /**
+   * @brief  Initialization function for the floating-point DCT4/IDCT4.
+   * @param[in,out] S          points to an instance of floating-point DCT4/IDCT4 structure.
+   * @param[in]     S_RFFT     points to an instance of floating-point RFFT/RIFFT structure.
+   * @param[in]     S_CFFT     points to an instance of floating-point CFFT/CIFFT structure.
+   * @param[in]     N          length of the DCT4.
+   * @param[in]     Nby2       half of the length of the DCT4.
+   * @param[in]     normalize  normalizing factor.
+   * @return      arm_status function returns ARM_MATH_SUCCESS if initialization is successful or ARM_MATH_ARGUMENT_ERROR if <code>N</code> is not a supported transform length.
+   */
+  arm_status arm_dct4_init_f32(
+        arm_dct4_instance_f32 * S,
+        arm_rfft_instance_f32 * S_RFFT,
+        arm_cfft_radix4_instance_f32 * S_CFFT,
+        uint16_t N,
+        uint16_t Nby2,
+        float32_t normalize);
+
+
+  /**
+   * @brief Processing function for the floating-point DCT4/IDCT4.
+   * @param[in]     S              points to an instance of the floating-point DCT4/IDCT4 structure.
+   * @param[in]     pState         points to state buffer.
+   * @param[in,out] pInlineBuffer  points to the in-place input and output buffer.
+   */
+  void arm_dct4_f32(
+  const arm_dct4_instance_f32 * S,
+        float32_t * pState,
+        float32_t * pInlineBuffer);
+
+
+  /**
+   * @brief Instance structure for the Q31 DCT4/IDCT4 function.
+   */
+  typedef struct
+  {
+          uint16_t N;                          /**< length of the DCT4. */
+          uint16_t Nby2;                       /**< half of the length of the DCT4. */
+          q31_t normalize;                     /**< normalizing factor. */
+    const q31_t *pTwiddle;                     /**< points to the twiddle factor table. */
+    const q31_t *pCosFactor;                   /**< points to the cosFactor table. */
+          arm_rfft_instance_q31 *pRfft;        /**< points to the real FFT instance. */
+          arm_cfft_radix4_instance_q31 *pCfft; /**< points to the complex FFT instance. */
+  } arm_dct4_instance_q31;
+
+
+  /**
+   * @brief  Initialization function for the Q31 DCT4/IDCT4.
+   * @param[in,out] S          points to an instance of Q31 DCT4/IDCT4 structure.
+   * @param[in]     S_RFFT     points to an instance of Q31 RFFT/RIFFT structure.
+   * @param[in]     S_CFFT     points to an instance of Q31 CFFT/CIFFT structure.
+   * @param[in]     N          length of the DCT4.
+   * @param[in]     Nby2       half of the length of the DCT4.
+   * @param[in]     normalize  normalizing factor.
+   * @return      arm_status function returns ARM_MATH_SUCCESS if initialization is successful or ARM_MATH_ARGUMENT_ERROR if <code>N</code> is not a supported transform length.
+   */
+  arm_status arm_dct4_init_q31(
+        arm_dct4_instance_q31 * S,
+        arm_rfft_instance_q31 * S_RFFT,
+        arm_cfft_radix4_instance_q31 * S_CFFT,
+        uint16_t N,
+        uint16_t Nby2,
+        q31_t normalize);
+
+
+  /**
+   * @brief Processing function for the Q31 DCT4/IDCT4.
+   * @param[in]     S              points to an instance of the Q31 DCT4 structure.
+   * @param[in]     pState         points to state buffer.
+   * @param[in,out] pInlineBuffer  points to the in-place input and output buffer.
+   */
+  void arm_dct4_q31(
+  const arm_dct4_instance_q31 * S,
+        q31_t * pState,
+        q31_t * pInlineBuffer);
+
+
+  /**
+   * @brief Instance structure for the Q15 DCT4/IDCT4 function.
+   */
+  typedef struct
+  {
+          uint16_t N;                          /**< length of the DCT4. */
+          uint16_t Nby2;                       /**< half of the length of the DCT4. */
+          q15_t normalize;                     /**< normalizing factor. */
+    const q15_t *pTwiddle;                     /**< points to the twiddle factor table. */
+    const q15_t *pCosFactor;                   /**< points to the cosFactor table. */
+          arm_rfft_instance_q15 *pRfft;        /**< points to the real FFT instance. */
+          arm_cfft_radix4_instance_q15 *pCfft; /**< points to the complex FFT instance. */
+  } arm_dct4_instance_q15;
+
+
+  /**
+   * @brief  Initialization function for the Q15 DCT4/IDCT4.
+   * @param[in,out] S          points to an instance of Q15 DCT4/IDCT4 structure.
+   * @param[in]     S_RFFT     points to an instance of Q15 RFFT/RIFFT structure.
+   * @param[in]     S_CFFT     points to an instance of Q15 CFFT/CIFFT structure.
+   * @param[in]     N          length of the DCT4.
+   * @param[in]     Nby2       half of the length of the DCT4.
+   * @param[in]     normalize  normalizing factor.
+   * @return      arm_status function returns ARM_MATH_SUCCESS if initialization is successful or ARM_MATH_ARGUMENT_ERROR if <code>N</code> is not a supported transform length.
+   */
+  arm_status arm_dct4_init_q15(
+        arm_dct4_instance_q15 * S,
+        arm_rfft_instance_q15 * S_RFFT,
+        arm_cfft_radix4_instance_q15 * S_CFFT,
+        uint16_t N,
+        uint16_t Nby2,
+        q15_t normalize);
+
+
+  /**
+   * @brief Processing function for the Q15 DCT4/IDCT4.
+   * @param[in]     S              points to an instance of the Q15 DCT4 structure.
+   * @param[in]     pState         points to state buffer.
+   * @param[in,out] pInlineBuffer  points to the in-place input and output buffer.
+   */
+  void arm_dct4_q15(
+  const arm_dct4_instance_q15 * S,
+        q15_t * pState,
+        q15_t * pInlineBuffer);
+
+  /**
+   * @brief Instance structure for the Floating-point MFCC function.
+   */
+typedef struct
+  {
+     const float32_t *dctCoefs; /**< Internal DCT coefficients */
+     const float32_t *filterCoefs; /**< Internal Mel filter coefficients */ 
+     const float32_t *windowCoefs; /**< Windowing coefficients */ 
+     const uint32_t *filterPos; /**< Internal Mel filter positions in spectrum */ 
+     const uint32_t *filterLengths; /**< Internal Mel filter lengths */ 
+     uint32_t fftLen; /**< FFT length */
+     uint32_t nbMelFilters; /**< Number of Mel filters */
+     uint32_t nbDctOutputs; /**< Number of DCT outputs */
+#if defined(ARM_MFCC_CFFT_BASED)
+     /* Implementation of the MFCC is using a CFFT */
+     arm_cfft_instance_f32 cfft; /**< Internal CFFT instance */
+#else
+     /* Implementation of the MFCC is using a RFFT (default) */
+     arm_rfft_fast_instance_f32 rfft; /**< Internal RFFT instance */
+#endif
+  } arm_mfcc_instance_f32 ;
+
+arm_status arm_mfcc_init_32_f32(
+  arm_mfcc_instance_f32 * S,
+  uint32_t nbMelFilters,
+  uint32_t nbDctOutputs,
+  const float32_t *dctCoefs,
+  const uint32_t *filterPos,
+  const uint32_t *filterLengths,
+  const float32_t *filterCoefs,
+  const float32_t *windowCoefs
+  );
+
+arm_status arm_mfcc_init_64_f32(
+  arm_mfcc_instance_f32 * S,
+  uint32_t nbMelFilters,
+  uint32_t nbDctOutputs,
+  const float32_t *dctCoefs,
+  const uint32_t *filterPos,
+  const uint32_t *filterLengths,
+  const float32_t *filterCoefs,
+  const float32_t *windowCoefs
+  );
+
+arm_status arm_mfcc_init_128_f32(
+  arm_mfcc_instance_f32 * S,
+  uint32_t nbMelFilters,
+  uint32_t nbDctOutputs,
+  const float32_t *dctCoefs,
+  const uint32_t *filterPos,
+  const uint32_t *filterLengths,
+  const float32_t *filterCoefs,
+  const float32_t *windowCoefs
+  );
+
+arm_status arm_mfcc_init_256_f32(
+  arm_mfcc_instance_f32 * S,
+  uint32_t nbMelFilters,
+  uint32_t nbDctOutputs,
+  const float32_t *dctCoefs,
+  const uint32_t *filterPos,
+  const uint32_t *filterLengths,
+  const float32_t *filterCoefs,
+  const float32_t *windowCoefs
+  );
+
+arm_status arm_mfcc_init_512_f32(
+  arm_mfcc_instance_f32 * S,
+  uint32_t nbMelFilters,
+  uint32_t nbDctOutputs,
+  const float32_t *dctCoefs,
+  const uint32_t *filterPos,
+  const uint32_t *filterLengths,
+  const float32_t *filterCoefs,
+  const float32_t *windowCoefs
+  );
+
+arm_status arm_mfcc_init_1024_f32(
+  arm_mfcc_instance_f32 * S,
+  uint32_t nbMelFilters,
+  uint32_t nbDctOutputs,
+  const float32_t *dctCoefs,
+  const uint32_t *filterPos,
+  const uint32_t *filterLengths,
+  const float32_t *filterCoefs,
+  const float32_t *windowCoefs
+  );
+
+arm_status arm_mfcc_init_2048_f32(
+  arm_mfcc_instance_f32 * S,
+  uint32_t nbMelFilters,
+  uint32_t nbDctOutputs,
+  const float32_t *dctCoefs,
+  const uint32_t *filterPos,
+  const uint32_t *filterLengths,
+  const float32_t *filterCoefs,
+  const float32_t *windowCoefs
+  );
+
+arm_status arm_mfcc_init_4096_f32(
+  arm_mfcc_instance_f32 * S,
+  uint32_t nbMelFilters,
+  uint32_t nbDctOutputs,
+  const float32_t *dctCoefs,
+  const uint32_t *filterPos,
+  const uint32_t *filterLengths,
+  const float32_t *filterCoefs,
+  const float32_t *windowCoefs
+  );
+
+arm_status arm_mfcc_init_f32(
+  arm_mfcc_instance_f32 * S,
+  uint32_t fftLen,
+  uint32_t nbMelFilters,
+  uint32_t nbDctOutputs,
+  const float32_t *dctCoefs,
+  const uint32_t *filterPos,
+  const uint32_t *filterLengths,
+  const float32_t *filterCoefs,
+  const float32_t *windowCoefs
+  );
+
+
+/**
+  @brief         MFCC F32
+  @param[in]     S     points to the MFCC instance structure
+  @param[in]     pSrc  points to the input samples
+  @param[out]    pDst  points to the output MFCC values
+  @param[in,out] pTmp  points to a temporary buffer of complex values
+ */
+  void arm_mfcc_f32(
+  const arm_mfcc_instance_f32 * S,
+  float32_t *pSrc,
+  float32_t *pDst,
+  float32_t *pTmp
+  );
+
+ /**
+   * @brief Instance structure for the Q31 MFCC function.
+   */
+typedef struct
+  {
+     const q31_t *dctCoefs; /**< Internal DCT coefficients */
+     const q31_t *filterCoefs; /**< Internal Mel filter coefficients */ 
+     const q31_t *windowCoefs; /**< Windowing coefficients */ 
+     const uint32_t *filterPos; /**< Internal Mel filter positions in spectrum */ 
+     const uint32_t *filterLengths; /**< Internal Mel filter lengths */ 
+     uint32_t fftLen; /**< FFT length */
+     uint32_t nbMelFilters; /**< Number of Mel filters */
+     uint32_t nbDctOutputs; /**< Number of DCT outputs */
+#if defined(ARM_MFCC_CFFT_BASED)
+     /* Implementation of the MFCC is using a CFFT */
+     arm_cfft_instance_q31 cfft; /**< Internal CFFT instance */
+#else
+     /* Implementation of the MFCC is using a RFFT (default) */
+     arm_rfft_instance_q31 rfft; /**< Internal RFFT instance */
+#endif
+  } arm_mfcc_instance_q31 ;
+
+arm_status arm_mfcc_init_32_q31(
+  arm_mfcc_instance_q31 * S,
+  uint32_t nbMelFilters,
+  uint32_t nbDctOutputs,
+  const q31_t *dctCoefs,
+  const uint32_t *filterPos,
+  const uint32_t *filterLengths,
+  const q31_t *filterCoefs,
+  const q31_t *windowCoefs
+  );
+
+arm_status arm_mfcc_init_64_q31(
+  arm_mfcc_instance_q31 * S,
+  uint32_t nbMelFilters,
+  uint32_t nbDctOutputs,
+  const q31_t *dctCoefs,
+  const uint32_t *filterPos,
+  const uint32_t *filterLengths,
+  const q31_t *filterCoefs,
+  const q31_t *windowCoefs
+  );
+
+arm_status arm_mfcc_init_128_q31(
+  arm_mfcc_instance_q31 * S,
+  uint32_t nbMelFilters,
+  uint32_t nbDctOutputs,
+  const q31_t *dctCoefs,
+  const uint32_t *filterPos,
+  const uint32_t *filterLengths,
+  const q31_t *filterCoefs,
+  const q31_t *windowCoefs
+  );
+
+arm_status arm_mfcc_init_256_q31(
+  arm_mfcc_instance_q31 * S,
+  uint32_t nbMelFilters,
+  uint32_t nbDctOutputs,
+  const q31_t *dctCoefs,
+  const uint32_t *filterPos,
+  const uint32_t *filterLengths,
+  const q31_t *filterCoefs,
+  const q31_t *windowCoefs
+  );
+
+arm_status arm_mfcc_init_512_q31(
+  arm_mfcc_instance_q31 * S,
+  uint32_t nbMelFilters,
+  uint32_t nbDctOutputs,
+  const q31_t *dctCoefs,
+  const uint32_t *filterPos,
+  const uint32_t *filterLengths,
+  const q31_t *filterCoefs,
+  const q31_t *windowCoefs
+  );
+
+arm_status arm_mfcc_init_1024_q31(
+  arm_mfcc_instance_q31 * S,
+  uint32_t nbMelFilters,
+  uint32_t nbDctOutputs,
+  const q31_t *dctCoefs,
+  const uint32_t *filterPos,
+  const uint32_t *filterLengths,
+  const q31_t *filterCoefs,
+  const q31_t *windowCoefs
+  );
+
+arm_status arm_mfcc_init_2048_q31(
+  arm_mfcc_instance_q31 * S,
+  uint32_t nbMelFilters,
+  uint32_t nbDctOutputs,
+  const q31_t *dctCoefs,
+  const uint32_t *filterPos,
+  const uint32_t *filterLengths,
+  const q31_t *filterCoefs,
+  const q31_t *windowCoefs
+  );
+
+arm_status arm_mfcc_init_4096_q31(
+  arm_mfcc_instance_q31 * S,
+  uint32_t nbMelFilters,
+  uint32_t nbDctOutputs,
+  const q31_t *dctCoefs,
+  const uint32_t *filterPos,
+  const uint32_t *filterLengths,
+  const q31_t *filterCoefs,
+  const q31_t *windowCoefs
+  );
+
+arm_status arm_mfcc_init_q31(
+  arm_mfcc_instance_q31 * S,
+  uint32_t fftLen,
+  uint32_t nbMelFilters,
+  uint32_t nbDctOutputs,
+  const q31_t *dctCoefs,
+  const uint32_t *filterPos,
+  const uint32_t *filterLengths,
+  const q31_t *filterCoefs,
+  const q31_t *windowCoefs
+  );
+
+
+/**
+  @brief         MFCC Q31
+  @param[in]     S     points to the MFCC instance structure
+  @param[in]     pSrc  points to the input samples
+  @param[out]    pDst  points to the output MFCC values
+  @param[in,out] pTmp  points to a temporary buffer of complex values
+  @return        error status
+ */
+  arm_status arm_mfcc_q31(
+  const arm_mfcc_instance_q31 * S,
+  q31_t *pSrc,
+  q31_t *pDst,
+  q31_t *pTmp
+  );
+
+ /**
+   * @brief Instance structure for the Q15 MFCC function.
+   */
+typedef struct
+  {
+     const q15_t *dctCoefs; /**< Internal DCT coefficients */
+     const q15_t *filterCoefs; /**< Internal Mel filter coefficients */ 
+     const q15_t *windowCoefs; /**< Windowing coefficients */ 
+     const uint32_t *filterPos; /**< Internal Mel filter positions in spectrum */ 
+     const uint32_t *filterLengths; /**< Internal Mel filter lengths */ 
+     uint32_t fftLen; /**< FFT length */
+     uint32_t nbMelFilters; /**< Number of Mel filters */
+     uint32_t nbDctOutputs; /**< Number of DCT outputs */
+#if defined(ARM_MFCC_CFFT_BASED)
+     /* Implementation of the MFCC is using a CFFT */
+     arm_cfft_instance_q15 cfft; /**< Internal CFFT instance */
+#else
+     /* Implementation of the MFCC is using a RFFT (default) */
+     arm_rfft_instance_q15 rfft; /**< Internal RFFT instance */
+#endif
+  } arm_mfcc_instance_q15 ;
+
+arm_status arm_mfcc_init_32_q15(
+  arm_mfcc_instance_q15 * S,
+  uint32_t nbMelFilters,
+  uint32_t nbDctOutputs,
+  const q15_t *dctCoefs,
+  const uint32_t *filterPos,
+  const uint32_t *filterLengths,
+  const q15_t *filterCoefs,
+  const q15_t *windowCoefs
+  );
+
+arm_status arm_mfcc_init_64_q15(
+  arm_mfcc_instance_q15 * S,
+  uint32_t nbMelFilters,
+  uint32_t nbDctOutputs,
+  const q15_t *dctCoefs,
+  const uint32_t *filterPos,
+  const uint32_t *filterLengths,
+  const q15_t *filterCoefs,
+  const q15_t *windowCoefs
+  );
+
+arm_status arm_mfcc_init_128_q15(
+  arm_mfcc_instance_q15 * S,
+  uint32_t nbMelFilters,
+  uint32_t nbDctOutputs,
+  const q15_t *dctCoefs,
+  const uint32_t *filterPos,
+  const uint32_t *filterLengths,
+  const q15_t *filterCoefs,
+  const q15_t *windowCoefs
+  );
+
+arm_status arm_mfcc_init_256_q15(
+  arm_mfcc_instance_q15 * S,
+  uint32_t nbMelFilters,
+  uint32_t nbDctOutputs,
+  const q15_t *dctCoefs,
+  const uint32_t *filterPos,
+  const uint32_t *filterLengths,
+  const q15_t *filterCoefs,
+  const q15_t *windowCoefs
+  );
+
+arm_status arm_mfcc_init_512_q15(
+  arm_mfcc_instance_q15 * S,
+  uint32_t nbMelFilters,
+  uint32_t nbDctOutputs,
+  const q15_t *dctCoefs,
+  const uint32_t *filterPos,
+  const uint32_t *filterLengths,
+  const q15_t *filterCoefs,
+  const q15_t *windowCoefs
+  );
+
+arm_status arm_mfcc_init_1024_q15(
+  arm_mfcc_instance_q15 * S,
+  uint32_t nbMelFilters,
+  uint32_t nbDctOutputs,
+  const q15_t *dctCoefs,
+  const uint32_t *filterPos,
+  const uint32_t *filterLengths,
+  const q15_t *filterCoefs,
+  const q15_t *windowCoefs
+  );
+
+arm_status arm_mfcc_init_2048_q15(
+  arm_mfcc_instance_q15 * S,
+  uint32_t nbMelFilters,
+  uint32_t nbDctOutputs,
+  const q15_t *dctCoefs,
+  const uint32_t *filterPos,
+  const uint32_t *filterLengths,
+  const q15_t *filterCoefs,
+  const q15_t *windowCoefs
+  );
+
+arm_status arm_mfcc_init_4096_q15(
+  arm_mfcc_instance_q15 * S,
+  uint32_t nbMelFilters,
+  uint32_t nbDctOutputs,
+  const q15_t *dctCoefs,
+  const uint32_t *filterPos,
+  const uint32_t *filterLengths,
+  const q15_t *filterCoefs,
+  const q15_t *windowCoefs
+  );
+
+arm_status arm_mfcc_init_q15(
+  arm_mfcc_instance_q15 * S,
+  uint32_t fftLen,
+  uint32_t nbMelFilters,
+  uint32_t nbDctOutputs,
+  const q15_t *dctCoefs,
+  const uint32_t *filterPos,
+  const uint32_t *filterLengths,
+  const q15_t *filterCoefs,
+  const q15_t *windowCoefs
+  );
+
+
+/**
+  @brief         MFCC Q15
+  @param[in]     S     points to the MFCC instance structure
+  @param[in]     pSrc  points to the input samples
+  @param[out]    pDst  points to the output MFCC values in q8.7 format
+  @param[in,out] pTmp  points to a temporary buffer of complex values
+  @return        error status
+ */
+  arm_status arm_mfcc_q15(
+  const arm_mfcc_instance_q15 * S,
+  q15_t *pSrc,
+  q15_t *pDst,
+  q31_t *pTmp
+  );
+
+
+#ifdef   __cplusplus
+}
+#endif
+
+#endif /* ifndef _TRANSFORM_FUNCTIONS_H_ */
diff --git a/Libraries/CMSIS/5.9.0/DSP/1.16.2/Include/dsp/transform_functions_f16.h b/Libraries/CMSIS/5.9.0/DSP/1.16.2/Include/dsp/transform_functions_f16.h
new file mode 100755
index 00000000000..b0ca0c0d2de
--- /dev/null
+++ b/Libraries/CMSIS/5.9.0/DSP/1.16.2/Include/dsp/transform_functions_f16.h
@@ -0,0 +1,316 @@
+/******************************************************************************
+ * @file     transform_functions_f16.h
+ * @brief    Public header file for CMSIS DSP Library
+ * @version  V1.10.0
+ * @date     08 July 2021
+ * Target Processor: Cortex-M and Cortex-A cores
+ ******************************************************************************/
+/*
+ * Copyright (c) 2010-2020 Arm Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+ 
+#ifndef TRANSFORM_FUNCTIONS_F16_H_
+#define TRANSFORM_FUNCTIONS_F16_H_
+
+#include "arm_math_types_f16.h"
+#include "arm_math_memory.h"
+
+#include "dsp/none.h"
+#include "dsp/utils.h"
+
+#ifdef   __cplusplus
+extern "C"
+{
+#endif
+
+
+
+#if defined(ARM_FLOAT16_SUPPORTED)
+
+
+  /**
+   * @brief Instance structure for the Radix-2 half-precision floating-point CFFT/CIFFT function.
+   */
+  typedef struct
+  {
+          uint16_t fftLen;                   /**< length of the FFT. */
+          uint8_t ifftFlag;                  /**< flag that selects forward (ifftFlag=0) or inverse (ifftFlag=1) transform. */
+          uint8_t bitReverseFlag;            /**< flag that enables (bitReverseFlag=1) or disables (bitReverseFlag=0) bit reversal of output. */
+    const float16_t *pTwiddle;               /**< points to the Twiddle factor table. */
+    const uint16_t *pBitRevTable;            /**< points to the bit reversal table. */
+          uint16_t twidCoefModifier;         /**< twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table. */
+          uint16_t bitRevFactor;             /**< bit reversal modifier that supports different size FFTs with the same bit reversal table. */
+          float16_t onebyfftLen;             /**< value of 1/fftLen. */
+  } arm_cfft_radix2_instance_f16;
+
+  /**
+   * @brief Instance structure for the Radix-4 half-precision floating-point CFFT/CIFFT function.
+   */
+  typedef struct
+  {
+          uint16_t fftLen;                   /**< length of the FFT. */
+          uint8_t ifftFlag;                  /**< flag that selects forward (ifftFlag=0) or inverse (ifftFlag=1) transform. */
+          uint8_t bitReverseFlag;            /**< flag that enables (bitReverseFlag=1) or disables (bitReverseFlag=0) bit reversal of output. */
+    const float16_t *pTwiddle;               /**< points to the Twiddle factor table. */
+    const uint16_t *pBitRevTable;            /**< points to the bit reversal table. */
+          uint16_t twidCoefModifier;         /**< twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table. */
+          uint16_t bitRevFactor;             /**< bit reversal modifier that supports different size FFTs with the same bit reversal table. */
+          float16_t onebyfftLen;             /**< value of 1/fftLen. */
+  } arm_cfft_radix4_instance_f16;
+
+  /**
+   * @brief Instance structure for the floating-point CFFT/CIFFT function.
+   */
+  typedef struct
+  {
+          uint16_t fftLen;                   /**< length of the FFT. */
+    const float16_t *pTwiddle;               /**< points to the Twiddle factor table. */
+    const uint16_t *pBitRevTable;            /**< points to the bit reversal table. */
+          uint16_t bitRevLength;             /**< bit reversal table length. */
+#if defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE)
+    const uint32_t *rearranged_twiddle_tab_stride1_arr; /**< Per stage reordered twiddle pointer (offset 1) */
+    const uint32_t *rearranged_twiddle_tab_stride2_arr; /**< Per stage reordered twiddle pointer (offset 2) */
+    const uint32_t *rearranged_twiddle_tab_stride3_arr; /**< Per stage reordered twiddle pointer (offset 3) */
+    const float16_t *rearranged_twiddle_stride1;        /**< reordered twiddle offset 1 storage */
+    const float16_t *rearranged_twiddle_stride2;        /**< reordered twiddle offset 2 storage */
+    const float16_t *rearranged_twiddle_stride3;        /**< reordered twiddle offset 3 storage */
+#endif
+  } arm_cfft_instance_f16;
+
+
+arm_status arm_cfft_init_4096_f16(arm_cfft_instance_f16 * S);
+arm_status arm_cfft_init_2048_f16(arm_cfft_instance_f16 * S);
+arm_status arm_cfft_init_1024_f16(arm_cfft_instance_f16 * S);
+arm_status arm_cfft_init_512_f16(arm_cfft_instance_f16 * S);
+arm_status arm_cfft_init_256_f16(arm_cfft_instance_f16 * S);
+arm_status arm_cfft_init_128_f16(arm_cfft_instance_f16 * S);
+arm_status arm_cfft_init_64_f16(arm_cfft_instance_f16 * S);
+arm_status arm_cfft_init_32_f16(arm_cfft_instance_f16 * S);
+arm_status arm_cfft_init_16_f16(arm_cfft_instance_f16 * S);
+
+
+  arm_status arm_cfft_init_f16(
+  arm_cfft_instance_f16 * S,
+  uint16_t fftLen);
+
+  void arm_cfft_f16(
+  const arm_cfft_instance_f16 * S,
+        float16_t * p1,
+        uint8_t ifftFlag,
+        uint8_t bitReverseFlag);
+
+  /**
+   * @brief Instance structure for the floating-point RFFT/RIFFT function.
+   */
+typedef struct
+  {
+          arm_cfft_instance_f16 Sint;      /**< Internal CFFT structure. */
+          uint16_t fftLenRFFT;             /**< length of the real sequence */
+    const float16_t * pTwiddleRFFT;        /**< Twiddle factors real stage  */
+  } arm_rfft_fast_instance_f16 ;
+
+arm_status arm_rfft_fast_init_32_f16( arm_rfft_fast_instance_f16 * S );
+arm_status arm_rfft_fast_init_64_f16( arm_rfft_fast_instance_f16 * S );
+arm_status arm_rfft_fast_init_128_f16( arm_rfft_fast_instance_f16 * S );
+arm_status arm_rfft_fast_init_256_f16( arm_rfft_fast_instance_f16 * S );
+arm_status arm_rfft_fast_init_512_f16( arm_rfft_fast_instance_f16 * S );
+arm_status arm_rfft_fast_init_1024_f16( arm_rfft_fast_instance_f16 * S );
+arm_status arm_rfft_fast_init_2048_f16( arm_rfft_fast_instance_f16 * S );
+arm_status arm_rfft_fast_init_4096_f16( arm_rfft_fast_instance_f16 * S );
+
+arm_status arm_rfft_fast_init_f16 (
+         arm_rfft_fast_instance_f16 * S,
+         uint16_t fftLen);
+
+
+  void arm_rfft_fast_f16(
+        const arm_rfft_fast_instance_f16 * S,
+        float16_t * p, float16_t * pOut,
+        uint8_t ifftFlag);
+
+/* Deprecated */
+  arm_status arm_cfft_radix4_init_f16(
+        arm_cfft_radix4_instance_f16 * S,
+        uint16_t fftLen,
+        uint8_t ifftFlag,
+        uint8_t bitReverseFlag);
+
+/* Deprecated */
+  void arm_cfft_radix4_f16(
+  const arm_cfft_radix4_instance_f16 * S,
+        float16_t * pSrc);
+
+
+/* Deprecated */
+  arm_status arm_cfft_radix2_init_f16(
+        arm_cfft_radix2_instance_f16 * S,
+        uint16_t fftLen,
+        uint8_t ifftFlag,
+        uint8_t bitReverseFlag);
+
+/* Deprecated */
+  void arm_cfft_radix2_f16(
+  const arm_cfft_radix2_instance_f16 * S,
+        float16_t * pSrc);
+
+  /**
+   * @brief Instance structure for the Floating-point MFCC function.
+   */
+typedef struct
+  {
+     const float16_t *dctCoefs; /**< Internal DCT coefficients */
+     const float16_t *filterCoefs; /**< Internal Mel filter coefficients */ 
+     const float16_t *windowCoefs; /**< Windowing coefficients */ 
+     const uint32_t *filterPos; /**< Internal Mel filter positions in spectrum */ 
+     const uint32_t *filterLengths; /**< Internal Mel filter lengths */ 
+     uint32_t fftLen; /**< FFT length */
+     uint32_t nbMelFilters; /**< Number of Mel filters */
+     uint32_t nbDctOutputs; /**< Number of DCT outputs */
+#if defined(ARM_MFCC_CFFT_BASED)
+     /* Implementation of the MFCC is using a CFFT */
+     arm_cfft_instance_f16 cfft; /**< Internal CFFT instance */
+#else
+     /* Implementation of the MFCC is using a RFFT (default) */
+     arm_rfft_fast_instance_f16 rfft; /**< Internal RFFT instance */
+#endif
+  } arm_mfcc_instance_f16 ;
+
+arm_status arm_mfcc_init_32_f16(
+  arm_mfcc_instance_f16 * S,
+  uint32_t nbMelFilters,
+  uint32_t nbDctOutputs,
+  const float16_t *dctCoefs,
+  const uint32_t *filterPos,
+  const uint32_t *filterLengths,
+  const float16_t *filterCoefs,
+  const float16_t *windowCoefs
+  );
+
+arm_status arm_mfcc_init_64_f16(
+  arm_mfcc_instance_f16 * S,
+  uint32_t nbMelFilters,
+  uint32_t nbDctOutputs,
+  const float16_t *dctCoefs,
+  const uint32_t *filterPos,
+  const uint32_t *filterLengths,
+  const float16_t *filterCoefs,
+  const float16_t *windowCoefs
+  );
+
+arm_status arm_mfcc_init_128_f16(
+  arm_mfcc_instance_f16 * S,
+  uint32_t nbMelFilters,
+  uint32_t nbDctOutputs,
+  const float16_t *dctCoefs,
+  const uint32_t *filterPos,
+  const uint32_t *filterLengths,
+  const float16_t *filterCoefs,
+  const float16_t *windowCoefs
+  );
+
+arm_status arm_mfcc_init_256_f16(
+  arm_mfcc_instance_f16 * S,
+  uint32_t nbMelFilters,
+  uint32_t nbDctOutputs,
+  const float16_t *dctCoefs,
+  const uint32_t *filterPos,
+  const uint32_t *filterLengths,
+  const float16_t *filterCoefs,
+  const float16_t *windowCoefs
+  );
+
+arm_status arm_mfcc_init_512_f16(
+  arm_mfcc_instance_f16 * S,
+  uint32_t nbMelFilters,
+  uint32_t nbDctOutputs,
+  const float16_t *dctCoefs,
+  const uint32_t *filterPos,
+  const uint32_t *filterLengths,
+  const float16_t *filterCoefs,
+  const float16_t *windowCoefs
+  );
+
+arm_status arm_mfcc_init_1024_f16(
+  arm_mfcc_instance_f16 * S,
+  uint32_t nbMelFilters,
+  uint32_t nbDctOutputs,
+  const float16_t *dctCoefs,
+  const uint32_t *filterPos,
+  const uint32_t *filterLengths,
+  const float16_t *filterCoefs,
+  const float16_t *windowCoefs
+  );
+
+arm_status arm_mfcc_init_2048_f16(
+  arm_mfcc_instance_f16 * S,
+  uint32_t nbMelFilters,
+  uint32_t nbDctOutputs,
+  const float16_t *dctCoefs,
+  const uint32_t *filterPos,
+  const uint32_t *filterLengths,
+  const float16_t *filterCoefs,
+  const float16_t *windowCoefs
+  );
+
+arm_status arm_mfcc_init_4096_f16(
+  arm_mfcc_instance_f16 * S,
+  uint32_t nbMelFilters,
+  uint32_t nbDctOutputs,
+  const float16_t *dctCoefs,
+  const uint32_t *filterPos,
+  const uint32_t *filterLengths,
+  const float16_t *filterCoefs,
+  const float16_t *windowCoefs
+  );
+
+arm_status arm_mfcc_init_f16(
+  arm_mfcc_instance_f16 * S,
+  uint32_t fftLen,
+  uint32_t nbMelFilters,
+  uint32_t nbDctOutputs,
+  const float16_t *dctCoefs,
+  const uint32_t *filterPos,
+  const uint32_t *filterLengths,
+  const float16_t *filterCoefs,
+  const float16_t *windowCoefs
+  );
+
+
+
+/**
+  @brief         MFCC F16
+  @param[in]     S     points to the MFCC instance structure
+  @param[in]     pSrc  points to the input samples
+  @param[out]    pDst  points to the output MFCC values
+  @param[in,out] pTmp  points to a temporary buffer of complex values
+ */
+  void arm_mfcc_f16(
+  const arm_mfcc_instance_f16 * S,
+  float16_t *pSrc,
+  float16_t *pDst,
+  float16_t *pTmp
+  );
+
+  
+#endif /* defined(ARM_FLOAT16_SUPPORTED)*/
+
+#ifdef   __cplusplus
+}
+#endif
+
+#endif /* ifndef TRANSFORM_FUNCTIONS_F16_H_ */
diff --git a/Libraries/CMSIS/5.9.0/DSP/Include/dsp/utils.h b/Libraries/CMSIS/5.9.0/DSP/1.16.2/Include/dsp/utils.h
old mode 100644
new mode 100755
similarity index 56%
rename from Libraries/CMSIS/5.9.0/DSP/Include/dsp/utils.h
rename to Libraries/CMSIS/5.9.0/DSP/1.16.2/Include/dsp/utils.h
index d6e35356250..e0c5c90c17c
--- a/Libraries/CMSIS/5.9.0/DSP/Include/dsp/utils.h
+++ b/Libraries/CMSIS/5.9.0/DSP/1.16.2/Include/dsp/utils.h
@@ -22,40 +22,62 @@
  * limitations under the License.
  */
 
-#ifndef _ARM_MATH_UTILS_H_
+#ifndef ARM_MATH_UTILS_H_
 
-#define _ARM_MATH_UTILS_H_
+#define ARM_MATH_UTILS_H_
 
 #include "arm_math_types.h"
+#include <limits.h>
 
-#ifdef __cplusplus
-extern "C" {
+#ifdef   __cplusplus
+extern "C"
+{
 #endif
 
-/**
+  /**
    * @brief Macros required for reciprocal calculation in Normalized LMS
    */
 
-#define INDEX_MASK 0x0000003F
+#define INDEX_MASK         0x0000003F
 
-#define SQ(x) ((x) * (x))
+#ifndef MIN
+  #define MIN(x,y) ((x) < (y) ? (x) : (y))
+#endif 
 
-#define ROUND_UP(N, S) ((((N) + (S)-1) / (S)) * (S))
+#ifndef MAX
+  #define MAX(x,y) ((x) > (y) ? (x) : (y))
+#endif 
 
-/**
+#ifndef ARM_SQ
+#define ARM_SQ(x) ((x) * (x))
+#endif
+
+#ifndef ARM_ROUND_UP
+  #define ARM_ROUND_UP(N, S) ((((N) + (S) - 1) / (S)) * (S))
+#endif
+
+
+  /**
    * @brief Function to Calculates 1/in (reciprocal) value of Q31 Data type.
+     It should not be used with negative values.
    */
-__STATIC_FORCEINLINE uint32_t arm_recip_q31(q31_t in, q31_t *dst, const q31_t *pRecipTable)
-{
+  __STATIC_FORCEINLINE uint32_t arm_recip_q31(
+        q31_t in,
+        q31_t * dst,
+  const q31_t * pRecipTable)
+  {
     q31_t out;
     uint32_t tempVal;
     uint32_t index, i;
     uint32_t signBits;
 
-    if (in > 0) {
-        signBits = ((uint32_t)(__CLZ(in) - 1));
-    } else {
-        signBits = ((uint32_t)(__CLZ(-in) - 1));
+    if (in > 0)
+    {
+      signBits = ((uint32_t) (__CLZ( (uint32_t)in) - 1));
+    }
+    else
+    {
+      signBits = ((uint32_t) (__CLZ((uint32_t)(-in)) - 1));
     }
 
     /* Convert input sample to 1.31 format */
@@ -70,12 +92,13 @@ __STATIC_FORCEINLINE uint32_t arm_recip_q31(q31_t in, q31_t *dst, const q31_t *p
 
     /* calculation of reciprocal value */
     /* running approximation for two iterations */
-    for (i = 0U; i < 2U; i++) {
-        tempVal = (uint32_t)(((q63_t)in * out) >> 31);
-        tempVal = 0x7FFFFFFFu - tempVal;
-        /*      1.31 with exp 1 */
-        /* out = (q31_t) (((q63_t) out * tempVal) >> 30); */
-        out = clip_q63_to_q31(((q63_t)out * tempVal) >> 30);
+    for (i = 0U; i < 2U; i++)
+    {
+      tempVal = (uint32_t) (((q63_t) in * out) >> 31);
+      tempVal = 0x7FFFFFFFu - tempVal;
+      /*      1.31 with exp 1 */
+      /* out = (q31_t) (((q63_t) out * tempVal) >> 30); */
+      out = clip_q63_to_q31(((q63_t) out * tempVal) >> 30);
     }
 
     /* write output */
@@ -83,29 +106,37 @@ __STATIC_FORCEINLINE uint32_t arm_recip_q31(q31_t in, q31_t *dst, const q31_t *p
 
     /* return num of signbits of out = 1/in value */
     return (signBits + 1U);
-}
+  }
 
-/**
+
+  /**
    * @brief Function to Calculates 1/in (reciprocal) value of Q15 Data type.
+     It should not be used with negative values.
    */
-__STATIC_FORCEINLINE uint32_t arm_recip_q15(q15_t in, q15_t *dst, const q15_t *pRecipTable)
-{
+  __STATIC_FORCEINLINE uint32_t arm_recip_q15(
+        q15_t in,
+        q15_t * dst,
+  const q15_t * pRecipTable)
+  {
     q15_t out = 0;
-    uint32_t tempVal = 0;
+    int32_t tempVal = 0;
     uint32_t index = 0, i = 0;
     uint32_t signBits = 0;
 
-    if (in > 0) {
-        signBits = ((uint32_t)(__CLZ(in) - 17));
-    } else {
-        signBits = ((uint32_t)(__CLZ(-in) - 17));
+    if (in > 0)
+    {
+      signBits = ((uint32_t)(__CLZ( (uint32_t)in) - 17));
+    }
+    else
+    {
+      signBits = ((uint32_t)(__CLZ((uint32_t)(-in)) - 17));
     }
 
     /* Convert input sample to 1.15 format */
-    in = (in << signBits);
+    in = (q15_t)(in << signBits);
 
     /* calculation of index for initial approximated Val */
-    index = (uint32_t)(in >> 8);
+    index = (uint32_t)(in >>  8);
     index = (index & INDEX_MASK);
 
     /*      1.15 with exp 1  */
@@ -113,12 +144,13 @@ __STATIC_FORCEINLINE uint32_t arm_recip_q15(q15_t in, q15_t *dst, const q15_t *p
 
     /* calculation of reciprocal value */
     /* running approximation for two iterations */
-    for (i = 0U; i < 2U; i++) {
-        tempVal = (uint32_t)(((q31_t)in * out) >> 15);
-        tempVal = 0x7FFFu - tempVal;
-        /*      1.15 with exp 1 */
-        out = (q15_t)(((q31_t)out * tempVal) >> 14);
-        /* out = clip_q31_to_q15(((q31_t) out * tempVal) >> 14); */
+    for (i = 0U; i < 2U; i++)
+    {
+      tempVal = (((q31_t) in * out) >> 15);
+      tempVal = 0x7FFF - tempVal;
+      /*      1.15 with exp 1 */
+      out = (q15_t) (((q31_t) out * tempVal) >> 14);
+      /* out = clip_q31_to_q15(((q31_t) out * tempVal) >> 14); */
     }
 
     /* write output */
@@ -126,7 +158,8 @@ __STATIC_FORCEINLINE uint32_t arm_recip_q15(q15_t in, q15_t *dst, const q15_t *p
 
     /* return num of signbits of out = 1/in value */
     return (signBits + 1);
-}
+  }
+
 
 /**
  * @brief  64-bit to 32-bit unsigned normalization
@@ -134,32 +167,37 @@ __STATIC_FORCEINLINE uint32_t arm_recip_q15(q15_t in, q15_t *dst, const q15_t *p
  * @param[out] normalized   is the 32-bit normalized value
  * @param[out] norm         is norm scale
  */
-__STATIC_INLINE void arm_norm_64_to_32u(uint64_t in, int32_t *normalized, int32_t *norm)
+__STATIC_INLINE  void arm_norm_64_to_32u(uint64_t in, int32_t * normalized, int32_t *norm)
 {
-    int32_t n1;
-    int32_t hi = (int32_t)(in >> 32);
-    int32_t lo = (int32_t)((in << 32) >> 32);
+    int32_t     n1;
+    int32_t     hi = (int32_t) (in >> 32);
+    int32_t     lo = (int32_t) ((in << 32) >> 32);
 
-    n1 = __CLZ(hi) - 32;
-    if (!n1) {
+    n1 = __CLZ((uint32_t)hi) - 32;
+    if (!n1)
+    {
         /*
          * input fits in 32-bit
          */
-        n1 = __CLZ(lo);
-        if (!n1) {
+        n1 = __CLZ((uint32_t)lo);
+        if (!n1)
+        {
             /*
              * MSB set, need to scale down by 1
              */
             *norm = -1;
-            *normalized = (((uint32_t)lo) >> 1);
-        } else {
-            if (n1 == 32) {
+            *normalized = (((uint32_t) lo) >> 1);
+        } else
+        {
+            if (n1 == 32)
+            {
                 /*
                  * input is zero
                  */
                 *norm = 0;
                 *normalized = 0;
-            } else {
+            } else
+            {
                 /*
                  * 32-bit normalization
                  */
@@ -167,7 +205,8 @@ __STATIC_INLINE void arm_norm_64_to_32u(uint64_t in, int32_t *normalized, int32_
                 *normalized = lo << *norm;
             }
         }
-    } else {
+    } else
+    {
         /*
          * input fits in 64-bit
          */
@@ -176,38 +215,47 @@ __STATIC_INLINE void arm_norm_64_to_32u(uint64_t in, int32_t *normalized, int32_
         /*
          * 64 bit normalization
          */
-        *normalized = (((uint32_t)lo) >> n1) | (hi << (32 - n1));
+        *normalized = (int32_t)(((uint32_t)lo) >> n1) | (hi << (32 - n1));
     }
 }
 
-__STATIC_INLINE q31_t arm_div_q63_to_q31(q63_t num, q31_t den)
+__STATIC_INLINE int32_t arm_div_int64_to_int32(int64_t num, int32_t den)
 {
-    q31_t result;
-    uint64_t absNum;
-    int32_t normalized;
-    int32_t norm;
+    int32_t   result;
+    uint64_t   absNum;
+    int32_t   normalized;
+    int32_t   norm;
 
     /*
      * if sum fits in 32bits
      * avoid costly 64-bit division
      */
-    absNum = num > 0 ? num : -num;
+    if (num == LLONG_MIN) /* -num would overflow for the most negative 64-bit value (LONG_MIN may be only 32 bits wide) */
+    {
+        absNum = (uint64_t) LLONG_MAX; /* saturate the magnitude; still selects the 64-bit division path */
+    }
+    else
+    {
+       absNum = (uint64_t) (num > 0 ? num : -num);
+    }
     arm_norm_64_to_32u(absNum, &normalized, &norm);
     if (norm > 0)
         /*
          * 32-bit division
          */
-        result = (q31_t)num / den;
+        result = (int32_t) num / den;
     else
         /*
          * 64-bit division
          */
-        result = (q31_t)(num / den);
+        result = (int32_t) (num / den);
 
     return result;
 }
 
-#ifdef __cplusplus
+#undef INDEX_MASK
+
+#ifdef   __cplusplus
 }
 #endif
 
diff --git a/Libraries/CMSIS/5.9.0/DSP/1.16.2/Include/dsp/window_functions.h b/Libraries/CMSIS/5.9.0/DSP/1.16.2/Include/dsp/window_functions.h
new file mode 100644
index 00000000000..27f05a73964
--- /dev/null
+++ b/Libraries/CMSIS/5.9.0/DSP/1.16.2/Include/dsp/window_functions.h
@@ -0,0 +1,812 @@
+/******************************************************************************
+ * @file     window_functions.h
+ * @brief    Public header file for CMSIS DSP Library
+ * @version  v1.15.0
+ * @date     15 December 2022
+ * Target Processor: Cortex-M and Cortex-A cores
+ ******************************************************************************/
+/*
+ * Copyright (c) 2010-2022 Arm Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+ 
+#ifndef WINDOW_FUNCTIONS_H_
+#define WINDOW_FUNCTIONS_H_
+
+#include "arm_math_types.h"
+#include "arm_math_memory.h"
+
+#include "dsp/none.h"
+#include "dsp/utils.h"
+
+
+#ifdef   __cplusplus
+extern "C"
+{
+#endif
+
+/**
+ * @defgroup groupWindow Window Functions
+ */
+
+ /**
+   * @brief Welch window (double).
+   * @param[out] pDst       points to the output generated window
+   * @param[in]  blockSize  number of samples in the window
+   *
+   * @par Parameters of the window
+   * 
+   * | Parameter                             | Value              |
+   * | ------------------------------------: | -----------------: |
+   * | Peak sidelobe level                   |           21.3 dB  |
+   * | Normalized equivalent noise bandwidth |          1.2 bins  |
+   * | Flatness                              |        -2.2248 dB  |
+   * | Recommended overlap                   |            29.3 %  |
+   *
+   */
+  void arm_welch_f64(
+        float64_t * pDst,
+        uint32_t blockSize);
+
+ /**
+   * @brief Welch window (float).
+   * @param[out] pDst       points to the output generated window
+   * @param[in]  blockSize  number of samples in the window
+   *
+   * @par Parameters of the window
+   * 
+   * | Parameter                             | Value              |
+   * | ------------------------------------: | -----------------: |
+   * | Peak sidelobe level                   |           21.3 dB  |
+   * | Normalized equivalent noise bandwidth |          1.2 bins  |
+   * | Flatness                              |        -2.2248 dB  |
+   * | Recommended overlap                   |            29.3 %  |
+   *
+   *
+   */
+  void arm_welch_f32(
+        float32_t * pDst,
+        uint32_t blockSize);
+ /**
+   * @brief Bartlett window (double).
+   * @param[out] pDst       points to the output generated window
+   * @param[in]  blockSize  number of samples in the window
+   *
+   * @par Parameters of the window
+   * 
+   * | Parameter                             | Value              |
+   * | ------------------------------------: | -----------------: |
+   * | Peak sidelobe level                   |           26.5 dB  |
+   * | Normalized equivalent noise bandwidth |       1.3333 bins  |
+   * | Flatness                              |        -1.8242 dB  |
+   * | Recommended overlap                   |            50.0 %  |
+   *
+   */
+  void arm_bartlett_f64(
+        float64_t * pDst,
+        uint32_t blockSize);
+
+ /**
+   * @brief Bartlett window (float).
+   * @param[out] pDst       points to the output generated window
+   * @param[in]  blockSize  number of samples in the window
+   *
+   * @par Parameters of the window
+   * 
+   * | Parameter                             | Value              |
+   * | ------------------------------------: | -----------------: |
+   * | Peak sidelobe level                   |           26.5 dB  |
+   * | Normalized equivalent noise bandwidth |       1.3333 bins  |
+   * | Flatness                              |        -1.8242 dB  |
+   * | Recommended overlap                   |            50.0 %  |
+   *
+   *
+   */
+  void arm_bartlett_f32(
+        float32_t * pDst,
+        uint32_t blockSize);
+ /**
+   * @brief Hamming window (double).
+   * @param[out] pDst       points to the output generated window
+   * @param[in]  blockSize  number of samples in the window
+   *
+   * @par Parameters of the window
+   * 
+   * | Parameter                             | Value              |
+   * | ------------------------------------: | -----------------: |
+   * | Peak sidelobe level                   |           42.7 dB  |
+   * | Normalized equivalent noise bandwidth |       1.3628 bins  |
+   * | Flatness                              |        -1.7514 dB  |
+   * | Recommended overlap                   |              50 %  |
+   *
+   */
+  void arm_hamming_f64(
+        float64_t * pDst,
+        uint32_t blockSize);
+
+ /**
+   * @brief Hamming window (float).
+   * @param[out] pDst       points to the output generated window
+   * @param[in]  blockSize  number of samples in the window
+   *
+   * @par Parameters of the window
+   * 
+   * | Parameter                             | Value              |
+   * | ------------------------------------: | -----------------: |
+   * | Peak sidelobe level                   |           42.7 dB  |
+   * | Normalized equivalent noise bandwidth |       1.3628 bins  |
+   * | Flatness                              |        -1.7514 dB  |
+   * | Recommended overlap                   |              50 %  |
+   *
+   *
+   */
+  void arm_hamming_f32(
+        float32_t * pDst,
+        uint32_t blockSize);
+ /**
+   * @brief Hanning window (double).
+   * @param[out] pDst       points to the output generated window
+   * @param[in]  blockSize  number of samples in the window
+   *
+   * @par Parameters of the window
+   * 
+   * | Parameter                             | Value              |
+   * | ------------------------------------: | -----------------: |
+   * | Peak sidelobe level                   |           31.5 dB  |
+   * | Normalized equivalent noise bandwidth |          1.5 bins  |
+   * | Flatness                              |        -1.4236 dB  |
+   * | Recommended overlap                   |              50 %  |
+   *
+   */
+  void arm_hanning_f64(
+        float64_t * pDst,
+        uint32_t blockSize);
+
+ /**
+   * @brief Hanning window (float).
+   * @param[out] pDst       points to the output generated window
+   * @param[in]  blockSize  number of samples in the window
+   *
+   * @par Parameters of the window
+   * 
+   * | Parameter                             | Value              |
+   * | ------------------------------------: | -----------------: |
+   * | Peak sidelobe level                   |           31.5 dB  |
+   * | Normalized equivalent noise bandwidth |          1.5 bins  |
+   * | Flatness                              |        -1.4236 dB  |
+   * | Recommended overlap                   |              50 %  |
+   *
+   *
+   */
+  void arm_hanning_f32(
+        float32_t * pDst,
+        uint32_t blockSize);
+ /**
+   * @brief Nuttall3 window (double).
+   * @param[out] pDst       points to the output generated window
+   * @param[in]  blockSize  number of samples in the window
+   *
+   * @par Parameters of the window
+   * 
+   * | Parameter                             | Value              |
+   * | ------------------------------------: | -----------------: |
+   * | Peak sidelobe level                   |           46.7 dB  |
+   * | Normalized equivalent noise bandwidth |       1.9444 bins  |
+   * | Flatness                              |         -0.863 dB  |
+   * | Recommended overlap                   |            64.7 %  |
+   *
+   */
+  void arm_nuttall3_f64(
+        float64_t * pDst,
+        uint32_t blockSize);
+
+ /**
+   * @brief Nuttall3 window (float).
+   * @param[out] pDst       points to the output generated window
+   * @param[in]  blockSize  number of samples in the window
+   *
+   * @par Parameters of the window
+   * 
+   * | Parameter                             | Value              |
+   * | ------------------------------------: | -----------------: |
+   * | Peak sidelobe level                   |           46.7 dB  |
+   * | Normalized equivalent noise bandwidth |       1.9444 bins  |
+   * | Flatness                              |         -0.863 dB  |
+   * | Recommended overlap                   |            64.7 %  |
+   *
+   *
+   */
+  void arm_nuttall3_f32(
+        float32_t * pDst,
+        uint32_t blockSize);
+ /**
+   * @brief Nuttall4 window (double).
+   * @param[out] pDst       points to the output generated window
+   * @param[in]  blockSize  number of samples in the window
+   *
+   * @par Parameters of the window
+   * 
+   * | Parameter                             | Value              |
+   * | ------------------------------------: | -----------------: |
+   * | Peak sidelobe level                   |           60.9 dB  |
+   * | Normalized equivalent noise bandwidth |         2.31 bins  |
+   * | Flatness                              |        -0.6184 dB  |
+   * | Recommended overlap                   |            70.5 %  |
+   *
+   */
+  void arm_nuttall4_f64(
+        float64_t * pDst,
+        uint32_t blockSize);
+
+ /**
+   * @brief Nuttall4 window (float).
+   * @param[out] pDst       points to the output generated window
+   * @param[in]  blockSize  number of samples in the window
+   *
+   * @par Parameters of the window
+   * 
+   * | Parameter                             | Value              |
+   * | ------------------------------------: | -----------------: |
+   * | Peak sidelobe level                   |           60.9 dB  |
+   * | Normalized equivalent noise bandwidth |         2.31 bins  |
+   * | Flatness                              |        -0.6184 dB  |
+   * | Recommended overlap                   |            70.5 %  |
+   *
+   *
+   */
+  void arm_nuttall4_f32(
+        float32_t * pDst,
+        uint32_t blockSize);
+ /**
+   * @brief Nuttall3a window (double).
+   * @param[out] pDst       points to the output generated window
+   * @param[in]  blockSize  number of samples in the window
+   *
+   * @par Parameters of the window
+   * 
+   * | Parameter                             | Value              |
+   * | ------------------------------------: | -----------------: |
+   * | Peak sidelobe level                   |           64.2 dB  |
+   * | Normalized equivalent noise bandwidth |       1.7721 bins  |
+   * | Flatness                              |        -1.0453 dB  |
+   * | Recommended overlap                   |            61.2 %  |
+   *
+   */
+  void arm_nuttall3a_f64(
+        float64_t * pDst,
+        uint32_t blockSize);
+
+ /**
+   * @brief Nuttall3a window (float).
+   * @param[out] pDst       points to the output generated window
+   * @param[in]  blockSize  number of samples in the window
+   *
+   * @par Parameters of the window
+   * 
+   * | Parameter                             | Value              |
+   * | ------------------------------------: | -----------------: |
+   * | Peak sidelobe level                   |           64.2 dB  |
+   * | Normalized equivalent noise bandwidth |       1.7721 bins  |
+   * | Flatness                              |        -1.0453 dB  |
+   * | Recommended overlap                   |            61.2 %  |
+   *
+   *
+   */
+  void arm_nuttall3a_f32(
+        float32_t * pDst,
+        uint32_t blockSize);
+ /**
+   * @brief Nuttall3b window (double).
+   * @param[out] pDst       points to the output generated window
+   * @param[in]  blockSize  number of samples in the window
+   *
+   * @par Parameters of the window
+   * 
+   * | Parameter                             | Value              |
+   * | ------------------------------------: | -----------------: |
+   * | Peak sidelobe level                   |           71.5 dB  |
+   * | Normalized equivalent noise bandwidth |       1.7037 bins  |
+   * | Flatness                              |        -1.1352 dB  |
+   * | Recommended overlap                   |            59.8 %  |
+   *
+   */
+  void arm_nuttall3b_f64(
+        float64_t * pDst,
+        uint32_t blockSize);
+
+ /**
+   * @brief Nuttall3b window (float).
+   * @param[out] pDst       points to the output generated window
+   * @param[in]  blockSize  number of samples in the window
+   *
+   * @par Parameters of the window
+   * 
+   * | Parameter                             | Value              |
+   * | ------------------------------------: | -----------------: |
+   * | Peak sidelobe level                   |           71.5 dB  |
+   * | Normalized equivalent noise bandwidth |       1.7037 bins  |
+   * | Flatness                              |        -1.1352 dB  |
+   * | Recommended overlap                   |            59.8 %  |
+   *
+   *
+   */
+  void arm_nuttall3b_f32(
+        float32_t * pDst,
+        uint32_t blockSize);
+ /**
+   * @brief Nuttall4a window (double).
+   * @param[out] pDst       points to the output generated window
+   * @param[in]  blockSize  number of samples in the window
+   *
+   * @par Parameters of the window
+   * 
+   * | Parameter                             | Value              |
+   * | ------------------------------------: | -----------------: |
+   * | Peak sidelobe level                   |           82.6 dB  |
+   * | Normalized equivalent noise bandwidth |       2.1253 bins  |
+   * | Flatness                              |        -0.7321 dB  |
+   * | Recommended overlap                   |            68.0 %  |
+   *
+   */
+  void arm_nuttall4a_f64(
+        float64_t * pDst,
+        uint32_t blockSize);
+
+ /**
+   * @brief Nuttall4a window (float).
+   * @param[out] pDst       points to the output generated window
+   * @param[in]  blockSize  number of samples in the window
+   *
+   * @par Parameters of the window
+   * 
+   * | Parameter                             | Value              |
+   * | ------------------------------------: | -----------------: |
+   * | Peak sidelobe level                   |           82.6 dB  |
+   * | Normalized equivalent noise bandwidth |       2.1253 bins  |
+   * | Flatness                              |        -0.7321 dB  |
+   * | Recommended overlap                   |            68.0 %  |
+   *
+   *
+   */
+  void arm_nuttall4a_f32(
+        float32_t * pDst,
+        uint32_t blockSize);
+ /**
+   * @brief 92 dB Blackman-Harris window (double).
+   * @param[out] pDst       points to the output generated window
+   * @param[in]  blockSize  number of samples in the window
+   *
+   * @par Parameters of the window
+   * 
+   * | Parameter                             | Value              |
+   * | ------------------------------------: | -----------------: |
+   * | Peak sidelobe level                   |           92.0 dB  |
+   * | Normalized equivalent noise bandwidth |       2.0044 bins  |
+   * | Flatness                              |        -0.8256 dB  |
+   * | Recommended overlap                   |            66.1 %  |
+   *
+   */
+  void arm_blackman_harris_92db_f64(
+        float64_t * pDst,
+        uint32_t blockSize);
+
+ /**
+   * @brief 92 dB Blackman-Harris window (float).
+   * @param[out] pDst       points to the output generated window
+   * @param[in]  blockSize  number of samples in the window
+   *
+   * @par Parameters of the window
+   * 
+   * | Parameter                             | Value              |
+   * | ------------------------------------: | -----------------: |
+   * | Peak sidelobe level                   |           92.0 dB  |
+   * | Normalized equivalent noise bandwidth |       2.0044 bins  |
+   * | Flatness                              |        -0.8256 dB  |
+   * | Recommended overlap                   |            66.1 %  |
+   *
+   *
+   */
+  void arm_blackman_harris_92db_f32(
+        float32_t * pDst,
+        uint32_t blockSize);
+ /**
+   * @brief Nuttall4b window (double).
+   * @param[out] pDst       points to the output generated window
+   * @param[in]  blockSize  number of samples in the window
+   *
+   * @par Parameters of the window
+   * 
+   * | Parameter                             | Value              |
+   * | ------------------------------------: | -----------------: |
+   * | Peak sidelobe level                   |           93.3 dB  |
+   * | Normalized equivalent noise bandwidth |       2.0212 bins  |
+   * | Flatness                              |        -0.8118 dB  |
+   * | Recommended overlap                   |            66.3 %  |
+   *
+   */
+  void arm_nuttall4b_f64(
+        float64_t * pDst,
+        uint32_t blockSize);
+
+ /**
+   * @brief Nuttall4b window (float).
+   * @param[out] pDst       points to the output generated window
+   * @param[in]  blockSize  number of samples in the window
+   *
+   * @par Parameters of the window
+   * 
+   * | Parameter                             | Value              |
+   * | ------------------------------------: | -----------------: |
+   * | Peak sidelobe level                   |           93.3 dB  |
+   * | Normalized equivalent noise bandwidth |       2.0212 bins  |
+   * | Flatness                              |        -0.8118 dB  |
+   * | Recommended overlap                   |            66.3 %  |
+   *
+   *
+   */
+  void arm_nuttall4b_f32(
+        float32_t * pDst,
+        uint32_t blockSize);
+ /**
+   * @brief Nuttall4c window (double).
+   * @param[out] pDst       points to the output generated window
+   * @param[in]  blockSize  number of samples in the window
+   *
+   * @par Parameters of the window
+   * 
+   * | Parameter                             | Value              |
+   * | ------------------------------------: | -----------------: |
+   * | Peak sidelobe level                   |           98.1 dB  |
+   * | Normalized equivalent noise bandwidth |       1.9761 bins  |
+   * | Flatness                              |        -0.8506 dB  |
+   * | Recommended overlap                   |            65.6 %  |
+   *
+   */
+  void arm_nuttall4c_f64(
+        float64_t * pDst,
+        uint32_t blockSize);
+
+ /**
+   * @brief Nuttall4c window (float).
+   * @param[out] pDst       points to the output generated window
+   * @param[in]  blockSize  number of samples in the window
+   *
+   * @par Parameters of the window
+   * 
+   * | Parameter                             | Value              |
+   * | ------------------------------------: | -----------------: |
+   * | Peak sidelobe level                   |           98.1 dB  |
+   * | Normalized equivalent noise bandwidth |       1.9761 bins  |
+   * | Flatness                              |        -0.8506 dB  |
+   * | Recommended overlap                   |            65.6 %  |
+   *
+   *
+   */
+  void arm_nuttall4c_f32(
+        float32_t * pDst,
+        uint32_t blockSize);
+ /**
+   * @brief Hft90d window (double).
+   * @param[out] pDst       points to the output generated window
+   * @param[in]  blockSize  number of samples in the window
+   *
+   * @par Parameters of the window
+   * 
+   * | Parameter                             | Value              |
+   * | ------------------------------------: | -----------------: |
+   * | Peak sidelobe level                   |           90.2 dB  |
+   * | Normalized equivalent noise bandwidth |       3.8832 bins  |
+   * | Flatness                              |        -0.0039 dB  |
+   * | Recommended overlap                   |            76.0 %  |
+   *
+   */
+  void arm_hft90d_f64(
+        float64_t * pDst,
+        uint32_t blockSize);
+
+ /**
+   * @brief Hft90d window (float).
+   * @param[out] pDst       points to the output generated window
+   * @param[in]  blockSize  number of samples in the window
+   *
+   * @par Parameters of the window
+   * 
+   * | Parameter                             | Value              |
+   * | ------------------------------------: | -----------------: |
+   * | Peak sidelobe level                   |           90.2 dB  |
+   * | Normalized equivalent noise bandwidth |       3.8832 bins  |
+   * | Flatness                              |        -0.0039 dB  |
+   * | Recommended overlap                   |            76.0 %  |
+   *
+   *
+   */
+  void arm_hft90d_f32(
+        float32_t * pDst,
+        uint32_t blockSize);
+ /**
+   * @brief Hft95 window (double).
+   * @param[out] pDst       points to the output generated window
+   * @param[in]  blockSize  number of samples in the window
+   *
+   * @par Parameters of the window
+   * 
+   * | Parameter                             | Value              |
+   * | ------------------------------------: | -----------------: |
+   * | Peak sidelobe level                   |           95.0 dB  |
+   * | Normalized equivalent noise bandwidth |       3.8112 bins  |
+   * | Flatness                              |         0.0044 dB  |
+   * | Recommended overlap                   |            75.6 %  |
+   *
+   */
+  void arm_hft95_f64(
+        float64_t * pDst,
+        uint32_t blockSize);
+
+ /**
+   * @brief Hft95 window (float).
+   * @param[out] pDst       points to the output generated window
+   * @param[in]  blockSize  number of samples in the window
+   *
+   * @par Parameters of the window
+   * 
+   * | Parameter                             | Value              |
+   * | ------------------------------------: | -----------------: |
+   * | Peak sidelobe level                   |           95.0 dB  |
+   * | Normalized equivalent noise bandwidth |       3.8112 bins  |
+   * | Flatness                              |         0.0044 dB  |
+   * | Recommended overlap                   |            75.6 %  |
+   *
+   *
+   */
+  void arm_hft95_f32(
+        float32_t * pDst,
+        uint32_t blockSize);
+ /**
+   * @brief Hft116d window (double).
+   * @param[out] pDst       points to the output generated window
+   * @param[in]  blockSize  number of samples in the window
+   *
+   * @par Parameters of the window
+   * 
+   * | Parameter                             | Value              |
+   * | ------------------------------------: | -----------------: |
+   * | Peak sidelobe level                   |          116.8 dB  |
+   * | Normalized equivalent noise bandwidth |       4.2186 bins  |
+   * | Flatness                              |        -0.0028 dB  |
+   * | Recommended overlap                   |            78.2 %  |
+   *
+   */
+  void arm_hft116d_f64(
+        float64_t * pDst,
+        uint32_t blockSize);
+
+ /**
+   * @brief Hft116d window (float).
+   * @param[out] pDst       points to the output generated window
+   * @param[in]  blockSize  number of samples in the window
+   *
+   * @par Parameters of the window
+   * 
+   * | Parameter                             | Value              |
+   * | ------------------------------------: | -----------------: |
+   * | Peak sidelobe level                   |          116.8 dB  |
+   * | Normalized equivalent noise bandwidth |       4.2186 bins  |
+   * | Flatness                              |        -0.0028 dB  |
+   * | Recommended overlap                   |            78.2 %  |
+   *
+   *
+   */
+  void arm_hft116d_f32(
+        float32_t * pDst,
+        uint32_t blockSize);
+ /**
+   * @brief Hft144d window (double).
+   * @param[out] pDst       points to the output generated window
+   * @param[in]  blockSize  number of samples in the window
+   *
+   * @par Parameters of the window
+   * 
+   * | Parameter                             | Value              |
+   * | ------------------------------------: | -----------------: |
+   * | Peak sidelobe level                   |          144.1 dB  |
+   * | Normalized equivalent noise bandwidth |       4.5386 bins  |
+   * | Flatness                              |         0.0021 dB  |
+   * | Recommended overlap                   |            79.9 %  |
+   *
+   */
+  void arm_hft144d_f64(
+        float64_t * pDst,
+        uint32_t blockSize);
+
+ /**
+   * @brief Hft144d window (float).
+   * @param[out] pDst       points to the output generated window
+   * @param[in]  blockSize  number of samples in the window
+   *
+   * @par Parameters of the window
+   * 
+   * | Parameter                             | Value              |
+   * | ------------------------------------: | -----------------: |
+   * | Peak sidelobe level                   |          144.1 dB  |
+   * | Normalized equivalent noise bandwidth |       4.5386 bins  |
+   * | Flatness                              |         0.0021 dB  |
+   * | Recommended overlap                   |            79.9 %  |
+   *
+   *
+   */
+  void arm_hft144d_f32(
+        float32_t * pDst,
+        uint32_t blockSize);
+ /**
+   * @brief Hft169d window (double).
+   * @param[out] pDst       points to the output generated window
+   * @param[in]  blockSize  number of samples in the window
+   *
+   * @par Parameters of the window
+   * 
+   * | Parameter                             | Value              |
+   * | ------------------------------------: | -----------------: |
+   * | Peak sidelobe level                   |          169.5 dB  |
+   * | Normalized equivalent noise bandwidth |       4.8347 bins  |
+   * | Flatness                              |         0.0017 dB  |
+   * | Recommended overlap                   |            81.2 %  |
+   *
+   */
+  void arm_hft169d_f64(
+        float64_t * pDst,
+        uint32_t blockSize);
+
+ /**
+   * @brief Hft169d window (float).
+   * @param[out] pDst       points to the output generated window
+   * @param[in]  blockSize  number of samples in the window
+   *
+   * @par Parameters of the window
+   * 
+   * | Parameter                             | Value              |
+   * | ------------------------------------: | -----------------: |
+   * | Peak sidelobe level                   |          169.5 dB  |
+   * | Normalized equivalent noise bandwidth |       4.8347 bins  |
+   * | Flatness                              |         0.0017 dB  |
+   * | Recommended overlap                   |            81.2 %  |
+   *
+   *
+   */
+  void arm_hft169d_f32(
+        float32_t * pDst,
+        uint32_t blockSize);
+ /**
+   * @brief Hft196d window (double).
+   * @param[out] pDst       points to the output generated window
+   * @param[in]  blockSize  number of samples in the window
+   *
+   * @par Parameters of the window
+   * 
+   * | Parameter                             | Value              |
+   * | ------------------------------------: | -----------------: |
+   * | Peak sidelobe level                   |          196.2 dB  |
+   * | Normalized equivalent noise bandwidth |       5.1134 bins  |
+   * | Flatness                              |         0.0013 dB  |
+   * | Recommended overlap                   |            82.3 %  |
+   *
+   */
+  void arm_hft196d_f64(
+        float64_t * pDst,
+        uint32_t blockSize);
+
+ /**
+   * @brief Hft196d window (float).
+   * @param[out] pDst       points to the output generated window
+   * @param[in]  blockSize  number of samples in the window
+   *
+   * @par Parameters of the window
+   * 
+   * | Parameter                             | Value              |
+   * | ------------------------------------: | -----------------: |
+   * | Peak sidelobe level                   |          196.2 dB  |
+   * | Normalized equivalent noise bandwidth |       5.1134 bins  |
+   * | Flatness                              |         0.0013 dB  |
+   * | Recommended overlap                   |            82.3 %  |
+   *
+   *
+   */
+  void arm_hft196d_f32(
+        float32_t * pDst,
+        uint32_t blockSize);
+ /**
+   * @brief Hft223d window (double).
+   * @param[out] pDst       points to the output generated window
+   * @param[in]  blockSize  number of samples in the window
+   *
+   * @par Parameters of the window
+   * 
+   * | Parameter                             | Value              |
+   * | ------------------------------------: | -----------------: |
+   * | Peak sidelobe level                   |          223.0 dB  |
+   * | Normalized equivalent noise bandwidth |       5.3888 bins  |
+   * | Flatness                              |         0.0011 dB  |
+   * | Recommended overlap                   |            83.3 %  |
+   *
+   */
+  void arm_hft223d_f64(
+        float64_t * pDst,
+        uint32_t blockSize);
+
+ /**
+   * @brief Hft223d window (float).
+   * @param[out] pDst       points to the output generated window
+   * @param[in]  blockSize  number of samples in the window
+   *
+   * @par Parameters of the window
+   * 
+   * | Parameter                             | Value              |
+   * | ------------------------------------: | -----------------: |
+   * | Peak sidelobe level                   |          223.0 dB  |
+   * | Normalized equivalent noise bandwidth |       5.3888 bins  |
+   * | Flatness                              |         0.0011 dB  |
+   * | Recommended overlap                   |            83.3 %  |
+   *
+   *
+   */
+  void arm_hft223d_f32(
+        float32_t * pDst,
+        uint32_t blockSize);
+ /**
+   * @brief Hft248d window (double).
+   * @param[out] pDst       points to the output generated window
+   * @param[in]  blockSize  number of samples in the window
+   *
+   * @par Parameters of the window
+   * 
+   * | Parameter                             | Value              |
+   * | ------------------------------------: | -----------------: |
+   * | Peak sidelobe level                   |          248.4 dB  |
+   * | Normalized equivalent noise bandwidth |       5.6512 bins  |
+   * | Flatness                              |         0.0009 dB  |
+   * | Recommended overlap                   |            84.1 %  |
+   *
+   */
+  void arm_hft248d_f64(
+        float64_t * pDst,
+        uint32_t blockSize);
+
+ /**
+   * @brief Hft248d window (float).
+   * @param[out] pDst       points to the output generated window
+   * @param[in]  blockSize  number of samples in the window
+   *
+   * @par Parameters of the window
+   * 
+   * | Parameter                             | Value              |
+   * | ------------------------------------: | -----------------: |
+   * | Peak sidelobe level                   |          248.4 dB  |
+   * | Normalized equivalent noise bandwidth |       5.6512 bins  |
+   * | Flatness                              |         0.0009 dB  |
+   * | Recommended overlap                   |            84.1 %  |
+   *
+   *
+   */
+  void arm_hft248d_f32(
+        float32_t * pDst,
+        uint32_t blockSize);
+
+
+#ifdef   __cplusplus
+}
+#endif
+
+#endif /* ifndef WINDOW_FUNCTIONS_H_ */
diff --git a/Libraries/CMSIS/5.9.0/DSP/1.16.2/Lib/libarm_cortexM4l_math.a b/Libraries/CMSIS/5.9.0/DSP/1.16.2/Lib/libarm_cortexM4l_math.a
new file mode 100644
index 00000000000..04ec56d0f75
Binary files /dev/null and b/Libraries/CMSIS/5.9.0/DSP/1.16.2/Lib/libarm_cortexM4l_math.a differ
diff --git a/Libraries/CMSIS/5.9.0/DSP/1.16.2/Lib/libarm_cortexM4lf_math.a b/Libraries/CMSIS/5.9.0/DSP/1.16.2/Lib/libarm_cortexM4lf_math.a
new file mode 100644
index 00000000000..ef65b59511b
Binary files /dev/null and b/Libraries/CMSIS/5.9.0/DSP/1.16.2/Lib/libarm_cortexM4lf_math.a differ
diff --git a/Libraries/CMSIS/5.9.0/DSP/CMSIS-DSP.mk b/Libraries/CMSIS/5.9.0/DSP/CMSIS-DSP.mk
index 7121c0873de..5e039a17eb5 100644
--- a/Libraries/CMSIS/5.9.0/DSP/CMSIS-DSP.mk
+++ b/Libraries/CMSIS/5.9.0/DSP/CMSIS-DSP.mk
@@ -1,12 +1,41 @@
+##############################################################################
+ #
+ # Copyright 2023-2024 Analog Devices, Inc.
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ #     http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+ #
+ ##############################################################################
 # Makefile for linking against the CMSIS-DSP library.
 
 ifeq "$(CMSIS_ROOT)" ""
 CMSIS_ROOT=../../
 endif
 
+ifeq "$(CMSIS_DSP_DIR)" ""
+# If CMSIS_DSP_DIR is not specified, this Makefile will locate itself.
+CMSIS_DSP_DIR := $(dir $(abspath $(lastword $(MAKEFILE_LIST))))
+endif
+
+CMSIS_DSP_VERSION ?= 1.16.2
+ifeq ("$(wildcard $(CMSIS_DSP_DIR)/$(CMSIS_DSP_VERSION))","")
+$(error Unsupported CMSIS-DSP version '$(CMSIS_DSP_VERSION)'.  Searched $(CMSIS_DSP_DIR))
+endif
+
+$(info Enabled CMSIS-DSP v$(CMSIS_DSP_VERSION))
+
 # Include paths...
 # DSP files
-IPATH += $(CMSIS_ROOT)/5.9.0/DSP/Include
+IPATH += $(CMSIS_DSP_DIR)/$(CMSIS_DSP_VERSION)/Include
 # Some newer CMSIS5 core include files, such as cmsis_compiler.h, etc.
 IPATH += $(CMSIS_ROOT)/5.9.0/Core/Include
 
@@ -18,7 +47,7 @@ PROJ_CFLAGS+=-DARM_MATH_CM4
 PROJ_CFLAGS+=-D__FPU_PRESENT
 
 # Where to find the DSP library file
-PROJ_LDFLAGS += -L$(CMSIS_ROOT)/5.9.0/DSP/Lib
+PROJ_LDFLAGS += -L$(CMSIS_DSP_DIR)/$(CMSIS_DSP_VERSION)/Lib
 
 ifeq "$(MFLOAT_ABI)" ""
 $(warning ***The 'MFLOAT_ABI' Makefile variable is not set!***  Using softfp CMSIS-DSP instructions by default.)
diff --git a/Libraries/CMSIS/5.9.0/DSP/Include/arm_common_tables.h b/Libraries/CMSIS/5.9.0/DSP/Include/arm_common_tables.h
deleted file mode 100644
index 44b19ca71a1..00000000000
--- a/Libraries/CMSIS/5.9.0/DSP/Include/arm_common_tables.h
+++ /dev/null
@@ -1,630 +0,0 @@
-/* ----------------------------------------------------------------------
- * Project:      CMSIS DSP Library
- * Title:        arm_common_tables.h
- * Description:  Extern declaration for common tables
- *
- * @version  V1.10.0
- * @date     08 July 2021
- *
- * Target Processor: Cortex-M and Cortex-A cores
- * -------------------------------------------------------------------- */
-/*
- * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
- *
- * SPDX-License-Identifier: Apache-2.0
- *
- * Licensed under the Apache License, Version 2.0 (the License); you may
- * not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an AS IS BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef _ARM_COMMON_TABLES_H
-#define _ARM_COMMON_TABLES_H
-
-#include "arm_math_types.h"
-#include "dsp/fast_math_functions.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_FFT_ALLOW_TABLES)
-/* Double Precision Float CFFT twiddles */
-#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_BITREV_1024)
-extern const uint16_t armBitRevTable[1024];
-#endif /* !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) */
-
-#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || \
-    defined(ARM_TABLE_TWIDDLECOEF_F64_16)
-extern const uint64_t twiddleCoefF64_16[32];
-#endif /* !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) */
-
-#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || \
-    defined(ARM_TABLE_TWIDDLECOEF_F64_32)
-extern const uint64_t twiddleCoefF64_32[64];
-#endif /* !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) */
-
-#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || \
-    defined(ARM_TABLE_TWIDDLECOEF_F64_64)
-extern const uint64_t twiddleCoefF64_64[128];
-#endif /* !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) */
-
-#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || \
-    defined(ARM_TABLE_TWIDDLECOEF_F64_128)
-extern const uint64_t twiddleCoefF64_128[256];
-#endif /* !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) */
-
-#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || \
-    defined(ARM_TABLE_TWIDDLECOEF_F64_256)
-extern const uint64_t twiddleCoefF64_256[512];
-#endif /* !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) */
-
-#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || \
-    defined(ARM_TABLE_TWIDDLECOEF_F64_512)
-extern const uint64_t twiddleCoefF64_512[1024];
-#endif /* !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) */
-
-#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || \
-    defined(ARM_TABLE_TWIDDLECOEF_F64_1024)
-extern const uint64_t twiddleCoefF64_1024[2048];
-#endif /* !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) */
-
-#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || \
-    defined(ARM_TABLE_TWIDDLECOEF_F64_2048)
-extern const uint64_t twiddleCoefF64_2048[4096];
-#endif /* !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) */
-
-#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || \
-    defined(ARM_TABLE_TWIDDLECOEF_F64_4096)
-extern const uint64_t twiddleCoefF64_4096[8192];
-#endif /* !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) */
-
-#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || \
-    defined(ARM_TABLE_TWIDDLECOEF_F32_16)
-extern const float32_t twiddleCoef_16[32];
-#endif /* !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) */
-
-#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || \
-    defined(ARM_TABLE_TWIDDLECOEF_F32_32)
-extern const float32_t twiddleCoef_32[64];
-#endif /* !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) */
-
-#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || \
-    defined(ARM_TABLE_TWIDDLECOEF_F32_64)
-extern const float32_t twiddleCoef_64[128];
-#endif /* !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) */
-
-#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || \
-    defined(ARM_TABLE_TWIDDLECOEF_F32_128)
-extern const float32_t twiddleCoef_128[256];
-#endif /* !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) */
-
-#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || \
-    defined(ARM_TABLE_TWIDDLECOEF_F32_256)
-extern const float32_t twiddleCoef_256[512];
-#endif /* !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) */
-
-#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || \
-    defined(ARM_TABLE_TWIDDLECOEF_F32_512)
-extern const float32_t twiddleCoef_512[1024];
-#endif /* !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) */
-
-#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || \
-    defined(ARM_TABLE_TWIDDLECOEF_F32_1024)
-extern const float32_t twiddleCoef_1024[2048];
-#endif /* !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) */
-
-#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || \
-    defined(ARM_TABLE_TWIDDLECOEF_F32_2048)
-extern const float32_t twiddleCoef_2048[4096];
-#endif /* !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) */
-
-#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || \
-    defined(ARM_TABLE_TWIDDLECOEF_F32_4096)
-extern const float32_t twiddleCoef_4096[8192];
-#define twiddleCoef twiddleCoef_4096
-#endif /* !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) */
-
-/* Q31 */
-
-#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || \
-    defined(ARM_TABLE_TWIDDLECOEF_Q31_16)
-extern const q31_t twiddleCoef_16_q31[24];
-#endif /* !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) */
-
-#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || \
-    defined(ARM_TABLE_TWIDDLECOEF_Q31_32)
-extern const q31_t twiddleCoef_32_q31[48];
-#endif /* !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) */
-
-#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || \
-    defined(ARM_TABLE_TWIDDLECOEF_Q31_64)
-extern const q31_t twiddleCoef_64_q31[96];
-#endif /* !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) */
-
-#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || \
-    defined(ARM_TABLE_TWIDDLECOEF_Q31_128)
-extern const q31_t twiddleCoef_128_q31[192];
-#endif /* !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) */
-
-#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || \
-    defined(ARM_TABLE_TWIDDLECOEF_Q31_256)
-extern const q31_t twiddleCoef_256_q31[384];
-#endif /* !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) */
-
-#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || \
-    defined(ARM_TABLE_TWIDDLECOEF_Q31_512)
-extern const q31_t twiddleCoef_512_q31[768];
-#endif /* !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) */
-
-#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || \
-    defined(ARM_TABLE_TWIDDLECOEF_Q31_1024)
-extern const q31_t twiddleCoef_1024_q31[1536];
-#endif /* !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) */
-
-#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || \
-    defined(ARM_TABLE_TWIDDLECOEF_Q31_2048)
-extern const q31_t twiddleCoef_2048_q31[3072];
-#endif /* !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) */
-
-#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || \
-    defined(ARM_TABLE_TWIDDLECOEF_Q31_4096)
-extern const q31_t twiddleCoef_4096_q31[6144];
-#endif /* !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) */
-
-#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || \
-    defined(ARM_TABLE_TWIDDLECOEF_Q15_16)
-extern const q15_t twiddleCoef_16_q15[24];
-#endif /* !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) */
-
-#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || \
-    defined(ARM_TABLE_TWIDDLECOEF_Q15_32)
-extern const q15_t twiddleCoef_32_q15[48];
-#endif /* !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) */
-
-#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || \
-    defined(ARM_TABLE_TWIDDLECOEF_Q15_64)
-extern const q15_t twiddleCoef_64_q15[96];
-#endif /* !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) */
-
-#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || \
-    defined(ARM_TABLE_TWIDDLECOEF_Q15_128)
-extern const q15_t twiddleCoef_128_q15[192];
-#endif /* !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) */
-
-#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || \
-    defined(ARM_TABLE_TWIDDLECOEF_Q15_256)
-extern const q15_t twiddleCoef_256_q15[384];
-#endif /* !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) */
-
-#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || \
-    defined(ARM_TABLE_TWIDDLECOEF_Q15_512)
-extern const q15_t twiddleCoef_512_q15[768];
-#endif /* !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) */
-
-#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || \
-    defined(ARM_TABLE_TWIDDLECOEF_Q15_1024)
-extern const q15_t twiddleCoef_1024_q15[1536];
-#endif /* !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) */
-
-#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || \
-    defined(ARM_TABLE_TWIDDLECOEF_Q15_2048)
-extern const q15_t twiddleCoef_2048_q15[3072];
-#endif /* !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) */
-
-#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || \
-    defined(ARM_TABLE_TWIDDLECOEF_Q15_4096)
-extern const q15_t twiddleCoef_4096_q15[6144];
-#endif /* !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) */
-
-/* Double Precision Float RFFT twiddles */
-#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || \
-    defined(ARM_TABLE_TWIDDLECOEF_RFFT_F64_32)
-extern const uint64_t twiddleCoefF64_rfft_32[32];
-#endif /* !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) */
-
-#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || \
-    defined(ARM_TABLE_TWIDDLECOEF_RFFT_F64_64)
-extern const uint64_t twiddleCoefF64_rfft_64[64];
-#endif /* !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) */
-
-#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || \
-    defined(ARM_TABLE_TWIDDLECOEF_RFFT_F64_128)
-extern const uint64_t twiddleCoefF64_rfft_128[128];
-#endif /* !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) */
-
-#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || \
-    defined(ARM_TABLE_TWIDDLECOEF_RFFT_F64_256)
-extern const uint64_t twiddleCoefF64_rfft_256[256];
-#endif /* !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) */
-
-#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || \
-    defined(ARM_TABLE_TWIDDLECOEF_RFFT_F64_512)
-extern const uint64_t twiddleCoefF64_rfft_512[512];
-#endif /* !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) */
-
-#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || \
-    defined(ARM_TABLE_TWIDDLECOEF_RFFT_F64_1024)
-extern const uint64_t twiddleCoefF64_rfft_1024[1024];
-#endif /* !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) */
-
-#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || \
-    defined(ARM_TABLE_TWIDDLECOEF_RFFT_F64_2048)
-extern const uint64_t twiddleCoefF64_rfft_2048[2048];
-#endif /* !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) */
-
-#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || \
-    defined(ARM_TABLE_TWIDDLECOEF_RFFT_F64_4096)
-extern const uint64_t twiddleCoefF64_rfft_4096[4096];
-#endif /* !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) */
-
-#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || \
-    defined(ARM_TABLE_TWIDDLECOEF_RFFT_F32_32)
-extern const float32_t twiddleCoef_rfft_32[32];
-#endif /* !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) */
-
-#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || \
-    defined(ARM_TABLE_TWIDDLECOEF_RFFT_F32_64)
-extern const float32_t twiddleCoef_rfft_64[64];
-#endif /* !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) */
-
-#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || \
-    defined(ARM_TABLE_TWIDDLECOEF_RFFT_F32_128)
-extern const float32_t twiddleCoef_rfft_128[128];
-#endif /* !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) */
-
-#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || \
-    defined(ARM_TABLE_TWIDDLECOEF_RFFT_F32_256)
-extern const float32_t twiddleCoef_rfft_256[256];
-#endif /* !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) */
-
-#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || \
-    defined(ARM_TABLE_TWIDDLECOEF_RFFT_F32_512)
-extern const float32_t twiddleCoef_rfft_512[512];
-#endif /* !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) */
-
-#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || \
-    defined(ARM_TABLE_TWIDDLECOEF_RFFT_F32_1024)
-extern const float32_t twiddleCoef_rfft_1024[1024];
-#endif /* !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) */
-
-#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || \
-    defined(ARM_TABLE_TWIDDLECOEF_RFFT_F32_2048)
-extern const float32_t twiddleCoef_rfft_2048[2048];
-#endif /* !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) */
-
-#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || \
-    defined(ARM_TABLE_TWIDDLECOEF_RFFT_F32_4096)
-extern const float32_t twiddleCoef_rfft_4096[4096];
-#endif /* !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) */
-
-/* Double precision floating-point bit reversal tables */
-
-#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || \
-    defined(ARM_TABLE_BITREVIDX_FLT64_16)
-#define ARMBITREVINDEXTABLEF64_16_TABLE_LENGTH ((uint16_t)12)
-extern const uint16_t armBitRevIndexTableF64_16[ARMBITREVINDEXTABLEF64_16_TABLE_LENGTH];
-#endif /* !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) */
-
-#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || \
-    defined(ARM_TABLE_BITREVIDX_FLT64_32)
-#define ARMBITREVINDEXTABLEF64_32_TABLE_LENGTH ((uint16_t)24)
-extern const uint16_t armBitRevIndexTableF64_32[ARMBITREVINDEXTABLEF64_32_TABLE_LENGTH];
-#endif /* !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) */
-
-#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || \
-    defined(ARM_TABLE_BITREVIDX_FLT64_64)
-#define ARMBITREVINDEXTABLEF64_64_TABLE_LENGTH ((uint16_t)56)
-extern const uint16_t armBitRevIndexTableF64_64[ARMBITREVINDEXTABLEF64_64_TABLE_LENGTH];
-#endif /* !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) */
-
-#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || \
-    defined(ARM_TABLE_BITREVIDX_FLT64_128)
-#define ARMBITREVINDEXTABLEF64_128_TABLE_LENGTH ((uint16_t)112)
-extern const uint16_t armBitRevIndexTableF64_128[ARMBITREVINDEXTABLEF64_128_TABLE_LENGTH];
-#endif /* !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) */
-
-#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || \
-    defined(ARM_TABLE_BITREVIDX_FLT64_256)
-#define ARMBITREVINDEXTABLEF64_256_TABLE_LENGTH ((uint16_t)240)
-extern const uint16_t armBitRevIndexTableF64_256[ARMBITREVINDEXTABLEF64_256_TABLE_LENGTH];
-#endif /* !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) */
-
-#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || \
-    defined(ARM_TABLE_BITREVIDX_FLT64_512)
-#define ARMBITREVINDEXTABLEF64_512_TABLE_LENGTH ((uint16_t)480)
-extern const uint16_t armBitRevIndexTableF64_512[ARMBITREVINDEXTABLEF64_512_TABLE_LENGTH];
-#endif /* !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) */
-
-#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || \
-    defined(ARM_TABLE_BITREVIDX_FLT64_1024)
-#define ARMBITREVINDEXTABLEF64_1024_TABLE_LENGTH ((uint16_t)992)
-extern const uint16_t armBitRevIndexTableF64_1024[ARMBITREVINDEXTABLEF64_1024_TABLE_LENGTH];
-#endif /* !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) */
-
-#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || \
-    defined(ARM_TABLE_BITREVIDX_FLT64_2048)
-#define ARMBITREVINDEXTABLEF64_2048_TABLE_LENGTH ((uint16_t)1984)
-extern const uint16_t armBitRevIndexTableF64_2048[ARMBITREVINDEXTABLEF64_2048_TABLE_LENGTH];
-#endif /* !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) */
-
-#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || \
-    defined(ARM_TABLE_BITREVIDX_FLT64_4096)
-#define ARMBITREVINDEXTABLEF64_4096_TABLE_LENGTH ((uint16_t)4032)
-extern const uint16_t armBitRevIndexTableF64_4096[ARMBITREVINDEXTABLEF64_4096_TABLE_LENGTH];
-#endif /* !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) */
-/* floating-point bit reversal tables */
-
-#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || \
-    defined(ARM_TABLE_BITREVIDX_FLT_16)
-#define ARMBITREVINDEXTABLE_16_TABLE_LENGTH ((uint16_t)20)
-extern const uint16_t armBitRevIndexTable16[ARMBITREVINDEXTABLE_16_TABLE_LENGTH];
-#endif /* !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) */
-
-#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || \
-    defined(ARM_TABLE_BITREVIDX_FLT_32)
-#define ARMBITREVINDEXTABLE_32_TABLE_LENGTH ((uint16_t)48)
-extern const uint16_t armBitRevIndexTable32[ARMBITREVINDEXTABLE_32_TABLE_LENGTH];
-#endif /* !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) */
-
-#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || \
-    defined(ARM_TABLE_BITREVIDX_FLT_64)
-#define ARMBITREVINDEXTABLE_64_TABLE_LENGTH ((uint16_t)56)
-extern const uint16_t armBitRevIndexTable64[ARMBITREVINDEXTABLE_64_TABLE_LENGTH];
-#endif /* !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) */
-
-#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || \
-    defined(ARM_TABLE_BITREVIDX_FLT_128)
-#define ARMBITREVINDEXTABLE_128_TABLE_LENGTH ((uint16_t)208)
-extern const uint16_t armBitRevIndexTable128[ARMBITREVINDEXTABLE_128_TABLE_LENGTH];
-#endif /* !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) */
-
-#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || \
-    defined(ARM_TABLE_BITREVIDX_FLT_256)
-#define ARMBITREVINDEXTABLE_256_TABLE_LENGTH ((uint16_t)440)
-extern const uint16_t armBitRevIndexTable256[ARMBITREVINDEXTABLE_256_TABLE_LENGTH];
-#endif /* !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) */
-
-#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || \
-    defined(ARM_TABLE_BITREVIDX_FLT_512)
-#define ARMBITREVINDEXTABLE_512_TABLE_LENGTH ((uint16_t)448)
-extern const uint16_t armBitRevIndexTable512[ARMBITREVINDEXTABLE_512_TABLE_LENGTH];
-#endif /* !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) */
-
-#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || \
-    defined(ARM_TABLE_BITREVIDX_FLT_1024)
-#define ARMBITREVINDEXTABLE_1024_TABLE_LENGTH ((uint16_t)1800)
-extern const uint16_t armBitRevIndexTable1024[ARMBITREVINDEXTABLE_1024_TABLE_LENGTH];
-#endif /* !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) */
-
-#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || \
-    defined(ARM_TABLE_BITREVIDX_FLT_2048)
-#define ARMBITREVINDEXTABLE_2048_TABLE_LENGTH ((uint16_t)3808)
-extern const uint16_t armBitRevIndexTable2048[ARMBITREVINDEXTABLE_2048_TABLE_LENGTH];
-#endif /* !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) */
-
-#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || \
-    defined(ARM_TABLE_BITREVIDX_FLT_4096)
-#define ARMBITREVINDEXTABLE_4096_TABLE_LENGTH ((uint16_t)4032)
-extern const uint16_t armBitRevIndexTable4096[ARMBITREVINDEXTABLE_4096_TABLE_LENGTH];
-#endif /* !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) */
-
-/* fixed-point bit reversal tables */
-
-#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || \
-    defined(ARM_TABLE_BITREVIDX_FXT_16)
-#define ARMBITREVINDEXTABLE_FIXED_16_TABLE_LENGTH ((uint16_t)12)
-extern const uint16_t armBitRevIndexTable_fixed_16[ARMBITREVINDEXTABLE_FIXED_16_TABLE_LENGTH];
-#endif /* !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) */
-
-#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || \
-    defined(ARM_TABLE_BITREVIDX_FXT_32)
-#define ARMBITREVINDEXTABLE_FIXED_32_TABLE_LENGTH ((uint16_t)24)
-extern const uint16_t armBitRevIndexTable_fixed_32[ARMBITREVINDEXTABLE_FIXED_32_TABLE_LENGTH];
-#endif /* !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) */
-
-#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || \
-    defined(ARM_TABLE_BITREVIDX_FXT_64)
-#define ARMBITREVINDEXTABLE_FIXED_64_TABLE_LENGTH ((uint16_t)56)
-extern const uint16_t armBitRevIndexTable_fixed_64[ARMBITREVINDEXTABLE_FIXED_64_TABLE_LENGTH];
-#endif /* !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) */
-
-#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || \
-    defined(ARM_TABLE_BITREVIDX_FXT_128)
-#define ARMBITREVINDEXTABLE_FIXED_128_TABLE_LENGTH ((uint16_t)112)
-extern const uint16_t armBitRevIndexTable_fixed_128[ARMBITREVINDEXTABLE_FIXED_128_TABLE_LENGTH];
-#endif /* !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) */
-
-#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || \
-    defined(ARM_TABLE_BITREVIDX_FXT_256)
-#define ARMBITREVINDEXTABLE_FIXED_256_TABLE_LENGTH ((uint16_t)240)
-extern const uint16_t armBitRevIndexTable_fixed_256[ARMBITREVINDEXTABLE_FIXED_256_TABLE_LENGTH];
-#endif /* !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) */
-
-#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || \
-    defined(ARM_TABLE_BITREVIDX_FXT_512)
-#define ARMBITREVINDEXTABLE_FIXED_512_TABLE_LENGTH ((uint16_t)480)
-extern const uint16_t armBitRevIndexTable_fixed_512[ARMBITREVINDEXTABLE_FIXED_512_TABLE_LENGTH];
-#endif /* !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) */
-
-#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || \
-    defined(ARM_TABLE_BITREVIDX_FXT_1024)
-#define ARMBITREVINDEXTABLE_FIXED_1024_TABLE_LENGTH ((uint16_t)992)
-extern const uint16_t armBitRevIndexTable_fixed_1024[ARMBITREVINDEXTABLE_FIXED_1024_TABLE_LENGTH];
-#endif /* !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) */
-
-#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || \
-    defined(ARM_TABLE_BITREVIDX_FXT_2048)
-#define ARMBITREVINDEXTABLE_FIXED_2048_TABLE_LENGTH ((uint16_t)1984)
-extern const uint16_t armBitRevIndexTable_fixed_2048[ARMBITREVINDEXTABLE_FIXED_2048_TABLE_LENGTH];
-#endif /* !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) */
-
-#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || \
-    defined(ARM_TABLE_BITREVIDX_FXT_4096)
-#define ARMBITREVINDEXTABLE_FIXED_4096_TABLE_LENGTH ((uint16_t)4032)
-extern const uint16_t armBitRevIndexTable_fixed_4096[ARMBITREVINDEXTABLE_FIXED_4096_TABLE_LENGTH];
-#endif /* !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) */
-
-#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || \
-    defined(ARM_TABLE_REALCOEF_F32)
-extern const float32_t realCoefA[8192];
-extern const float32_t realCoefB[8192];
-#endif
-
-#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || \
-    defined(ARM_TABLE_REALCOEF_Q31)
-extern const q31_t realCoefAQ31[8192];
-extern const q31_t realCoefBQ31[8192];
-#endif
-
-#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || \
-    defined(ARM_TABLE_REALCOEF_Q15)
-extern const q15_t realCoefAQ15[8192];
-extern const q15_t realCoefBQ15[8192];
-#endif
-
-#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || \
-    defined(ARM_TABLE_DCT4_F32_128)
-extern const float32_t Weights_128[256];
-extern const float32_t cos_factors_128[128];
-#endif
-
-#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || \
-    defined(ARM_TABLE_DCT4_F32_512)
-extern const float32_t Weights_512[1024];
-extern const float32_t cos_factors_512[512];
-#endif
-
-#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || \
-    defined(ARM_TABLE_DCT4_F32_2048)
-extern const float32_t Weights_2048[4096];
-extern const float32_t cos_factors_2048[2048];
-#endif
-
-#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || \
-    defined(ARM_TABLE_DCT4_F32_8192)
-extern const float32_t Weights_8192[16384];
-extern const float32_t cos_factors_8192[8192];
-#endif
-
-#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || \
-    defined(ARM_TABLE_DCT4_Q15_128)
-extern const q15_t WeightsQ15_128[256];
-extern const q15_t cos_factorsQ15_128[128];
-#endif
-
-#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || \
-    defined(ARM_TABLE_DCT4_Q15_512)
-extern const q15_t WeightsQ15_512[1024];
-extern const q15_t cos_factorsQ15_512[512];
-#endif
-
-#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || \
-    defined(ARM_TABLE_DCT4_Q15_2048)
-extern const q15_t WeightsQ15_2048[4096];
-extern const q15_t cos_factorsQ15_2048[2048];
-#endif
-
-#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || \
-    defined(ARM_TABLE_DCT4_Q15_8192)
-extern const q15_t WeightsQ15_8192[16384];
-extern const q15_t cos_factorsQ15_8192[8192];
-#endif
-
-#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || \
-    defined(ARM_TABLE_DCT4_Q31_128)
-extern const q31_t WeightsQ31_128[256];
-extern const q31_t cos_factorsQ31_128[128];
-#endif
-
-#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || \
-    defined(ARM_TABLE_DCT4_Q31_512)
-extern const q31_t WeightsQ31_512[1024];
-extern const q31_t cos_factorsQ31_512[512];
-#endif
-
-#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || \
-    defined(ARM_TABLE_DCT4_Q31_2048)
-extern const q31_t WeightsQ31_2048[4096];
-extern const q31_t cos_factorsQ31_2048[2048];
-#endif
-
-#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || \
-    defined(ARM_TABLE_DCT4_Q31_8192)
-extern const q31_t WeightsQ31_8192[16384];
-extern const q31_t cos_factorsQ31_8192[8192];
-#endif
-
-#endif /* if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_FFT_TABLES) */
-
-#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_FAST_ALLOW_TABLES)
-
-#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FAST_TABLES) || defined(ARM_TABLE_RECIP_Q15)
-extern const q15_t armRecipTableQ15[64];
-#endif /* !defined(ARM_DSP_CONFIG_TABLES) defined(ARM_ALL_FAST_TABLES) */
-
-#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FAST_TABLES) || defined(ARM_TABLE_RECIP_Q31)
-extern const q31_t armRecipTableQ31[64];
-#endif /* !defined(ARM_DSP_CONFIG_TABLES) defined(ARM_ALL_FAST_TABLES) */
-
-/* Tables for Fast Math Sine and Cosine */
-#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FAST_TABLES) || defined(ARM_TABLE_SIN_F32)
-extern const float32_t sinTable_f32[FAST_MATH_TABLE_SIZE + 1];
-#endif /* !defined(ARM_DSP_CONFIG_TABLES) defined(ARM_ALL_FAST_TABLES) */
-
-#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FAST_TABLES) || defined(ARM_TABLE_SIN_Q31)
-extern const q31_t sinTable_q31[FAST_MATH_TABLE_SIZE + 1];
-#endif /* !defined(ARM_DSP_CONFIG_TABLES) defined(ARM_ALL_FAST_TABLES) */
-
-#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FAST_TABLES) || defined(ARM_TABLE_SIN_Q15)
-extern const q15_t sinTable_q15[FAST_MATH_TABLE_SIZE + 1];
-#endif /* !defined(ARM_DSP_CONFIG_TABLES) defined(ARM_ALL_FAST_TABLES) */
-
-/* Fast vector sqrt */
-#if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE)
-#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FAST_TABLES) || \
-    defined(ARM_TABLE_FAST_SQRT_Q31_MVE)
-extern const q31_t sqrtTable_Q31[256];
-#endif /* !defined(ARM_DSP_CONFIG_TABLES) defined(ARM_ALL_FAST_TABLES) */
-#endif
-
-/* Accurate scalar sqrt */
-#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FAST_TABLES) || defined(ARM_TABLE_SQRT_Q31)
-extern const q31_t sqrt_initial_lut_q31[32];
-#endif
-
-#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FAST_TABLES) || defined(ARM_TABLE_SQRT_Q15)
-extern const q15_t sqrt_initial_lut_q15[16];
-#endif
-
-#if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE)
-#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FAST_TABLES) || \
-    defined(ARM_TABLE_FAST_SQRT_Q15_MVE)
-extern const q15_t sqrtTable_Q15[256];
-#endif /* !defined(ARM_DSP_CONFIG_TABLES) defined(ARM_ALL_FAST_TABLES) */
-#endif
-
-#endif /* if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_FAST_TABLES) */
-
-#if (defined(ARM_MATH_MVEF) || defined(ARM_MATH_HELIUM)) && !defined(ARM_MATH_AUTOVECTORIZE)
-extern const float32_t exp_tab[8];
-extern const float32_t __logf_lut_f32[8];
-#endif /* (defined(ARM_MATH_MVEF) || defined(ARM_MATH_HELIUM)) && !defined(ARM_MATH_AUTOVECTORIZE) */
-
-#if (defined(ARM_MATH_MVEI) || defined(ARM_MATH_HELIUM)) && !defined(ARM_MATH_AUTOVECTORIZE)
-extern const unsigned char hwLUT[256];
-#endif /* (defined(ARM_MATH_MVEI) || defined(ARM_MATH_HELIUM)) */
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /*  ARM_COMMON_TABLES_H */
diff --git a/Libraries/CMSIS/5.9.0/DSP/Include/arm_common_tables_f16.h b/Libraries/CMSIS/5.9.0/DSP/Include/arm_common_tables_f16.h
deleted file mode 100644
index 7755fd4a3a1..00000000000
--- a/Libraries/CMSIS/5.9.0/DSP/Include/arm_common_tables_f16.h
+++ /dev/null
@@ -1,144 +0,0 @@
-/* ----------------------------------------------------------------------
- * Project:      CMSIS DSP Library
- * Title:        arm_common_tables_f16.h
- * Description:  Extern declaration for common tables
- *
- * @version  V1.10.0
- * @date     08 July 2021
- *
- * Target Processor: Cortex-M and Cortex-A cores
- * -------------------------------------------------------------------- */
-/*
- * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
- *
- * SPDX-License-Identifier: Apache-2.0
- *
- * Licensed under the Apache License, Version 2.0 (the License); you may
- * not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an AS IS BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef _ARM_COMMON_TABLES_F16_H
-#define _ARM_COMMON_TABLES_F16_H
-
-#include "arm_math_types_f16.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_FFT_ALLOW_TABLES)
-
-/* F16 */
-#if !defined(__CC_ARM) && defined(ARM_FLOAT16_SUPPORTED)
-#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || \
-    defined(ARM_TABLE_TWIDDLECOEF_F16_16)
-extern const float16_t twiddleCoefF16_16[32];
-#endif /* !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) */
-
-#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || \
-    defined(ARM_TABLE_TWIDDLECOEF_F16_32)
-extern const float16_t twiddleCoefF16_32[64];
-#endif /* !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) */
-
-#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || \
-    defined(ARM_TABLE_TWIDDLECOEF_F16_64)
-extern const float16_t twiddleCoefF16_64[128];
-#endif /* !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) */
-
-#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || \
-    defined(ARM_TABLE_TWIDDLECOEF_F16_128)
-extern const float16_t twiddleCoefF16_128[256];
-#endif /* !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) */
-
-#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || \
-    defined(ARM_TABLE_TWIDDLECOEF_F16_256)
-extern const float16_t twiddleCoefF16_256[512];
-#endif /* !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) */
-
-#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || \
-    defined(ARM_TABLE_TWIDDLECOEF_F16_512)
-extern const float16_t twiddleCoefF16_512[1024];
-#endif /* !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) */
-
-#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || \
-    defined(ARM_TABLE_TWIDDLECOEF_F16_1024)
-extern const float16_t twiddleCoefF16_1024[2048];
-#endif /* !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) */
-
-#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || \
-    defined(ARM_TABLE_TWIDDLECOEF_F16_2048)
-extern const float16_t twiddleCoefF16_2048[4096];
-#endif /* !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) */
-
-#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || \
-    defined(ARM_TABLE_TWIDDLECOEF_F16_4096)
-extern const float16_t twiddleCoefF16_4096[8192];
-#define twiddleCoefF16 twiddleCoefF16_4096
-#endif /* !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) */
-
-#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || \
-    defined(ARM_TABLE_TWIDDLECOEF_RFFT_F16_32)
-extern const float16_t twiddleCoefF16_rfft_32[32];
-#endif
-
-#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || \
-    defined(ARM_TABLE_TWIDDLECOEF_RFFT_F16_64)
-extern const float16_t twiddleCoefF16_rfft_64[64];
-#endif
-
-#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || \
-    defined(ARM_TABLE_TWIDDLECOEF_RFFT_F16_128)
-extern const float16_t twiddleCoefF16_rfft_128[128];
-#endif
-
-#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || \
-    defined(ARM_TABLE_TWIDDLECOEF_RFFT_F16_256)
-extern const float16_t twiddleCoefF16_rfft_256[256];
-#endif
-
-#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || \
-    defined(ARM_TABLE_TWIDDLECOEF_RFFT_F16_512)
-extern const float16_t twiddleCoefF16_rfft_512[512];
-#endif
-
-#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || \
-    defined(ARM_TABLE_TWIDDLECOEF_RFFT_F16_1024)
-extern const float16_t twiddleCoefF16_rfft_1024[1024];
-#endif
-
-#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || \
-    defined(ARM_TABLE_TWIDDLECOEF_RFFT_F16_2048)
-extern const float16_t twiddleCoefF16_rfft_2048[2048];
-#endif
-
-#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || \
-    defined(ARM_TABLE_TWIDDLECOEF_RFFT_F16_4096)
-extern const float16_t twiddleCoefF16_rfft_4096[4096];
-#endif
-
-#endif /* ARMAC5 */
-
-#endif /* !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_FFT_ALLOW_TABLES) */
-
-#if !defined(__CC_ARM) && defined(ARM_FLOAT16_SUPPORTED)
-
-#if (defined(ARM_MATH_MVEF) || defined(ARM_MATH_HELIUM)) && !defined(ARM_MATH_AUTOVECTORIZE)
-extern const float16_t exp_tab_f16[8];
-extern const float16_t __logf_lut_f16[8];
-#endif /* (defined(ARM_MATH_MVEF) || defined(ARM_MATH_HELIUM)) && !defined(ARM_MATH_AUTOVECTORIZE) */
-#endif
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /*  _ARM_COMMON_TABLES_F16_H */
diff --git a/Libraries/CMSIS/5.9.0/DSP/Include/arm_const_structs.h b/Libraries/CMSIS/5.9.0/DSP/Include/arm_const_structs.h
deleted file mode 100644
index 5f983f30798..00000000000
--- a/Libraries/CMSIS/5.9.0/DSP/Include/arm_const_structs.h
+++ /dev/null
@@ -1,84 +0,0 @@
-/* ----------------------------------------------------------------------
- * Project:      CMSIS DSP Library
- * Title:        arm_const_structs.h
- * Description:  Constant structs that are initialized for user convenience.
- *               For example, some can be given as arguments to the arm_cfft_f32() function.
- *
- * @version  V1.10.0
- * @date     08 July 2021
- *
- * Target Processor: Cortex-M and Cortex-A cores
- * -------------------------------------------------------------------- */
-/*
- * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
- *
- * SPDX-License-Identifier: Apache-2.0
- *
- * Licensed under the Apache License, Version 2.0 (the License); you may
- * not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an AS IS BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef _ARM_CONST_STRUCTS_H
-#define _ARM_CONST_STRUCTS_H
-
-#include "arm_math_types.h"
-#include "arm_common_tables.h"
-#include "dsp/transform_functions.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-extern const arm_cfft_instance_f64 arm_cfft_sR_f64_len16;
-extern const arm_cfft_instance_f64 arm_cfft_sR_f64_len32;
-extern const arm_cfft_instance_f64 arm_cfft_sR_f64_len64;
-extern const arm_cfft_instance_f64 arm_cfft_sR_f64_len128;
-extern const arm_cfft_instance_f64 arm_cfft_sR_f64_len256;
-extern const arm_cfft_instance_f64 arm_cfft_sR_f64_len512;
-extern const arm_cfft_instance_f64 arm_cfft_sR_f64_len1024;
-extern const arm_cfft_instance_f64 arm_cfft_sR_f64_len2048;
-extern const arm_cfft_instance_f64 arm_cfft_sR_f64_len4096;
-
-extern const arm_cfft_instance_f32 arm_cfft_sR_f32_len16;
-extern const arm_cfft_instance_f32 arm_cfft_sR_f32_len32;
-extern const arm_cfft_instance_f32 arm_cfft_sR_f32_len64;
-extern const arm_cfft_instance_f32 arm_cfft_sR_f32_len128;
-extern const arm_cfft_instance_f32 arm_cfft_sR_f32_len256;
-extern const arm_cfft_instance_f32 arm_cfft_sR_f32_len512;
-extern const arm_cfft_instance_f32 arm_cfft_sR_f32_len1024;
-extern const arm_cfft_instance_f32 arm_cfft_sR_f32_len2048;
-extern const arm_cfft_instance_f32 arm_cfft_sR_f32_len4096;
-
-extern const arm_cfft_instance_q31 arm_cfft_sR_q31_len16;
-extern const arm_cfft_instance_q31 arm_cfft_sR_q31_len32;
-extern const arm_cfft_instance_q31 arm_cfft_sR_q31_len64;
-extern const arm_cfft_instance_q31 arm_cfft_sR_q31_len128;
-extern const arm_cfft_instance_q31 arm_cfft_sR_q31_len256;
-extern const arm_cfft_instance_q31 arm_cfft_sR_q31_len512;
-extern const arm_cfft_instance_q31 arm_cfft_sR_q31_len1024;
-extern const arm_cfft_instance_q31 arm_cfft_sR_q31_len2048;
-extern const arm_cfft_instance_q31 arm_cfft_sR_q31_len4096;
-
-extern const arm_cfft_instance_q15 arm_cfft_sR_q15_len16;
-extern const arm_cfft_instance_q15 arm_cfft_sR_q15_len32;
-extern const arm_cfft_instance_q15 arm_cfft_sR_q15_len64;
-extern const arm_cfft_instance_q15 arm_cfft_sR_q15_len128;
-extern const arm_cfft_instance_q15 arm_cfft_sR_q15_len256;
-extern const arm_cfft_instance_q15 arm_cfft_sR_q15_len512;
-extern const arm_cfft_instance_q15 arm_cfft_sR_q15_len1024;
-extern const arm_cfft_instance_q15 arm_cfft_sR_q15_len2048;
-extern const arm_cfft_instance_q15 arm_cfft_sR_q15_len4096;
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif
diff --git a/Libraries/CMSIS/5.9.0/DSP/Include/arm_const_structs_f16.h b/Libraries/CMSIS/5.9.0/DSP/Include/arm_const_structs_f16.h
deleted file mode 100644
index c7a7ffa740c..00000000000
--- a/Libraries/CMSIS/5.9.0/DSP/Include/arm_const_structs_f16.h
+++ /dev/null
@@ -1,85 +0,0 @@
-/* ----------------------------------------------------------------------
- * Project:      CMSIS DSP Library
- * Title:        arm_const_structs_f16.h
- * Description:  Constant structs that are initialized for user convenience.
- *               For example, some can be given as arguments to the arm_cfft_f16() function.
- *
- * @version  V1.10.0
- * @date     08 July 2021
- *
- * Target Processor: Cortex-M and Cortex-A cores
- * -------------------------------------------------------------------- */
-/*
- * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
- *
- * SPDX-License-Identifier: Apache-2.0
- *
- * Licensed under the Apache License, Version 2.0 (the License); you may
- * not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an AS IS BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef _ARM_CONST_STRUCTS_F16_H
-#define _ARM_CONST_STRUCTS_F16_H
-
-#include "arm_math_types_f16.h"
-#include "arm_common_tables.h"
-#include "arm_common_tables_f16.h"
-#include "dsp/transform_functions_f16.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-#if !defined(__CC_ARM) && defined(ARM_FLOAT16_SUPPORTED)
-#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || \
-    (defined(ARM_TABLE_TWIDDLECOEF_F16_16) && defined(ARM_TABLE_BITREVIDX_FLT_16))
-extern const arm_cfft_instance_f16 arm_cfft_sR_f16_len16;
-#endif
-#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || \
-    (defined(ARM_TABLE_TWIDDLECOEF_F16_32) && defined(ARM_TABLE_BITREVIDX_FLT_32))
-extern const arm_cfft_instance_f16 arm_cfft_sR_f16_len32;
-#endif
-#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || \
-    (defined(ARM_TABLE_TWIDDLECOEF_F16_64) && defined(ARM_TABLE_BITREVIDX_FLT_64))
-extern const arm_cfft_instance_f16 arm_cfft_sR_f16_len64;
-#endif
-#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || \
-    (defined(ARM_TABLE_TWIDDLECOEF_F16_128) && defined(ARM_TABLE_BITREVIDX_FLT_128))
-extern const arm_cfft_instance_f16 arm_cfft_sR_f16_len128;
-#endif
-#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || \
-    (defined(ARM_TABLE_TWIDDLECOEF_F16_256) && defined(ARM_TABLE_BITREVIDX_FLT_256))
-extern const arm_cfft_instance_f16 arm_cfft_sR_f16_len256;
-#endif
-#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || \
-    (defined(ARM_TABLE_TWIDDLECOEF_F16_512) && defined(ARM_TABLE_BITREVIDX_FLT_512))
-extern const arm_cfft_instance_f16 arm_cfft_sR_f16_len512;
-#endif
-#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || \
-    (defined(ARM_TABLE_TWIDDLECOEF_F16_1024) && defined(ARM_TABLE_BITREVIDX_FLT_1024))
-extern const arm_cfft_instance_f16 arm_cfft_sR_f16_len1024;
-#endif
-#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || \
-    (defined(ARM_TABLE_TWIDDLECOEF_F16_2048) && defined(ARM_TABLE_BITREVIDX_FLT_2048))
-extern const arm_cfft_instance_f16 arm_cfft_sR_f16_len2048;
-#endif
-#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || \
-    (defined(ARM_TABLE_TWIDDLECOEF_F16_4096) && defined(ARM_TABLE_BITREVIDX_FLT_4096))
-extern const arm_cfft_instance_f16 arm_cfft_sR_f16_len4096;
-#endif
-#endif
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif
diff --git a/Libraries/CMSIS/5.9.0/DSP/Include/arm_math.h b/Libraries/CMSIS/5.9.0/DSP/Include/arm_math.h
deleted file mode 100644
index a4edf0d8c45..00000000000
--- a/Libraries/CMSIS/5.9.0/DSP/Include/arm_math.h
+++ /dev/null
@@ -1,210 +0,0 @@
-/******************************************************************************
- * @file     arm_math.h
- * @brief    Public header file for CMSIS DSP Library
- * @version  V1.10.0
- * @date     08 July 2021
- * Target Processor: Cortex-M and Cortex-A cores
- ******************************************************************************/
-/*
- * Copyright (c) 2010-2021 Arm Limited or its affiliates. All rights reserved.
- *
- * SPDX-License-Identifier: Apache-2.0
- *
- * Licensed under the Apache License, Version 2.0 (the License); you may
- * not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an AS IS BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/**
-   \mainpage CMSIS DSP Software Library
-   *
-   * \section intro Introduction
-   *
-   * This user manual describes the CMSIS DSP software library,
-   * a suite of common signal processing functions for use on Cortex-M and Cortex-A processor 
-   * based devices.
-   *
-   * The library is divided into a number of functions each covering a specific category:
-   * - Basic math functions
-   * - Fast math functions
-   * - Complex math functions
-   * - Filtering functions
-   * - Matrix functions
-   * - Transform functions
-   * - Motor control functions
-   * - Statistical functions
-   * - Support functions
-   * - Interpolation functions
-   * - Support Vector Machine functions (SVM)
-   * - Bayes classifier functions
-   * - Distance functions
-   * - Quaternion functions
-   *
-   * The library has generally separate functions for operating on 8-bit integers, 16-bit integers,
-   * 32-bit integer and 32-bit floating-point values.
-   *
-   * The library is providing vectorized versions of most algorthms for Helium
-   * and of most f32 algorithms for Neon.
-   *
-   * When using a vectorized version, provide a little bit of padding after the end of
-   * a buffer (3 words) because the vectorized code may read a little bit after the end
-   * of a buffer. You don't have to modify your buffers but just ensure that the
-   * end of buffer + padding is not outside of a memory region.
-   *
-   * \section using Using the Library
-   *
-   * The library is released in source form. It is strongly advised to compile the library using -Ofast to
-   * have the best performances.
-   *
-   * The library functions are declared in the public file <code>arm_math.h</code> which is placed in the <code>Include</code> folder.
-   * Simply include this file. If you don't want to include everything, you can also rely
-   * on headers in Include/dsp folder and use only what you need.
-   *
-   * \section example Examples
-   *
-   * The library ships with a number of examples which demonstrate how to use the library functions.
-   *
-   * \section toolchain Toolchain Support
-   *
-   * The library is now tested on Fast Models building with cmake.
-   * Core M0, M4, M7, M33, M55, A32 are tested.
-   *
-   *
-   * \section preprocessor Preprocessor Macros
-   *
-   * Each library project have different preprocessor macros.
-   *
-   * - ARM_MATH_BIG_ENDIAN:
-   *
-   * Define macro ARM_MATH_BIG_ENDIAN to build the library for big endian targets. By default library builds for little endian targets.
-   *
-   * - ARM_MATH_MATRIX_CHECK:
-   *
-   * Define macro ARM_MATH_MATRIX_CHECK for checking on the input and output sizes of matrices
-   *
-   * - ARM_MATH_ROUNDING:
-   *
-   * Define macro ARM_MATH_ROUNDING for rounding on support functions
-   *
-   * - ARM_MATH_LOOPUNROLL:
-   *
-   * Define macro ARM_MATH_LOOPUNROLL to enable manual loop unrolling in DSP functions
-   *
-   * - ARM_MATH_NEON:
-   *
-   * Define macro ARM_MATH_NEON to enable Neon versions of the DSP functions.
-   * It is not enabled by default when Neon is available because performances are 
-   * dependent on the compiler and target architecture.
-   *
-   * - ARM_MATH_NEON_EXPERIMENTAL:
-   *
-   * Define macro ARM_MATH_NEON_EXPERIMENTAL to enable experimental Neon versions of 
-   * of some DSP functions. Experimental Neon versions currently do not have better
-   * performances than the scalar versions.
-   *
-   * - ARM_MATH_HELIUM:
-   *
-   * It implies the flags ARM_MATH_MVEF and ARM_MATH_MVEI and ARM_MATH_MVE_FLOAT16.
-   *
-   * - ARM_MATH_HELIUM_EXPERIMENTAL:
-   *
-   * Only taken into account when ARM_MATH_MVEF, ARM_MATH_MVEI or ARM_MATH_MVE_FLOAT16 are defined.
-   * Enable some vector versions which may have worse performance than scalar
-   * depending on the core / compiler configuration.
-   *
-   * - ARM_MATH_MVEF:
-   *
-   * Select Helium versions of the f32 algorithms.
-   * It implies ARM_MATH_FLOAT16 and ARM_MATH_MVEI.
-   *
-   * - ARM_MATH_MVEI:
-   *
-   * Select Helium versions of the int and fixed point algorithms.
-   *
-   * - ARM_MATH_MVE_FLOAT16:
-   *
-   * MVE Float16 implementations of some algorithms (Requires MVE extension).
-   *
-   * - DISABLEFLOAT16:
-   *
-   * Disable float16 algorithms when __fp16 is not supported for a
-   * specific compiler / core configuration.
-   * This is only valid for scalar. When vector architecture is
-   * supporting f16 then it can't be disabled.
-   *
-   * - ARM_MATH_AUTOVECTORIZE:
-   *
-   * With Helium or Neon, disable the use of vectorized code with C intrinsics
-   * and use pure C instead. The vectorization is then done by the compiler.
-   *
-   * <hr>
-   * \section pack CMSIS-DSP in ARM::CMSIS Pack
-   *
-   * The following files relevant to CMSIS-DSP are present in the <b>ARM::CMSIS</b> Pack directories:
-   * |File/Folder                      |Content                                                                 |
-   * |---------------------------------|------------------------------------------------------------------------|
-   * |\b CMSIS\\Documentation\\DSP     | This documentation                                                     |
-   * |\b CMSIS\\DSP\\Examples          | Example projects demonstrating the usage of the library functions      |
-   * |\b CMSIS\\DSP\\Include           | DSP_Lib include files for using and building the lib
-   * |\b CMSIS\\DSP\\PrivateInclude    | DSP_Lib private include files for building the lib                                               |
-   * |\b CMSIS\\DSP\\Lib               | DSP_Lib binaries                                                       |
-   * |\b CMSIS\\DSP\\Source            | DSP_Lib source files                                                   |
-   *
-   * <hr>
-   * \section rev Revision History of CMSIS-DSP
-   * Please refer to \ref ChangeLog_pg.
-   */
-
-/**
- * @defgroup groupExamples Examples
- */
-
-#ifndef _ARM_MATH_H
-#define _ARM_MATH_H
-
-#include "arm_math_types.h"
-#include "arm_math_memory.h"
-
-#include "dsp/none.h"
-#include "dsp/utils.h"
-
-#include "dsp/basic_math_functions.h"
-#include "dsp/interpolation_functions.h"
-#include "dsp/bayes_functions.h"
-#include "dsp/matrix_functions.h"
-#include "dsp/complex_math_functions.h"
-#include "dsp/statistics_functions.h"
-#include "dsp/controller_functions.h"
-#include "dsp/support_functions.h"
-#include "dsp/distance_functions.h"
-#include "dsp/svm_functions.h"
-#include "dsp/fast_math_functions.h"
-#include "dsp/transform_functions.h"
-#include "dsp/filtering_functions.h"
-#include "dsp/quaternion_math_functions.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-//#define TABLE_SPACING_Q31     0x400000
-//#define TABLE_SPACING_Q15     0x80
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* _ARM_MATH_H */
-
-/**
- *
- * End of file.
- */
diff --git a/Libraries/CMSIS/5.9.0/DSP/Include/arm_math_types.h b/Libraries/CMSIS/5.9.0/DSP/Include/arm_math_types.h
deleted file mode 100644
index c55a3826524..00000000000
--- a/Libraries/CMSIS/5.9.0/DSP/Include/arm_math_types.h
+++ /dev/null
@@ -1,581 +0,0 @@
-/******************************************************************************
- * @file     arm_math_types.h
- * @brief    Public header file for CMSIS DSP Library
- * @version  V1.10.0
- * @date     08 July 2021
- * Target Processor: Cortex-M and Cortex-A cores
- ******************************************************************************/
-/*
- * Copyright (c) 2010-2021 Arm Limited or its affiliates. All rights reserved.
- *
- * SPDX-License-Identifier: Apache-2.0
- *
- * Licensed under the Apache License, Version 2.0 (the License); you may
- * not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an AS IS BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef _ARM_MATH_TYPES_H_
-
-#define _ARM_MATH_TYPES_H_
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-/* Compiler specific diagnostic adjustment */
-#if defined(__CC_ARM)
-
-#elif defined(__ARMCC_VERSION) && (__ARMCC_VERSION >= 6010050)
-
-#elif defined(__APPLE_CC__)
-#pragma GCC diagnostic ignored "-Wold-style-cast"
-
-#elif defined(__GNUC__)
-#pragma GCC diagnostic push
-#pragma GCC diagnostic ignored "-Wsign-conversion"
-#pragma GCC diagnostic ignored "-Wconversion"
-#pragma GCC diagnostic ignored "-Wunused-parameter"
-
-#elif defined(__ICCARM__)
-
-#elif defined(__TI_ARM__)
-
-#elif defined(__CSMC__)
-
-#elif defined(__TASKING__)
-
-#elif defined(_MSC_VER)
-
-#else
-#error Unknown compiler
-#endif
-
-/* Included for instrinsics definitions */
-#if defined(_MSC_VER)
-#include <stdint.h>
-#define __STATIC_FORCEINLINE static __forceinline
-#define __STATIC_INLINE static __inline
-#define __ALIGNED(x) __declspec(align(x))
-#elif defined(__APPLE_CC__)
-#include <stdint.h>
-#define __ALIGNED(x) __attribute__((aligned(x)))
-#define __STATIC_FORCEINLINE static inline __attribute__((always_inline))
-#define __STATIC_INLINE static inline
-#elif defined(__GNUC_PYTHON__)
-#include <stdint.h>
-#define __ALIGNED(x) __attribute__((aligned(x)))
-#define __STATIC_FORCEINLINE static inline __attribute__((always_inline))
-#define __STATIC_INLINE static inline
-
-#else
-#include "cmsis_compiler.h"
-#endif
-
-#include <string.h>
-#include <math.h>
-#include <float.h>
-#include <limits.h>
-
-/* evaluate ARM DSP feature */
-#if (defined(__ARM_FEATURE_DSP) && (__ARM_FEATURE_DSP == 1))
-#define ARM_MATH_DSP 1
-#endif
-
-#if defined(ARM_MATH_NEON)
-#if defined(_MSC_VER) && defined(_M_ARM64EC)
-#include <arm64_neon.h>
-#else
-#include <arm_neon.h>
-#endif
-#if defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) && __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
-#if !defined(ARM_MATH_NEON_FLOAT16)
-#define ARM_MATH_NEON_FLOAT16
-#endif
-#endif
-#endif
-
-#if !defined(ARM_MATH_AUTOVECTORIZE)
-
-#if defined(__ARM_FEATURE_MVE)
-#if __ARM_FEATURE_MVE
-#if !defined(ARM_MATH_MVEI)
-#define ARM_MATH_MVEI
-#endif
-#endif
-
-#if (__ARM_FEATURE_MVE & 2)
-#if !defined(ARM_MATH_MVEF)
-#define ARM_MATH_MVEF
-#endif
-#if !defined(ARM_MATH_MVE_FLOAT16)
-#define ARM_MATH_MVE_FLOAT16
-#endif
-#endif
-
-#endif /*defined(__ARM_FEATURE_MVE)*/
-#endif /*!defined(ARM_MATH_AUTOVECTORIZE)*/
-
-#if defined(ARM_MATH_HELIUM)
-#if !defined(ARM_MATH_MVEF)
-#define ARM_MATH_MVEF
-#endif
-
-#if !defined(ARM_MATH_MVEI)
-#define ARM_MATH_MVEI
-#endif
-
-#if !defined(ARM_MATH_MVE_FLOAT16)
-#define ARM_MATH_MVE_FLOAT16
-#endif
-#endif
-
-#if defined(__CC_ARM)
-/* Enter low optimization region - place directly above function definition */
-#if defined(__ARM_ARCH_7EM__)
-#define LOW_OPTIMIZATION_ENTER _Pragma("push") _Pragma("O1")
-#else
-#define LOW_OPTIMIZATION_ENTER
-#endif
-
-/* Exit low optimization region - place directly after end of function definition */
-#if defined(__ARM_ARCH_7EM__)
-#define LOW_OPTIMIZATION_EXIT _Pragma("pop")
-#else
-#define LOW_OPTIMIZATION_EXIT
-#endif
-
-/* Enter low optimization region - place directly above function definition */
-#define IAR_ONLY_LOW_OPTIMIZATION_ENTER
-
-/* Exit low optimization region - place directly after end of function definition */
-#define IAR_ONLY_LOW_OPTIMIZATION_EXIT
-
-#elif defined(__ARMCC_VERSION) && (__ARMCC_VERSION >= 6010050)
-#define LOW_OPTIMIZATION_ENTER
-#define LOW_OPTIMIZATION_EXIT
-#define IAR_ONLY_LOW_OPTIMIZATION_ENTER
-#define IAR_ONLY_LOW_OPTIMIZATION_EXIT
-
-#elif defined(__APPLE_CC__)
-#define LOW_OPTIMIZATION_ENTER
-#define LOW_OPTIMIZATION_EXIT
-#define IAR_ONLY_LOW_OPTIMIZATION_ENTER
-#define IAR_ONLY_LOW_OPTIMIZATION_EXIT
-
-#elif defined(__GNUC__)
-#define LOW_OPTIMIZATION_ENTER __attribute__((optimize("-O1")))
-#define LOW_OPTIMIZATION_EXIT
-#define IAR_ONLY_LOW_OPTIMIZATION_ENTER
-#define IAR_ONLY_LOW_OPTIMIZATION_EXIT
-
-#elif defined(__ICCARM__)
-/* Enter low optimization region - place directly above function definition */
-#if defined(__ARM_ARCH_7EM__)
-#define LOW_OPTIMIZATION_ENTER _Pragma("optimize=low")
-#else
-#define LOW_OPTIMIZATION_ENTER
-#endif
-
-/* Exit low optimization region - place directly after end of function definition */
-#define LOW_OPTIMIZATION_EXIT
-
-/* Enter low optimization region - place directly above function definition */
-#if defined(__ARM_ARCH_7EM__)
-#define IAR_ONLY_LOW_OPTIMIZATION_ENTER _Pragma("optimize=low")
-#else
-#define IAR_ONLY_LOW_OPTIMIZATION_ENTER
-#endif
-
-/* Exit low optimization region - place directly after end of function definition */
-#define IAR_ONLY_LOW_OPTIMIZATION_EXIT
-
-#elif defined(__TI_ARM__)
-#define LOW_OPTIMIZATION_ENTER
-#define LOW_OPTIMIZATION_EXIT
-#define IAR_ONLY_LOW_OPTIMIZATION_ENTER
-#define IAR_ONLY_LOW_OPTIMIZATION_EXIT
-
-#elif defined(__CSMC__)
-#define LOW_OPTIMIZATION_ENTER
-#define LOW_OPTIMIZATION_EXIT
-#define IAR_ONLY_LOW_OPTIMIZATION_ENTER
-#define IAR_ONLY_LOW_OPTIMIZATION_EXIT
-
-#elif defined(__TASKING__)
-#define LOW_OPTIMIZATION_ENTER
-#define LOW_OPTIMIZATION_EXIT
-#define IAR_ONLY_LOW_OPTIMIZATION_ENTER
-#define IAR_ONLY_LOW_OPTIMIZATION_EXIT
-
-#elif defined(_MSC_VER) || defined(__GNUC_PYTHON__)
-#define LOW_OPTIMIZATION_ENTER
-#define LOW_OPTIMIZATION_EXIT
-#define IAR_ONLY_LOW_OPTIMIZATION_ENTER
-#define IAR_ONLY_LOW_OPTIMIZATION_EXIT
-#endif
-
-/* Compiler specific diagnostic adjustment */
-#if defined(__CC_ARM)
-
-#elif defined(__ARMCC_VERSION) && (__ARMCC_VERSION >= 6010050)
-
-#elif defined(__APPLE_CC__)
-
-#elif defined(__GNUC__)
-#pragma GCC diagnostic pop
-
-#elif defined(__ICCARM__)
-
-#elif defined(__TI_ARM__)
-
-#elif defined(__CSMC__)
-
-#elif defined(__TASKING__)
-
-#elif defined(_MSC_VER)
-
-#else
-#error Unknown compiler
-#endif
-
-#ifdef __cplusplus
-}
-#endif
-
-#if defined(__ARM_FEATURE_MVE) && __ARM_FEATURE_MVE
-#include <arm_mve.h>
-#endif
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-/**
-   * @brief 8-bit fractional data type in 1.7 format.
-   */
-typedef int8_t q7_t;
-
-/**
-   * @brief 16-bit fractional data type in 1.15 format.
-   */
-typedef int16_t q15_t;
-
-/**
-   * @brief 32-bit fractional data type in 1.31 format.
-   */
-typedef int32_t q31_t;
-
-/**
-   * @brief 64-bit fractional data type in 1.63 format.
-   */
-typedef int64_t q63_t;
-
-/**
-   * @brief 32-bit floating-point type definition.
-   */
-#if !defined(__ICCARM__) || !(__ARM_FEATURE_MVE & 2)
-typedef float float32_t;
-#endif
-
-/**
-   * @brief 64-bit floating-point type definition.
-   */
-typedef double float64_t;
-
-/**
-   * @brief vector types
-   */
-#if defined(ARM_MATH_NEON) || (defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE))
-/**
-   * @brief 64-bit fractional 128-bit vector data type in 1.63 format
-   */
-typedef int64x2_t q63x2_t;
-
-/**
-   * @brief 32-bit fractional 128-bit vector data type in 1.31 format.
-   */
-typedef int32x4_t q31x4_t;
-
-/**
-   * @brief 16-bit fractional 128-bit vector data type with 16-bit alignment in 1.15 format.
-   */
-typedef __ALIGNED(2) int16x8_t q15x8_t;
-
-/**
-   * @brief 8-bit fractional 128-bit vector data type with 8-bit alignment in 1.7 format.
-   */
-typedef __ALIGNED(1) int8x16_t q7x16_t;
-
-/**
-   * @brief 32-bit fractional 128-bit vector pair data type in 1.31 format.
-   */
-typedef int32x4x2_t q31x4x2_t;
-
-/**
-   * @brief 32-bit fractional 128-bit vector quadruplet data type in 1.31 format.
-   */
-typedef int32x4x4_t q31x4x4_t;
-
-/**
-   * @brief 16-bit fractional 128-bit vector pair data type in 1.15 format.
-   */
-typedef int16x8x2_t q15x8x2_t;
-
-/**
-   * @brief 16-bit fractional 128-bit vector quadruplet data type in 1.15 format.
-   */
-typedef int16x8x4_t q15x8x4_t;
-
-/**
-   * @brief 8-bit fractional 128-bit vector pair data type in 1.7 format.
-   */
-typedef int8x16x2_t q7x16x2_t;
-
-/**
-   * @brief 8-bit fractional 128-bit vector quadruplet data type in 1.7 format.
-   */
-typedef int8x16x4_t q7x16x4_t;
-
-/**
-   * @brief 32-bit fractional data type in 9.23 format.
-   */
-typedef int32_t q23_t;
-
-/**
-   * @brief 32-bit fractional 128-bit vector data type in 9.23 format.
-   */
-typedef int32x4_t q23x4_t;
-
-/**
-   * @brief 64-bit status 128-bit vector data type.
-   */
-typedef int64x2_t status64x2_t;
-
-/**
-   * @brief 32-bit status 128-bit vector data type.
-   */
-typedef int32x4_t status32x4_t;
-
-/**
-   * @brief 16-bit status 128-bit vector data type.
-   */
-typedef int16x8_t status16x8_t;
-
-/**
-   * @brief 8-bit status 128-bit vector data type.
-   */
-typedef int8x16_t status8x16_t;
-
-#endif
-
-#if defined(ARM_MATH_NEON) || \
-    (defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE)) /* floating point vector*/
-/**
-   * @brief 32-bit floating-point 128-bit vector type
-   */
-typedef float32x4_t f32x4_t;
-
-/**
-   * @brief 32-bit floating-point 128-bit vector pair data type
-   */
-typedef float32x4x2_t f32x4x2_t;
-
-/**
-   * @brief 32-bit floating-point 128-bit vector quadruplet data type
-   */
-typedef float32x4x4_t f32x4x4_t;
-
-/**
-   * @brief 32-bit ubiquitous 128-bit vector data type
-   */
-typedef union _any32x4_t {
-    float32x4_t f;
-    int32x4_t i;
-} any32x4_t;
-
-#endif
-
-#if defined(ARM_MATH_NEON)
-/**
-   * @brief 32-bit fractional 64-bit vector data type in 1.31 format.
-   */
-typedef int32x2_t q31x2_t;
-
-/**
-   * @brief 16-bit fractional 64-bit vector data type in 1.15 format.
-   */
-typedef __ALIGNED(2) int16x4_t q15x4_t;
-
-/**
-   * @brief 8-bit fractional 64-bit vector data type in 1.7 format.
-   */
-typedef __ALIGNED(1) int8x8_t q7x8_t;
-
-/**
-   * @brief 32-bit float 64-bit vector data type.
-   */
-typedef float32x2_t f32x2_t;
-
-/**
-   * @brief 32-bit floating-point 128-bit vector triplet data type
-   */
-typedef float32x4x3_t f32x4x3_t;
-
-/**
-   * @brief 32-bit fractional 128-bit vector triplet data type in 1.31 format
-   */
-typedef int32x4x3_t q31x4x3_t;
-
-/**
-   * @brief 16-bit fractional 128-bit vector triplet data type in 1.15 format
-   */
-typedef int16x8x3_t q15x8x3_t;
-
-/**
-   * @brief 8-bit fractional 128-bit vector triplet data type in 1.7 format
-   */
-typedef int8x16x3_t q7x16x3_t;
-
-/**
-   * @brief 32-bit floating-point 64-bit vector pair data type
-   */
-typedef float32x2x2_t f32x2x2_t;
-
-/**
-   * @brief 32-bit floating-point 64-bit vector triplet data type
-   */
-typedef float32x2x3_t f32x2x3_t;
-
-/**
-   * @brief 32-bit floating-point 64-bit vector quadruplet data type
-   */
-typedef float32x2x4_t f32x2x4_t;
-
-/**
-   * @brief 32-bit fractional 64-bit vector pair data type in 1.31 format
-   */
-typedef int32x2x2_t q31x2x2_t;
-
-/**
-   * @brief 32-bit fractional 64-bit vector triplet data type in 1.31 format
-   */
-typedef int32x2x3_t q31x2x3_t;
-
-/**
-   * @brief 32-bit fractional 64-bit vector quadruplet data type in 1.31 format
-   */
-typedef int32x4x3_t q31x2x4_t;
-
-/**
-   * @brief 16-bit fractional 64-bit vector pair data type in 1.15 format
-   */
-typedef int16x4x2_t q15x4x2_t;
-
-/**
-   * @brief 16-bit fractional 64-bit vector triplet data type in 1.15 format
-   */
-typedef int16x4x2_t q15x4x3_t;
-
-/**
-   * @brief 16-bit fractional 64-bit vector quadruplet data type in 1.15 format
-   */
-typedef int16x4x3_t q15x4x4_t;
-
-/**
-   * @brief 8-bit fractional 64-bit vector pair data type in 1.7 format
-   */
-typedef int8x8x2_t q7x8x2_t;
-
-/**
-   * @brief 8-bit fractional 64-bit vector triplet data type in 1.7 format
-   */
-typedef int8x8x3_t q7x8x3_t;
-
-/**
-   * @brief 8-bit fractional 64-bit vector quadruplet data type in 1.7 format
-   */
-typedef int8x8x4_t q7x8x4_t;
-
-/**
-   * @brief 32-bit ubiquitous 64-bit vector data type
-   */
-typedef union _any32x2_t {
-    float32x2_t f;
-    int32x2_t i;
-} any32x2_t;
-
-/**
-   * @brief 32-bit status 64-bit vector data type.
-   */
-typedef int32x4_t status32x2_t;
-
-/**
-   * @brief 16-bit status 64-bit vector data type.
-   */
-typedef int16x8_t status16x4_t;
-
-/**
-   * @brief 8-bit status 64-bit vector data type.
-   */
-typedef int8x16_t status8x8_t;
-
-#endif
-
-#define F64_MAX ((float64_t)DBL_MAX)
-#define F32_MAX ((float32_t)FLT_MAX)
-
-#define F64_MIN (-DBL_MAX)
-#define F32_MIN (-FLT_MAX)
-
-#define F64_ABSMAX ((float64_t)DBL_MAX)
-#define F32_ABSMAX ((float32_t)FLT_MAX)
-
-#define F64_ABSMIN ((float64_t)0.0)
-#define F32_ABSMIN ((float32_t)0.0)
-
-#define Q31_MAX ((q31_t)(0x7FFFFFFFL))
-#define Q15_MAX ((q15_t)(0x7FFF))
-#define Q7_MAX ((q7_t)(0x7F))
-#define Q31_MIN ((q31_t)(0x80000000L))
-#define Q15_MIN ((q15_t)(0x8000))
-#define Q7_MIN ((q7_t)(0x80))
-
-#define Q31_ABSMAX ((q31_t)(0x7FFFFFFFL))
-#define Q15_ABSMAX ((q15_t)(0x7FFF))
-#define Q7_ABSMAX ((q7_t)(0x7F))
-#define Q31_ABSMIN ((q31_t)0)
-#define Q15_ABSMIN ((q15_t)0)
-#define Q7_ABSMIN ((q7_t)0)
-
-/* Dimension C vector space */
-#define CMPLX_DIM 2
-
-/**
-   * @brief Error status returned by some functions in the library.
-   */
-
-typedef enum {
-    ARM_MATH_SUCCESS = 0, /**< No error */
-    ARM_MATH_ARGUMENT_ERROR = -1, /**< One or more arguments are incorrect */
-    ARM_MATH_LENGTH_ERROR = -2, /**< Length of data buffer is incorrect */
-    ARM_MATH_SIZE_MISMATCH = -3, /**< Size of matrices is not compatible with the operation */
-    ARM_MATH_NANINF = -4, /**< Not-a-number (NaN) or infinity is generated */
-    ARM_MATH_SINGULAR = -5, /**< Input matrix is singular and cannot be inverted */
-    ARM_MATH_TEST_FAILURE = -6, /**< Test Failed */
-    ARM_MATH_DECOMPOSITION_FAILURE = -7 /**< Decomposition Failed */
-} arm_status;
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /*ifndef _ARM_MATH_TYPES_H_ */
diff --git a/Libraries/CMSIS/5.9.0/DSP/Include/arm_mve_tables.h b/Libraries/CMSIS/5.9.0/DSP/Include/arm_mve_tables.h
deleted file mode 100644
index 742674174bb..00000000000
--- a/Libraries/CMSIS/5.9.0/DSP/Include/arm_mve_tables.h
+++ /dev/null
@@ -1,232 +0,0 @@
-/* ----------------------------------------------------------------------
- * Project:      CMSIS DSP Library
- * Title:        arm_mve_tables.h
- * Description:  common tables like fft twiddle factors, Bitreverse, reciprocal etc
- *               used for MVE implementation only
- *
- * @version  V1.10.0
- * @date     04 October 2021
- *
- * Target Processor: Cortex-M and Cortex-A cores
- * -------------------------------------------------------------------- */
-/*
- * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
- *
- * SPDX-License-Identifier: Apache-2.0
- *
- * Licensed under the Apache License, Version 2.0 (the License); you may
- * not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an AS IS BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef _ARM_MVE_TABLES_H
-#define _ARM_MVE_TABLES_H
-
-#include "arm_math_types.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-#if defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE)
-
-#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_FFT_ALLOW_TABLES)
-
-#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || \
-    defined(ARM_TABLE_TWIDDLECOEF_F32_16) || defined(ARM_TABLE_TWIDDLECOEF_F32_32)
-
-extern uint32_t rearranged_twiddle_tab_stride1_arr_16_f32[2];
-extern uint32_t rearranged_twiddle_tab_stride2_arr_16_f32[2];
-extern uint32_t rearranged_twiddle_tab_stride3_arr_16_f32[2];
-extern float32_t rearranged_twiddle_stride1_16_f32[8];
-extern float32_t rearranged_twiddle_stride2_16_f32[8];
-extern float32_t rearranged_twiddle_stride3_16_f32[8];
-#endif
-
-#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || \
-    defined(ARM_TABLE_TWIDDLECOEF_F32_64) || defined(ARM_TABLE_TWIDDLECOEF_F32_128)
-
-extern uint32_t rearranged_twiddle_tab_stride1_arr_64_f32[3];
-extern uint32_t rearranged_twiddle_tab_stride2_arr_64_f32[3];
-extern uint32_t rearranged_twiddle_tab_stride3_arr_64_f32[3];
-extern float32_t rearranged_twiddle_stride1_64_f32[40];
-extern float32_t rearranged_twiddle_stride2_64_f32[40];
-extern float32_t rearranged_twiddle_stride3_64_f32[40];
-#endif
-
-#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || \
-    defined(ARM_TABLE_TWIDDLECOEF_F32_256) || defined(ARM_TABLE_TWIDDLECOEF_F32_512)
-
-extern uint32_t rearranged_twiddle_tab_stride1_arr_256_f32[4];
-extern uint32_t rearranged_twiddle_tab_stride2_arr_256_f32[4];
-extern uint32_t rearranged_twiddle_tab_stride3_arr_256_f32[4];
-extern float32_t rearranged_twiddle_stride1_256_f32[168];
-extern float32_t rearranged_twiddle_stride2_256_f32[168];
-extern float32_t rearranged_twiddle_stride3_256_f32[168];
-#endif
-
-#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || \
-    defined(ARM_TABLE_TWIDDLECOEF_F32_1024) || defined(ARM_TABLE_TWIDDLECOEF_F32_2048)
-
-extern uint32_t rearranged_twiddle_tab_stride1_arr_1024_f32[5];
-extern uint32_t rearranged_twiddle_tab_stride2_arr_1024_f32[5];
-extern uint32_t rearranged_twiddle_tab_stride3_arr_1024_f32[5];
-extern float32_t rearranged_twiddle_stride1_1024_f32[680];
-extern float32_t rearranged_twiddle_stride2_1024_f32[680];
-extern float32_t rearranged_twiddle_stride3_1024_f32[680];
-#endif
-
-#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || \
-    defined(ARM_TABLE_TWIDDLECOEF_F32_4096) || defined(ARM_TABLE_TWIDDLECOEF_F32_8192)
-
-extern uint32_t rearranged_twiddle_tab_stride1_arr_4096_f32[6];
-extern uint32_t rearranged_twiddle_tab_stride2_arr_4096_f32[6];
-extern uint32_t rearranged_twiddle_tab_stride3_arr_4096_f32[6];
-extern float32_t rearranged_twiddle_stride1_4096_f32[2728];
-extern float32_t rearranged_twiddle_stride2_4096_f32[2728];
-extern float32_t rearranged_twiddle_stride3_4096_f32[2728];
-#endif
-
-#endif /* !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_FFT_ALLOW_TABLES) */
-
-#endif /* defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) */
-
-#if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE)
-
-#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_FFT_ALLOW_TABLES)
-
-#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || \
-    defined(ARM_TABLE_TWIDDLECOEF_Q31_16) || defined(ARM_TABLE_TWIDDLECOEF_Q31_32)
-
-extern uint32_t rearranged_twiddle_tab_stride1_arr_16_q31[2];
-extern uint32_t rearranged_twiddle_tab_stride2_arr_16_q31[2];
-extern uint32_t rearranged_twiddle_tab_stride3_arr_16_q31[2];
-extern q31_t rearranged_twiddle_stride1_16_q31[8];
-extern q31_t rearranged_twiddle_stride2_16_q31[8];
-extern q31_t rearranged_twiddle_stride3_16_q31[8];
-#endif
-
-#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || \
-    defined(ARM_TABLE_TWIDDLECOEF_Q31_64) || defined(ARM_TABLE_TWIDDLECOEF_Q31_128)
-
-extern uint32_t rearranged_twiddle_tab_stride1_arr_64_q31[3];
-extern uint32_t rearranged_twiddle_tab_stride2_arr_64_q31[3];
-extern uint32_t rearranged_twiddle_tab_stride3_arr_64_q31[3];
-extern q31_t rearranged_twiddle_stride1_64_q31[40];
-extern q31_t rearranged_twiddle_stride2_64_q31[40];
-extern q31_t rearranged_twiddle_stride3_64_q31[40];
-#endif
-
-#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || \
-    defined(ARM_TABLE_TWIDDLECOEF_Q31_256) || defined(ARM_TABLE_TWIDDLECOEF_Q31_512)
-
-extern uint32_t rearranged_twiddle_tab_stride1_arr_256_q31[4];
-extern uint32_t rearranged_twiddle_tab_stride2_arr_256_q31[4];
-extern uint32_t rearranged_twiddle_tab_stride3_arr_256_q31[4];
-extern q31_t rearranged_twiddle_stride1_256_q31[168];
-extern q31_t rearranged_twiddle_stride2_256_q31[168];
-extern q31_t rearranged_twiddle_stride3_256_q31[168];
-#endif
-
-#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || \
-    defined(ARM_TABLE_TWIDDLECOEF_Q31_1024) || defined(ARM_TABLE_TWIDDLECOEF_Q31_2048)
-
-extern uint32_t rearranged_twiddle_tab_stride1_arr_1024_q31[5];
-extern uint32_t rearranged_twiddle_tab_stride2_arr_1024_q31[5];
-extern uint32_t rearranged_twiddle_tab_stride3_arr_1024_q31[5];
-extern q31_t rearranged_twiddle_stride1_1024_q31[680];
-extern q31_t rearranged_twiddle_stride2_1024_q31[680];
-extern q31_t rearranged_twiddle_stride3_1024_q31[680];
-#endif
-
-#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || \
-    defined(ARM_TABLE_TWIDDLECOEF_Q31_4096) || defined(ARM_TABLE_TWIDDLECOEF_Q31_8192)
-
-extern uint32_t rearranged_twiddle_tab_stride1_arr_4096_q31[6];
-extern uint32_t rearranged_twiddle_tab_stride2_arr_4096_q31[6];
-extern uint32_t rearranged_twiddle_tab_stride3_arr_4096_q31[6];
-extern q31_t rearranged_twiddle_stride1_4096_q31[2728];
-extern q31_t rearranged_twiddle_stride2_4096_q31[2728];
-extern q31_t rearranged_twiddle_stride3_4096_q31[2728];
-#endif
-
-#endif /* !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_FFT_ALLOW_TABLES) */
-
-#endif /* defined(ARM_MATH_MVEI) */
-
-#if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE)
-
-#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_FFT_ALLOW_TABLES)
-
-#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || \
-    defined(ARM_TABLE_TWIDDLECOEF_Q15_16) || defined(ARM_TABLE_TWIDDLECOEF_Q15_32)
-
-extern uint32_t rearranged_twiddle_tab_stride1_arr_16_q15[2];
-extern uint32_t rearranged_twiddle_tab_stride2_arr_16_q15[2];
-extern uint32_t rearranged_twiddle_tab_stride3_arr_16_q15[2];
-extern q15_t rearranged_twiddle_stride1_16_q15[8];
-extern q15_t rearranged_twiddle_stride2_16_q15[8];
-extern q15_t rearranged_twiddle_stride3_16_q15[8];
-#endif
-
-#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || \
-    defined(ARM_TABLE_TWIDDLECOEF_Q15_64) || defined(ARM_TABLE_TWIDDLECOEF_Q15_128)
-
-extern uint32_t rearranged_twiddle_tab_stride1_arr_64_q15[3];
-extern uint32_t rearranged_twiddle_tab_stride2_arr_64_q15[3];
-extern uint32_t rearranged_twiddle_tab_stride3_arr_64_q15[3];
-extern q15_t rearranged_twiddle_stride1_64_q15[40];
-extern q15_t rearranged_twiddle_stride2_64_q15[40];
-extern q15_t rearranged_twiddle_stride3_64_q15[40];
-#endif
-
-#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || \
-    defined(ARM_TABLE_TWIDDLECOEF_Q15_256) || defined(ARM_TABLE_TWIDDLECOEF_Q15_512)
-
-extern uint32_t rearranged_twiddle_tab_stride1_arr_256_q15[4];
-extern uint32_t rearranged_twiddle_tab_stride2_arr_256_q15[4];
-extern uint32_t rearranged_twiddle_tab_stride3_arr_256_q15[4];
-extern q15_t rearranged_twiddle_stride1_256_q15[168];
-extern q15_t rearranged_twiddle_stride2_256_q15[168];
-extern q15_t rearranged_twiddle_stride3_256_q15[168];
-#endif
-
-#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || \
-    defined(ARM_TABLE_TWIDDLECOEF_Q15_1024) || defined(ARM_TABLE_TWIDDLECOEF_Q15_2048)
-
-extern uint32_t rearranged_twiddle_tab_stride1_arr_1024_q15[5];
-extern uint32_t rearranged_twiddle_tab_stride2_arr_1024_q15[5];
-extern uint32_t rearranged_twiddle_tab_stride3_arr_1024_q15[5];
-extern q15_t rearranged_twiddle_stride1_1024_q15[680];
-extern q15_t rearranged_twiddle_stride2_1024_q15[680];
-extern q15_t rearranged_twiddle_stride3_1024_q15[680];
-#endif
-
-#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || \
-    defined(ARM_TABLE_TWIDDLECOEF_Q15_4096) || defined(ARM_TABLE_TWIDDLECOEF_Q15_8192)
-
-extern uint32_t rearranged_twiddle_tab_stride1_arr_4096_q15[6];
-extern uint32_t rearranged_twiddle_tab_stride2_arr_4096_q15[6];
-extern uint32_t rearranged_twiddle_tab_stride3_arr_4096_q15[6];
-extern q15_t rearranged_twiddle_stride1_4096_q15[2728];
-extern q15_t rearranged_twiddle_stride2_4096_q15[2728];
-extern q15_t rearranged_twiddle_stride3_4096_q15[2728];
-#endif
-
-#endif /* !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_FFT_ALLOW_TABLES) */
-
-#endif /* defined(ARM_MATH_MVEI) */
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /*_ARM_MVE_TABLES_H*/
diff --git a/Libraries/CMSIS/5.9.0/DSP/Include/arm_mve_tables_f16.h b/Libraries/CMSIS/5.9.0/DSP/Include/arm_mve_tables_f16.h
deleted file mode 100644
index 474fedcb950..00000000000
--- a/Libraries/CMSIS/5.9.0/DSP/Include/arm_mve_tables_f16.h
+++ /dev/null
@@ -1,106 +0,0 @@
-/* ----------------------------------------------------------------------
- * Project:      CMSIS DSP Library
- * Title:        arm_mve_tables_f16.h
- * Description:  common tables like fft twiddle factors, Bitreverse, reciprocal etc
- *               used for MVE implementation only
- *
- * @version  V1.10.0
- * @date     04 October 2021
- *
- * Target Processor: Cortex-M and Cortex-A cores
- * -------------------------------------------------------------------- */
-/*
- * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
- *
- * SPDX-License-Identifier: Apache-2.0
- *
- * Licensed under the Apache License, Version 2.0 (the License); you may
- * not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an AS IS BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef _ARM_MVE_TABLES_F16_H
-#define _ARM_MVE_TABLES_F16_H
-
-#include "arm_math_types_f16.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-#if defined(ARM_MATH_MVE_FLOAT16) && !defined(ARM_MATH_AUTOVECTORIZE)
-
-#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_FFT_ALLOW_TABLES)
-
-#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || \
-    defined(ARM_TABLE_TWIDDLECOEF_F16_16) || defined(ARM_TABLE_TWIDDLECOEF_F16_32)
-
-extern uint32_t rearranged_twiddle_tab_stride1_arr_16_f16[2];
-extern uint32_t rearranged_twiddle_tab_stride2_arr_16_f16[2];
-extern uint32_t rearranged_twiddle_tab_stride3_arr_16_f16[2];
-extern float16_t rearranged_twiddle_stride1_16_f16[8];
-extern float16_t rearranged_twiddle_stride2_16_f16[8];
-extern float16_t rearranged_twiddle_stride3_16_f16[8];
-#endif
-
-#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || \
-    defined(ARM_TABLE_TWIDDLECOEF_F16_64) || defined(ARM_TABLE_TWIDDLECOEF_F16_128)
-
-extern uint32_t rearranged_twiddle_tab_stride1_arr_64_f16[3];
-extern uint32_t rearranged_twiddle_tab_stride2_arr_64_f16[3];
-extern uint32_t rearranged_twiddle_tab_stride3_arr_64_f16[3];
-extern float16_t rearranged_twiddle_stride1_64_f16[40];
-extern float16_t rearranged_twiddle_stride2_64_f16[40];
-extern float16_t rearranged_twiddle_stride3_64_f16[40];
-#endif
-
-#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || \
-    defined(ARM_TABLE_TWIDDLECOEF_F16_256) || defined(ARM_TABLE_TWIDDLECOEF_F16_512)
-
-extern uint32_t rearranged_twiddle_tab_stride1_arr_256_f16[4];
-extern uint32_t rearranged_twiddle_tab_stride2_arr_256_f16[4];
-extern uint32_t rearranged_twiddle_tab_stride3_arr_256_f16[4];
-extern float16_t rearranged_twiddle_stride1_256_f16[168];
-extern float16_t rearranged_twiddle_stride2_256_f16[168];
-extern float16_t rearranged_twiddle_stride3_256_f16[168];
-#endif
-
-#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || \
-    defined(ARM_TABLE_TWIDDLECOEF_F16_1024) || defined(ARM_TABLE_TWIDDLECOEF_F16_2048)
-
-extern uint32_t rearranged_twiddle_tab_stride1_arr_1024_f16[5];
-extern uint32_t rearranged_twiddle_tab_stride2_arr_1024_f16[5];
-extern uint32_t rearranged_twiddle_tab_stride3_arr_1024_f16[5];
-extern float16_t rearranged_twiddle_stride1_1024_f16[680];
-extern float16_t rearranged_twiddle_stride2_1024_f16[680];
-extern float16_t rearranged_twiddle_stride3_1024_f16[680];
-#endif
-
-#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || \
-    defined(ARM_TABLE_TWIDDLECOEF_F16_4096) || defined(ARM_TABLE_TWIDDLECOEF_F16_8192)
-
-extern uint32_t rearranged_twiddle_tab_stride1_arr_4096_f16[6];
-extern uint32_t rearranged_twiddle_tab_stride2_arr_4096_f16[6];
-extern uint32_t rearranged_twiddle_tab_stride3_arr_4096_f16[6];
-extern float16_t rearranged_twiddle_stride1_4096_f16[2728];
-extern float16_t rearranged_twiddle_stride2_4096_f16[2728];
-extern float16_t rearranged_twiddle_stride3_4096_f16[2728];
-#endif
-
-#endif /* !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_FFT_ALLOW_TABLES) */
-
-#endif /* defined(ARM_MATH_MVE_FLOAT16) && !defined(ARM_MATH_AUTOVECTORIZE) */
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /*_ARM_MVE_TABLES_F16_H*/
diff --git a/Libraries/CMSIS/5.9.0/DSP/Include/dsp/matrix_utils.h b/Libraries/CMSIS/5.9.0/DSP/Include/dsp/matrix_utils.h
deleted file mode 100644
index de87ab1417b..00000000000
--- a/Libraries/CMSIS/5.9.0/DSP/Include/dsp/matrix_utils.h
+++ /dev/null
@@ -1,569 +0,0 @@
-/******************************************************************************
- * @file     matrix_utils.h
- * @brief    Public header file for CMSIS DSP Library
- * @version  V1.11.0
- * @date     30 May 2022
- * Target Processor: Cortex-M and Cortex-A cores
- ******************************************************************************/
-/*
- * Copyright (c) 2010-2022 Arm Limited or its affiliates. All rights reserved.
- *
- * SPDX-License-Identifier: Apache-2.0
- *
- * Licensed under the Apache License, Version 2.0 (the License); you may
- * not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an AS IS BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef _MATRIX_UTILS_H_
-#define _MATRIX_UTILS_H_
-
-#include "arm_math_types.h"
-#include "arm_math_memory.h"
-
-#include "dsp/none.h"
-#include "dsp/utils.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-#define ELEM(A, ROW, COL) &((A)->pData[(A)->numCols * (ROW) + (COL)])
-
-#define SCALE_COL_T(T, CAST, A, ROW, v, i)     \
-    {                                          \
-        int32_t w;                             \
-        T *data = (A)->pData;                  \
-        const int32_t numCols = (A)->numCols;  \
-        const int32_t nb = (A)->numRows - ROW; \
-                                               \
-        data += i + numCols * (ROW);           \
-                                               \
-        for (w = 0; w < nb; w++) {             \
-            *data *= CAST v;                   \
-            data += numCols;                   \
-        }                                      \
-    }
-
-#if defined(ARM_FLOAT16_SUPPORTED)
-#if defined(ARM_MATH_MVE_FLOAT16) && !defined(ARM_MATH_AUTOVECTORIZE)
-
-#define SWAP_ROWS_F16(A, COL, i, j)                          \
-    {                                                        \
-        int cnt = ((A)->numCols) - (COL);                    \
-        int32_t w;                                           \
-        float16_t *data = (A)->pData;                        \
-        const int32_t numCols = (A)->numCols;                \
-                                                             \
-        for (w = (COL); w < numCols; w += 8) {               \
-            f16x8_t tmpa, tmpb;                              \
-            mve_pred16_t p0 = vctp16q(cnt);                  \
-                                                             \
-            tmpa = vldrhq_z_f16(&data[i * numCols + w], p0); \
-            tmpb = vldrhq_z_f16(&data[j * numCols + w], p0); \
-                                                             \
-            vstrhq_p(&data[i * numCols + w], tmpb, p0);      \
-            vstrhq_p(&data[j * numCols + w], tmpa, p0);      \
-                                                             \
-            cnt -= 8;                                        \
-        }                                                    \
-    }
-
-#define SCALE_ROW_F16(A, COL, v, i)                          \
-    {                                                        \
-        int cnt = ((A)->numCols) - (COL);                    \
-        int32_t w;                                           \
-        float16_t *data = (A)->pData;                        \
-        const int32_t numCols = (A)->numCols;                \
-                                                             \
-        for (w = (COL); w < numCols; w += 8) {               \
-            f16x8_t tmpa;                                    \
-            mve_pred16_t p0 = vctp16q(cnt);                  \
-            tmpa = vldrhq_z_f16(&data[i * numCols + w], p0); \
-            tmpa = vmulq_n_f16(tmpa, (_Float16)v);           \
-            vstrhq_p(&data[i * numCols + w], tmpa, p0);      \
-            cnt -= 8;                                        \
-        }                                                    \
-    }
-
-#define MAC_ROW_F16(COL, A, i, v, B, j)                       \
-    {                                                         \
-        int cnt = ((A)->numCols) - (COL);                     \
-        int32_t w;                                            \
-        float16_t *dataA = (A)->pData;                        \
-        float16_t *dataB = (B)->pData;                        \
-        const int32_t numCols = (A)->numCols;                 \
-                                                              \
-        for (w = (COL); w < numCols; w += 8) {                \
-            f16x8_t tmpa, tmpb;                               \
-            mve_pred16_t p0 = vctp16q(cnt);                   \
-            tmpa = vldrhq_z_f16(&dataA[i * numCols + w], p0); \
-            tmpb = vldrhq_z_f16(&dataB[j * numCols + w], p0); \
-            tmpa = vfmaq_n_f16(tmpa, tmpb, v);                \
-            vstrhq_p(&dataA[i * numCols + w], tmpa, p0);      \
-            cnt -= 8;                                         \
-        }                                                     \
-    }
-
-#define MAS_ROW_F16(COL, A, i, v, B, j)                       \
-    {                                                         \
-        int cnt = ((A)->numCols) - (COL);                     \
-        int32_t w;                                            \
-        float16_t *dataA = (A)->pData;                        \
-        float16_t *dataB = (B)->pData;                        \
-        const int32_t numCols = (A)->numCols;                 \
-        f16x8_t vec = vdupq_n_f16(v);                         \
-                                                              \
-        for (w = (COL); w < numCols; w += 8) {                \
-            f16x8_t tmpa, tmpb;                               \
-            mve_pred16_t p0 = vctp16q(cnt);                   \
-            tmpa = vldrhq_z_f16(&dataA[i * numCols + w], p0); \
-            tmpb = vldrhq_z_f16(&dataB[j * numCols + w], p0); \
-            tmpa = vfmsq_f16(tmpa, tmpb, vec);                \
-            vstrhq_p(&dataA[i * numCols + w], tmpa, p0);      \
-            cnt -= 8;                                         \
-        }                                                     \
-    }
-
-#else
-
-#define SWAP_ROWS_F16(A, COL, i, j)           \
-    {                                         \
-        int32_t w;                            \
-        float16_t *dataI = (A)->pData;        \
-        float16_t *dataJ = (A)->pData;        \
-        const int32_t numCols = (A)->numCols; \
-        const int32_t nb = numCols - (COL);   \
-                                              \
-        dataI += i * numCols + (COL);         \
-        dataJ += j * numCols + (COL);         \
-                                              \
-        for (w = 0; w < nb; w++) {            \
-            float16_t tmp;                    \
-            tmp = *dataI;                     \
-            *dataI++ = *dataJ;                \
-            *dataJ++ = tmp;                   \
-        }                                     \
-    }
-
-#define SCALE_ROW_F16(A, COL, v, i)           \
-    {                                         \
-        int32_t w;                            \
-        float16_t *data = (A)->pData;         \
-        const int32_t numCols = (A)->numCols; \
-        const int32_t nb = numCols - (COL);   \
-                                              \
-        data += i * numCols + (COL);          \
-                                              \
-        for (w = 0; w < nb; w++) {            \
-            *data++ *= (_Float16)v;           \
-        }                                     \
-    }
-
-#define MAC_ROW_F16(COL, A, i, v, B, j)                   \
-    {                                                     \
-        int32_t w;                                        \
-        float16_t *dataA = (A)->pData;                    \
-        float16_t *dataB = (B)->pData;                    \
-        const int32_t numCols = (A)->numCols;             \
-        const int32_t nb = numCols - (COL);               \
-                                                          \
-        dataA += i * numCols + (COL);                     \
-        dataB += j * numCols + (COL);                     \
-                                                          \
-        for (w = 0; w < nb; w++) {                        \
-            *dataA++ += (_Float16)v * (_Float16)*dataB++; \
-        }                                                 \
-    }
-
-#define MAS_ROW_F16(COL, A, i, v, B, j)                   \
-    {                                                     \
-        int32_t w;                                        \
-        float16_t *dataA = (A)->pData;                    \
-        float16_t *dataB = (B)->pData;                    \
-        const int32_t numCols = (A)->numCols;             \
-        const int32_t nb = numCols - (COL);               \
-                                                          \
-        dataA += i * numCols + (COL);                     \
-        dataB += j * numCols + (COL);                     \
-                                                          \
-        for (w = 0; w < nb; w++) {                        \
-            *dataA++ -= (_Float16)v * (_Float16)*dataB++; \
-        }                                                 \
-    }
-
-#endif /*defined(ARM_MATH_MVE_FLOAT16) && !defined(ARM_MATH_AUTOVECTORIZE)*/
-
-#define SCALE_COL_F16(A, ROW, v, i) SCALE_COL_T(float16_t, (_Float16), A, ROW, v, i)
-
-#endif /* defined(ARM_FLOAT16_SUPPORTED)*/
-
-#if defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE)
-
-#define SWAP_ROWS_F32(A, COL, i, j)                          \
-    {                                                        \
-        int cnt = ((A)->numCols) - (COL);                    \
-        float32_t *data = (A)->pData;                        \
-        const int32_t numCols = (A)->numCols;                \
-        int32_t w;                                           \
-                                                             \
-        for (w = (COL); w < numCols; w += 4) {               \
-            f32x4_t tmpa, tmpb;                              \
-            mve_pred16_t p0 = vctp32q(cnt);                  \
-                                                             \
-            tmpa = vldrwq_z_f32(&data[i * numCols + w], p0); \
-            tmpb = vldrwq_z_f32(&data[j * numCols + w], p0); \
-                                                             \
-            vstrwq_p(&data[i * numCols + w], tmpb, p0);      \
-            vstrwq_p(&data[j * numCols + w], tmpa, p0);      \
-                                                             \
-            cnt -= 4;                                        \
-        }                                                    \
-    }
-
-#define MAC_ROW_F32(COL, A, i, v, B, j)                       \
-    {                                                         \
-        int cnt = ((A)->numCols) - (COL);                     \
-        float32_t *dataA = (A)->pData;                        \
-        float32_t *dataB = (B)->pData;                        \
-        const int32_t numCols = (A)->numCols;                 \
-        int32_t w;                                            \
-                                                              \
-        for (w = (COL); w < numCols; w += 4) {                \
-            f32x4_t tmpa, tmpb;                               \
-            mve_pred16_t p0 = vctp32q(cnt);                   \
-            tmpa = vldrwq_z_f32(&dataA[i * numCols + w], p0); \
-            tmpb = vldrwq_z_f32(&dataB[j * numCols + w], p0); \
-            tmpa = vfmaq_n_f32(tmpa, tmpb, v);                \
-            vstrwq_p(&dataA[i * numCols + w], tmpa, p0);      \
-            cnt -= 4;                                         \
-        }                                                     \
-    }
-
-#define MAS_ROW_F32(COL, A, i, v, B, j)                       \
-    {                                                         \
-        int cnt = ((A)->numCols) - (COL);                     \
-        float32_t *dataA = (A)->pData;                        \
-        float32_t *dataB = (B)->pData;                        \
-        const int32_t numCols = (A)->numCols;                 \
-        int32_t w;                                            \
-        f32x4_t vec = vdupq_n_f32(v);                         \
-                                                              \
-        for (w = (COL); w < numCols; w += 4) {                \
-            f32x4_t tmpa, tmpb;                               \
-            mve_pred16_t p0 = vctp32q(cnt);                   \
-            tmpa = vldrwq_z_f32(&dataA[i * numCols + w], p0); \
-            tmpb = vldrwq_z_f32(&dataB[j * numCols + w], p0); \
-            tmpa = vfmsq_f32(tmpa, tmpb, vec);                \
-            vstrwq_p(&dataA[i * numCols + w], tmpa, p0);      \
-            cnt -= 4;                                         \
-        }                                                     \
-    }
-
-#define SCALE_ROW_F32(A, COL, v, i)                          \
-    {                                                        \
-        int cnt = ((A)->numCols) - (COL);                    \
-        float32_t *data = (A)->pData;                        \
-        const int32_t numCols = (A)->numCols;                \
-        int32_t w;                                           \
-                                                             \
-        for (w = (COL); w < numCols; w += 4) {               \
-            f32x4_t tmpa;                                    \
-            mve_pred16_t p0 = vctp32q(cnt);                  \
-            tmpa = vldrwq_z_f32(&data[i * numCols + w], p0); \
-            tmpa = vmulq_n_f32(tmpa, v);                     \
-            vstrwq_p(&data[i * numCols + w], tmpa, p0);      \
-            cnt -= 4;                                        \
-        }                                                    \
-    }
-
-#elif defined(ARM_MATH_NEON) && !defined(ARM_MATH_AUTOVECTORIZE)
-
-#define SWAP_ROWS_F32(A, COL, i, j)           \
-    {                                         \
-        int32_t w;                            \
-        float32_t *dataI = (A)->pData;        \
-        float32_t *dataJ = (A)->pData;        \
-        const int32_t numCols = (A)->numCols; \
-        const int32_t nb = numCols - COL;     \
-                                              \
-        dataI += i * numCols + (COL);         \
-        dataJ += j * numCols + (COL);         \
-                                              \
-        float32_t tmp;                        \
-                                              \
-        for (w = 0; w < nb; w++) {            \
-            tmp = *dataI;                     \
-            *dataI++ = *dataJ;                \
-            *dataJ++ = tmp;                   \
-        }                                     \
-    }
-
-#define MAC_ROW_F32(COL, A, i, v, B, j)        \
-    {                                          \
-        float32_t *dataA = (A)->pData;         \
-        float32_t *dataB = (B)->pData;         \
-        const int32_t numCols = (A)->numCols;  \
-        const int32_t nb = numCols - (COL);    \
-        int32_t nbElems;                       \
-        f32x4_t vec = vdupq_n_f32(v);          \
-                                               \
-        nbElems = nb >> 2;                     \
-                                               \
-        dataA += i * numCols + (COL);          \
-        dataB += j * numCols + (COL);          \
-                                               \
-        while (nbElems > 0) {                  \
-            f32x4_t tmpa, tmpb;                \
-            tmpa = vld1q_f32(dataA, p0);       \
-            tmpb = vld1q_f32(dataB, p0);       \
-            tmpa = vmlaq_f32(tmpa, tmpb, vec); \
-            vst1q_f32(dataA, tmpa, p0);        \
-            nbElems--;                         \
-            dataA += 4;                        \
-            dataB += 4;                        \
-        }                                      \
-                                               \
-        nbElems = nb & 3;                      \
-        while (nbElems > 0) {                  \
-            *dataA++ += v * *dataB++;          \
-            nbElems--;                         \
-        }                                      \
-    }
-
-#define MAS_ROW_F32(COL, A, i, v, B, j)        \
-    {                                          \
-        float32_t *dataA = (A)->pData;         \
-        float32_t *dataB = (B)->pData;         \
-        const int32_t numCols = (A)->numCols;  \
-        const int32_t nb = numCols - (COL);    \
-        int32_t nbElems;                       \
-        f32x4_t vec = vdupq_n_f32(v);          \
-                                               \
-        nbElems = nb >> 2;                     \
-                                               \
-        dataA += i * numCols + (COL);          \
-        dataB += j * numCols + (COL);          \
-                                               \
-        while (nbElems > 0) {                  \
-            f32x4_t tmpa, tmpb;                \
-            tmpa = vld1q_f32(dataA);           \
-            tmpb = vld1q_f32(dataB);           \
-            tmpa = vmlsq_f32(tmpa, tmpb, vec); \
-            vst1q_f32(dataA, tmpa);            \
-            nbElems--;                         \
-            dataA += 4;                        \
-            dataB += 4;                        \
-        }                                      \
-                                               \
-        nbElems = nb & 3;                      \
-        while (nbElems > 0) {                  \
-            *dataA++ -= v * *dataB++;          \
-            nbElems--;                         \
-        }                                      \
-    }
-
-#define SCALE_ROW_F32(A, COL, v, i)           \
-    {                                         \
-        float32_t *data = (A)->pData;         \
-        const int32_t numCols = (A)->numCols; \
-        const int32_t nb = numCols - (COL);   \
-        int32_t nbElems;                      \
-        f32x4_t vec = vdupq_n_f32(v);         \
-                                              \
-        nbElems = nb >> 2;                    \
-                                              \
-        data += i * numCols + (COL);          \
-        while (nbElems > 0) {                 \
-            f32x4_t tmpa;                     \
-            tmpa = vld1q_f32(data);           \
-            tmpa = vmulq_f32(tmpa, vec);      \
-            vst1q_f32(data, tmpa);            \
-            data += 4;                        \
-            nbElems--;                        \
-        }                                     \
-                                              \
-        nbElems = nb & 3;                     \
-        while (nbElems > 0) {                 \
-            *data++ *= v;                     \
-            nbElems--;                        \
-        }                                     \
-    }
-
-#else
-
-#define SWAP_ROWS_F32(A, COL, i, j)           \
-    {                                         \
-        int32_t w;                            \
-        float32_t tmp;                        \
-        float32_t *dataI = (A)->pData;        \
-        float32_t *dataJ = (A)->pData;        \
-        const int32_t numCols = (A)->numCols; \
-        const int32_t nb = numCols - COL;     \
-                                              \
-        dataI += i * numCols + (COL);         \
-        dataJ += j * numCols + (COL);         \
-                                              \
-        for (w = 0; w < nb; w++) {            \
-            tmp = *dataI;                     \
-            *dataI++ = *dataJ;                \
-            *dataJ++ = tmp;                   \
-        }                                     \
-    }
-
-#define SCALE_ROW_F32(A, COL, v, i)           \
-    {                                         \
-        int32_t w;                            \
-        float32_t *data = (A)->pData;         \
-        const int32_t numCols = (A)->numCols; \
-        const int32_t nb = numCols - COL;     \
-                                              \
-        data += i * numCols + (COL);          \
-                                              \
-        for (w = 0; w < nb; w++) {            \
-            *data++ *= v;                     \
-        }                                     \
-    }
-
-#define MAC_ROW_F32(COL, A, i, v, B, j)       \
-    {                                         \
-        int32_t w;                            \
-        float32_t *dataA = (A)->pData;        \
-        float32_t *dataB = (B)->pData;        \
-        const int32_t numCols = (A)->numCols; \
-        const int32_t nb = numCols - (COL);   \
-                                              \
-        dataA = dataA + i * numCols + (COL);  \
-        dataB = dataB + j * numCols + (COL);  \
-                                              \
-        for (w = 0; w < nb; w++) {            \
-            *dataA++ += v * *dataB++;         \
-        }                                     \
-    }
-
-#define MAS_ROW_F32(COL, A, i, v, B, j)       \
-    {                                         \
-        int32_t w;                            \
-        float32_t *dataA = (A)->pData;        \
-        float32_t *dataB = (B)->pData;        \
-        const int32_t numCols = (A)->numCols; \
-        const int32_t nb = numCols - (COL);   \
-                                              \
-        dataA = dataA + i * numCols + (COL);  \
-        dataB = dataB + j * numCols + (COL);  \
-                                              \
-        for (w = 0; w < nb; w++) {            \
-            *dataA++ -= v * *dataB++;         \
-        }                                     \
-    }
-
-#endif /* defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) */
-
-#define SWAP_COLS_F32(A, COL, i, j)                        \
-    {                                                      \
-        int32_t w;                                         \
-        float32_t *data = (A)->pData;                      \
-        const int32_t numCols = (A)->numCols;              \
-        for (w = (COL); w < numCols; w++) {                \
-            float32_t tmp;                                 \
-            tmp = data[w * numCols + i];                   \
-            data[w * numCols + i] = data[w * numCols + j]; \
-            data[w * numCols + j] = tmp;                   \
-        }                                                  \
-    }
-
-#define SCALE_COL_F32(A, ROW, v, i) SCALE_COL_T(float32_t, , A, ROW, v, i)
-
-#define SWAP_ROWS_F64(A, COL, i, j)           \
-    {                                         \
-        int32_t w;                            \
-        float64_t *dataI = (A)->pData;        \
-        float64_t *dataJ = (A)->pData;        \
-        const int32_t numCols = (A)->numCols; \
-        const int32_t nb = numCols - (COL);   \
-                                              \
-        dataI += i * numCols + (COL);         \
-        dataJ += j * numCols + (COL);         \
-                                              \
-        for (w = 0; w < nb; w++) {            \
-            float64_t tmp;                    \
-            tmp = *dataI;                     \
-            *dataI++ = *dataJ;                \
-            *dataJ++ = tmp;                   \
-        }                                     \
-    }
-
-#define SWAP_COLS_F64(A, COL, i, j)                        \
-    {                                                      \
-        int32_t w;                                         \
-        float64_t *data = (A)->pData;                      \
-        const int32_t numCols = (A)->numCols;              \
-        for (w = (COL); w < numCols; w++) {                \
-            float64_t tmp;                                 \
-            tmp = data[w * numCols + i];                   \
-            data[w * numCols + i] = data[w * numCols + j]; \
-            data[w * numCols + j] = tmp;                   \
-        }                                                  \
-    }
-
-#define SCALE_ROW_F64(A, COL, v, i)           \
-    {                                         \
-        int32_t w;                            \
-        float64_t *data = (A)->pData;         \
-        const int32_t numCols = (A)->numCols; \
-        const int32_t nb = numCols - (COL);   \
-                                              \
-        data += i * numCols + (COL);          \
-                                              \
-        for (w = 0; w < nb; w++) {            \
-            *data++ *= v;                     \
-        }                                     \
-    }
-
-#define SCALE_COL_F64(A, ROW, v, i) SCALE_COL_T(float64_t, , A, ROW, v, i)
-
-#define MAC_ROW_F64(COL, A, i, v, B, j)       \
-    {                                         \
-        int32_t w;                            \
-        float64_t *dataA = (A)->pData;        \
-        float64_t *dataB = (B)->pData;        \
-        const int32_t numCols = (A)->numCols; \
-        const int32_t nb = numCols - (COL);   \
-                                              \
-        dataA += i * numCols + (COL);         \
-        dataB += j * numCols + (COL);         \
-                                              \
-        for (w = 0; w < nb; w++) {            \
-            *dataA++ += v * *dataB++;         \
-        }                                     \
-    }
-
-#define MAS_ROW_F64(COL, A, i, v, B, j)       \
-    {                                         \
-        int32_t w;                            \
-        float64_t *dataA = (A)->pData;        \
-        float64_t *dataB = (B)->pData;        \
-        const int32_t numCols = (A)->numCols; \
-        const int32_t nb = numCols - (COL);   \
-                                              \
-        dataA += i * numCols + (COL);         \
-        dataB += j * numCols + (COL);         \
-                                              \
-        for (w = 0; w < nb; w++) {            \
-            *dataA++ -= v * *dataB++;         \
-        }                                     \
-    }
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* ifndef _MATRIX_UTILS_H_ */
diff --git a/Libraries/CMSIS/5.9.0/DSP/Include/dsp/none.h b/Libraries/CMSIS/5.9.0/DSP/Include/dsp/none.h
deleted file mode 100644
index 45042dafc9a..00000000000
--- a/Libraries/CMSIS/5.9.0/DSP/Include/dsp/none.h
+++ /dev/null
@@ -1,471 +0,0 @@
-/******************************************************************************
- * @file     none.h
- * @brief    Intrinsincs when no DSP extension available
- * @version  V1.9.0
- * @date     20. July 2020
- ******************************************************************************/
-/*
- * Copyright (c) 2010-2020 Arm Limited or its affiliates. All rights reserved.
- *
- * SPDX-License-Identifier: Apache-2.0
- *
- * Licensed under the Apache License, Version 2.0 (the License); you may
- * not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an AS IS BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/*
-
-Definitions in this file are allowing to reuse some versions of the
-CMSIS-DSP to build on a core (M0 for instance) or a host where
-DSP extension are not available.
-
-Ideally a pure C version should have been used instead.
-But those are not always available or use a restricted set
-of intrinsics.
-
-*/
-
-#ifndef _NONE_H_
-#define _NONE_H_
-
-#include "arm_math_types.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-/*
-
-Normally those kind of definitions are in a compiler file
-in Core or Core_A.
-
-But for MSVC compiler it is a bit special. The goal is very specific
-to CMSIS-DSP and only to allow the use of this library from other
-systems like Python or Matlab.
-
-MSVC is not going to be used to cross-compile to ARM. So, having a MSVC
-compiler file in Core or Core_A would not make sense.
-
-*/
-#if defined(_MSC_VER) || defined(__GNUC_PYTHON__) || defined(__APPLE_CC__)
-__STATIC_FORCEINLINE uint8_t __CLZ(uint32_t data)
-{
-    if (data == 0U) {
-        return 32U;
-    }
-
-    uint32_t count = 0U;
-    uint32_t mask = 0x80000000U;
-
-    while ((data & mask) == 0U) {
-        count += 1U;
-        mask = mask >> 1U;
-    }
-    return count;
-}
-
-__STATIC_FORCEINLINE int32_t __SSAT(int32_t val, uint32_t sat)
-{
-    if ((sat >= 1U) && (sat <= 32U)) {
-        const int32_t max = (int32_t)((1U << (sat - 1U)) - 1U);
-        const int32_t min = -1 - max;
-        if (val > max) {
-            return max;
-        } else if (val < min) {
-            return min;
-        }
-    }
-    return val;
-}
-
-__STATIC_FORCEINLINE uint32_t __USAT(int32_t val, uint32_t sat)
-{
-    if (sat <= 31U) {
-        const uint32_t max = ((1U << sat) - 1U);
-        if (val > (int32_t)max) {
-            return max;
-        } else if (val < 0) {
-            return 0U;
-        }
-    }
-    return (uint32_t)val;
-}
-
-/**
-  \brief   Rotate Right in unsigned value (32 bit)
-  \details Rotate Right (immediate) provides the value of the contents of a register rotated by a variable number of bits.
-  \param [in]    op1  Value to rotate
-  \param [in]    op2  Number of Bits to rotate
-  \return               Rotated value
- */
-__STATIC_FORCEINLINE uint32_t __ROR(uint32_t op1, uint32_t op2)
-{
-    op2 %= 32U;
-    if (op2 == 0U) {
-        return op1;
-    }
-    return (op1 >> op2) | (op1 << (32U - op2));
-}
-
-#endif
-
-/**
-   * @brief Clips Q63 to Q31 values.
-   */
-__STATIC_FORCEINLINE q31_t clip_q63_to_q31(q63_t x)
-{
-    return ((q31_t)(x >> 32) != ((q31_t)x >> 31)) ? ((0x7FFFFFFF ^ ((q31_t)(x >> 63)))) : (q31_t)x;
-}
-
-/**
-   * @brief Clips Q63 to Q15 values.
-   */
-__STATIC_FORCEINLINE q15_t clip_q63_to_q15(q63_t x)
-{
-    return ((q31_t)(x >> 32) != ((q31_t)x >> 31)) ? ((0x7FFF ^ ((q15_t)(x >> 63)))) :
-                                                    (q15_t)(x >> 15);
-}
-
-/**
-   * @brief Clips Q31 to Q7 values.
-   */
-__STATIC_FORCEINLINE q7_t clip_q31_to_q7(q31_t x)
-{
-    return ((q31_t)(x >> 24) != ((q31_t)x >> 23)) ? ((0x7F ^ ((q7_t)(x >> 31)))) : (q7_t)x;
-}
-
-/**
-   * @brief Clips Q31 to Q15 values.
-   */
-__STATIC_FORCEINLINE q15_t clip_q31_to_q15(q31_t x)
-{
-    return ((q31_t)(x >> 16) != ((q31_t)x >> 15)) ? ((0x7FFF ^ ((q15_t)(x >> 31)))) : (q15_t)x;
-}
-
-/**
-   * @brief Multiplies 32 X 64 and returns 32 bit result in 2.30 format.
-   */
-__STATIC_FORCEINLINE q63_t mult32x64(q63_t x, q31_t y)
-{
-    return ((((q63_t)(x & 0x00000000FFFFFFFF) * y) >> 32) + (((q63_t)(x >> 32) * y)));
-}
-
-/* SMMLAR */
-#define multAcc_32x32_keep32_R(a, x, y) \
-    a = (q31_t)(((((q63_t)a) << 32) + ((q63_t)x * y) + 0x80000000LL) >> 32)
-
-/* SMMLSR */
-#define multSub_32x32_keep32_R(a, x, y) \
-    a = (q31_t)(((((q63_t)a) << 32) - ((q63_t)x * y) + 0x80000000LL) >> 32)
-
-/* SMMULR */
-#define mult_32x32_keep32_R(a, x, y) a = (q31_t)(((q63_t)x * y + 0x80000000LL) >> 32)
-
-/* SMMLA */
-#define multAcc_32x32_keep32(a, x, y) a += (q31_t)(((q63_t)x * y) >> 32)
-
-/* SMMLS */
-#define multSub_32x32_keep32(a, x, y) a -= (q31_t)(((q63_t)x * y) >> 32)
-
-/* SMMUL */
-#define mult_32x32_keep32(a, x, y) a = (q31_t)(((q63_t)x * y) >> 32)
-
-#ifndef ARM_MATH_DSP
-/**
-   * @brief definition to pack two 16 bit values.
-   */
-#define __PKHBT(ARG1, ARG2, ARG3)                     \
-    ((((int32_t)(ARG1) << 0) & (int32_t)0x0000FFFF) | \
-     (((int32_t)(ARG2) << ARG3) & (int32_t)0xFFFF0000))
-#define __PKHTB(ARG1, ARG2, ARG3)                     \
-    ((((int32_t)(ARG1) << 0) & (int32_t)0xFFFF0000) | \
-     (((int32_t)(ARG2) >> ARG3) & (int32_t)0x0000FFFF))
-#endif
-
-/**
-   * @brief definition to pack four 8 bit values.
-   */
-#ifndef ARM_MATH_BIG_ENDIAN
-#define __PACKq7(v0, v1, v2, v3)                                                                   \
-    ((((int32_t)(v0) << 0) & (int32_t)0x000000FF) | (((int32_t)(v1) << 8) & (int32_t)0x0000FF00) | \
-     (((int32_t)(v2) << 16) & (int32_t)0x00FF0000) |                                               \
-     (((int32_t)(v3) << 24) & (int32_t)0xFF000000))
-#else
-#define __PACKq7(v0, v1, v2, v3)                                                                   \
-    ((((int32_t)(v3) << 0) & (int32_t)0x000000FF) | (((int32_t)(v2) << 8) & (int32_t)0x0000FF00) | \
-     (((int32_t)(v1) << 16) & (int32_t)0x00FF0000) |                                               \
-     (((int32_t)(v0) << 24) & (int32_t)0xFF000000))
-#endif
-
-/*
- * @brief C custom defined intrinsic functions
- */
-#if !defined(ARM_MATH_DSP)
-
-/*
-   * @brief C custom defined QADD8
-   */
-__STATIC_FORCEINLINE uint32_t __QADD8(uint32_t x, uint32_t y)
-{
-    q31_t r, s, t, u;
-
-    r = __SSAT(((((q31_t)x << 24) >> 24) + (((q31_t)y << 24) >> 24)), 8) & (int32_t)0x000000FF;
-    s = __SSAT(((((q31_t)x << 16) >> 24) + (((q31_t)y << 16) >> 24)), 8) & (int32_t)0x000000FF;
-    t = __SSAT(((((q31_t)x << 8) >> 24) + (((q31_t)y << 8) >> 24)), 8) & (int32_t)0x000000FF;
-    u = __SSAT(((((q31_t)x) >> 24) + (((q31_t)y) >> 24)), 8) & (int32_t)0x000000FF;
-
-    return ((uint32_t)((u << 24) | (t << 16) | (s << 8) | (r)));
-}
-
-/*
-   * @brief C custom defined QSUB8
-   */
-__STATIC_FORCEINLINE uint32_t __QSUB8(uint32_t x, uint32_t y)
-{
-    q31_t r, s, t, u;
-
-    r = __SSAT(((((q31_t)x << 24) >> 24) - (((q31_t)y << 24) >> 24)), 8) & (int32_t)0x000000FF;
-    s = __SSAT(((((q31_t)x << 16) >> 24) - (((q31_t)y << 16) >> 24)), 8) & (int32_t)0x000000FF;
-    t = __SSAT(((((q31_t)x << 8) >> 24) - (((q31_t)y << 8) >> 24)), 8) & (int32_t)0x000000FF;
-    u = __SSAT(((((q31_t)x) >> 24) - (((q31_t)y) >> 24)), 8) & (int32_t)0x000000FF;
-
-    return ((uint32_t)((u << 24) | (t << 16) | (s << 8) | (r)));
-}
-
-/*
-   * @brief C custom defined QADD16
-   */
-__STATIC_FORCEINLINE uint32_t __QADD16(uint32_t x, uint32_t y)
-{
-    /*  q31_t r,     s;  without initialisation 'arm_offset_q15 test' fails  but 'intrinsic' tests pass! for armCC */
-    q31_t r = 0, s = 0;
-
-    r = __SSAT(((((q31_t)x << 16) >> 16) + (((q31_t)y << 16) >> 16)), 16) & (int32_t)0x0000FFFF;
-    s = __SSAT(((((q31_t)x) >> 16) + (((q31_t)y) >> 16)), 16) & (int32_t)0x0000FFFF;
-
-    return ((uint32_t)((s << 16) | (r)));
-}
-
-/*
-   * @brief C custom defined SHADD16
-   */
-__STATIC_FORCEINLINE uint32_t __SHADD16(uint32_t x, uint32_t y)
-{
-    q31_t r, s;
-
-    r = (((((q31_t)x << 16) >> 16) + (((q31_t)y << 16) >> 16)) >> 1) & (int32_t)0x0000FFFF;
-    s = (((((q31_t)x) >> 16) + (((q31_t)y) >> 16)) >> 1) & (int32_t)0x0000FFFF;
-
-    return ((uint32_t)((s << 16) | (r)));
-}
-
-/*
-   * @brief C custom defined QSUB16
-   */
-__STATIC_FORCEINLINE uint32_t __QSUB16(uint32_t x, uint32_t y)
-{
-    q31_t r, s;
-
-    r = __SSAT(((((q31_t)x << 16) >> 16) - (((q31_t)y << 16) >> 16)), 16) & (int32_t)0x0000FFFF;
-    s = __SSAT(((((q31_t)x) >> 16) - (((q31_t)y) >> 16)), 16) & (int32_t)0x0000FFFF;
-
-    return ((uint32_t)((s << 16) | (r)));
-}
-
-/*
-   * @brief C custom defined SHSUB16
-   */
-__STATIC_FORCEINLINE uint32_t __SHSUB16(uint32_t x, uint32_t y)
-{
-    q31_t r, s;
-
-    r = (((((q31_t)x << 16) >> 16) - (((q31_t)y << 16) >> 16)) >> 1) & (int32_t)0x0000FFFF;
-    s = (((((q31_t)x) >> 16) - (((q31_t)y) >> 16)) >> 1) & (int32_t)0x0000FFFF;
-
-    return ((uint32_t)((s << 16) | (r)));
-}
-
-/*
-   * @brief C custom defined QASX
-   */
-__STATIC_FORCEINLINE uint32_t __QASX(uint32_t x, uint32_t y)
-{
-    q31_t r, s;
-
-    r = __SSAT(((((q31_t)x << 16) >> 16) - (((q31_t)y) >> 16)), 16) & (int32_t)0x0000FFFF;
-    s = __SSAT(((((q31_t)x) >> 16) + (((q31_t)y << 16) >> 16)), 16) & (int32_t)0x0000FFFF;
-
-    return ((uint32_t)((s << 16) | (r)));
-}
-
-/*
-   * @brief C custom defined SHASX
-   */
-__STATIC_FORCEINLINE uint32_t __SHASX(uint32_t x, uint32_t y)
-{
-    q31_t r, s;
-
-    r = (((((q31_t)x << 16) >> 16) - (((q31_t)y) >> 16)) >> 1) & (int32_t)0x0000FFFF;
-    s = (((((q31_t)x) >> 16) + (((q31_t)y << 16) >> 16)) >> 1) & (int32_t)0x0000FFFF;
-
-    return ((uint32_t)((s << 16) | (r)));
-}
-
-/*
-   * @brief C custom defined QSAX
-   */
-__STATIC_FORCEINLINE uint32_t __QSAX(uint32_t x, uint32_t y)
-{
-    q31_t r, s;
-
-    r = __SSAT(((((q31_t)x << 16) >> 16) + (((q31_t)y) >> 16)), 16) & (int32_t)0x0000FFFF;
-    s = __SSAT(((((q31_t)x) >> 16) - (((q31_t)y << 16) >> 16)), 16) & (int32_t)0x0000FFFF;
-
-    return ((uint32_t)((s << 16) | (r)));
-}
-
-/*
-   * @brief C custom defined SHSAX
-   */
-__STATIC_FORCEINLINE uint32_t __SHSAX(uint32_t x, uint32_t y)
-{
-    q31_t r, s;
-
-    r = (((((q31_t)x << 16) >> 16) + (((q31_t)y) >> 16)) >> 1) & (int32_t)0x0000FFFF;
-    s = (((((q31_t)x) >> 16) - (((q31_t)y << 16) >> 16)) >> 1) & (int32_t)0x0000FFFF;
-
-    return ((uint32_t)((s << 16) | (r)));
-}
-
-/*
-   * @brief C custom defined SMUSDX
-   */
-__STATIC_FORCEINLINE uint32_t __SMUSDX(uint32_t x, uint32_t y)
-{
-    return ((uint32_t)(((((q31_t)x << 16) >> 16) * (((q31_t)y) >> 16)) -
-                       ((((q31_t)x) >> 16) * (((q31_t)y << 16) >> 16))));
-}
-
-/*
-   * @brief C custom defined SMUADX
-   */
-__STATIC_FORCEINLINE uint32_t __SMUADX(uint32_t x, uint32_t y)
-{
-    return ((uint32_t)(((((q31_t)x << 16) >> 16) * (((q31_t)y) >> 16)) +
-                       ((((q31_t)x) >> 16) * (((q31_t)y << 16) >> 16))));
-}
-
-/*
-   * @brief C custom defined QADD
-   */
-__STATIC_FORCEINLINE int32_t __QADD(int32_t x, int32_t y)
-{
-    return ((int32_t)(clip_q63_to_q31((q63_t)x + (q31_t)y)));
-}
-
-/*
-   * @brief C custom defined QSUB
-   */
-__STATIC_FORCEINLINE int32_t __QSUB(int32_t x, int32_t y)
-{
-    return ((int32_t)(clip_q63_to_q31((q63_t)x - (q31_t)y)));
-}
-
-/*
-   * @brief C custom defined SMLAD
-   */
-__STATIC_FORCEINLINE uint32_t __SMLAD(uint32_t x, uint32_t y, uint32_t sum)
-{
-    return ((uint32_t)(((((q31_t)x << 16) >> 16) * (((q31_t)y << 16) >> 16)) +
-                       ((((q31_t)x) >> 16) * (((q31_t)y) >> 16)) + (((q31_t)sum))));
-}
-
-/*
-   * @brief C custom defined SMLADX
-   */
-__STATIC_FORCEINLINE uint32_t __SMLADX(uint32_t x, uint32_t y, uint32_t sum)
-{
-    return ((uint32_t)(((((q31_t)x << 16) >> 16) * (((q31_t)y) >> 16)) +
-                       ((((q31_t)x) >> 16) * (((q31_t)y << 16) >> 16)) + (((q31_t)sum))));
-}
-
-/*
-   * @brief C custom defined SMLSDX
-   */
-__STATIC_FORCEINLINE uint32_t __SMLSDX(uint32_t x, uint32_t y, uint32_t sum)
-{
-    return ((uint32_t)(((((q31_t)x << 16) >> 16) * (((q31_t)y) >> 16)) -
-                       ((((q31_t)x) >> 16) * (((q31_t)y << 16) >> 16)) + (((q31_t)sum))));
-}
-
-/*
-   * @brief C custom defined SMLALD
-   */
-__STATIC_FORCEINLINE uint64_t __SMLALD(uint32_t x, uint32_t y, uint64_t sum)
-{
-    /*  return (sum + ((q15_t) (x >> 16) * (q15_t) (y >> 16)) + ((q15_t) x * (q15_t) y)); */
-    return ((uint64_t)(((((q31_t)x << 16) >> 16) * (((q31_t)y << 16) >> 16)) +
-                       ((((q31_t)x) >> 16) * (((q31_t)y) >> 16)) + (((q63_t)sum))));
-}
-
-/*
-   * @brief C custom defined SMLALDX
-   */
-__STATIC_FORCEINLINE uint64_t __SMLALDX(uint32_t x, uint32_t y, uint64_t sum)
-{
-    /*  return (sum + ((q15_t) (x >> 16) * (q15_t) y)) + ((q15_t) x * (q15_t) (y >> 16)); */
-    return ((uint64_t)(((((q31_t)x << 16) >> 16) * (((q31_t)y) >> 16)) +
-                       ((((q31_t)x) >> 16) * (((q31_t)y << 16) >> 16)) + (((q63_t)sum))));
-}
-
-/*
-   * @brief C custom defined SMUAD
-   */
-__STATIC_FORCEINLINE uint32_t __SMUAD(uint32_t x, uint32_t y)
-{
-    return ((uint32_t)(((((q31_t)x << 16) >> 16) * (((q31_t)y << 16) >> 16)) +
-                       ((((q31_t)x) >> 16) * (((q31_t)y) >> 16))));
-}
-
-/*
-   * @brief C custom defined SMUSD
-   */
-__STATIC_FORCEINLINE uint32_t __SMUSD(uint32_t x, uint32_t y)
-{
-    return ((uint32_t)(((((q31_t)x << 16) >> 16) * (((q31_t)y << 16) >> 16)) -
-                       ((((q31_t)x) >> 16) * (((q31_t)y) >> 16))));
-}
-
-/*
-   * @brief C custom defined SXTB16
-   */
-__STATIC_FORCEINLINE uint32_t __SXTB16(uint32_t x)
-{
-    return ((uint32_t)(((((q31_t)x << 24) >> 24) & (q31_t)0x0000FFFF) |
-                       ((((q31_t)x << 8) >> 8) & (q31_t)0xFFFF0000)));
-}
-
-/*
-   * @brief C custom defined SMMLA
-   */
-__STATIC_FORCEINLINE int32_t __SMMLA(int32_t x, int32_t y, int32_t sum)
-{
-    return (sum + (int32_t)(((int64_t)x * y) >> 32));
-}
-
-#endif /* !defined (ARM_MATH_DSP) */
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* ifndef _TRANSFORM_FUNCTIONS_H_ */
diff --git a/Libraries/CMSIS/5.9.0/DSP/Include/dsp/transform_functions.h b/Libraries/CMSIS/5.9.0/DSP/Include/dsp/transform_functions.h
deleted file mode 100644
index cef55bda62e..00000000000
--- a/Libraries/CMSIS/5.9.0/DSP/Include/dsp/transform_functions.h
+++ /dev/null
@@ -1,597 +0,0 @@
-/******************************************************************************
- * @file     transform_functions.h
- * @brief    Public header file for CMSIS DSP Library
- * @version  V1.10.0
- * @date     08 July 2021
- * Target Processor: Cortex-M and Cortex-A cores
- ******************************************************************************/
-/*
- * Copyright (c) 2010-2020 Arm Limited or its affiliates. All rights reserved.
- *
- * SPDX-License-Identifier: Apache-2.0
- *
- * Licensed under the Apache License, Version 2.0 (the License); you may
- * not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an AS IS BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef _TRANSFORM_FUNCTIONS_H_
-#define _TRANSFORM_FUNCTIONS_H_
-
-#include "arm_math_types.h"
-#include "arm_math_memory.h"
-
-#include "dsp/none.h"
-#include "dsp/utils.h"
-
-#include "dsp/basic_math_functions.h"
-#include "dsp/complex_math_functions.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-/**
- * @defgroup groupTransforms Transform Functions
- */
-
-/**
-   * @brief Instance structure for the Q15 CFFT/CIFFT function.
-   */
-typedef struct {
-    uint16_t fftLen; /**< length of the FFT. */
-    uint8_t
-        ifftFlag; /**< flag that selects forward (ifftFlag=0) or inverse (ifftFlag=1) transform. */
-    uint8_t
-        bitReverseFlag; /**< flag that enables (bitReverseFlag=1) or disables (bitReverseFlag=0) bit reversal of output. */
-    const q15_t *pTwiddle; /**< points to the Sin twiddle factor table. */
-    const uint16_t *pBitRevTable; /**< points to the bit reversal table. */
-    uint16_t
-        twidCoefModifier; /**< twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table. */
-    uint16_t
-        bitRevFactor; /**< bit reversal modifier that supports different size FFTs with the same bit reversal table. */
-} arm_cfft_radix2_instance_q15;
-
-/* Deprecated */
-arm_status arm_cfft_radix2_init_q15(arm_cfft_radix2_instance_q15 *S, uint16_t fftLen,
-                                    uint8_t ifftFlag, uint8_t bitReverseFlag);
-
-/* Deprecated */
-void arm_cfft_radix2_q15(const arm_cfft_radix2_instance_q15 *S, q15_t *pSrc);
-
-/**
-   * @brief Instance structure for the Q15 CFFT/CIFFT function.
-   */
-typedef struct {
-    uint16_t fftLen; /**< length of the FFT. */
-    uint8_t
-        ifftFlag; /**< flag that selects forward (ifftFlag=0) or inverse (ifftFlag=1) transform. */
-    uint8_t
-        bitReverseFlag; /**< flag that enables (bitReverseFlag=1) or disables (bitReverseFlag=0) bit reversal of output. */
-    const q15_t *pTwiddle; /**< points to the twiddle factor table. */
-    const uint16_t *pBitRevTable; /**< points to the bit reversal table. */
-    uint16_t
-        twidCoefModifier; /**< twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table. */
-    uint16_t
-        bitRevFactor; /**< bit reversal modifier that supports different size FFTs with the same bit reversal table. */
-} arm_cfft_radix4_instance_q15;
-
-/* Deprecated */
-arm_status arm_cfft_radix4_init_q15(arm_cfft_radix4_instance_q15 *S, uint16_t fftLen,
-                                    uint8_t ifftFlag, uint8_t bitReverseFlag);
-
-/* Deprecated */
-void arm_cfft_radix4_q15(const arm_cfft_radix4_instance_q15 *S, q15_t *pSrc);
-
-/**
-   * @brief Instance structure for the Radix-2 Q31 CFFT/CIFFT function.
-   */
-typedef struct {
-    uint16_t fftLen; /**< length of the FFT. */
-    uint8_t
-        ifftFlag; /**< flag that selects forward (ifftFlag=0) or inverse (ifftFlag=1) transform. */
-    uint8_t
-        bitReverseFlag; /**< flag that enables (bitReverseFlag=1) or disables (bitReverseFlag=0) bit reversal of output. */
-    const q31_t *pTwiddle; /**< points to the Twiddle factor table. */
-    const uint16_t *pBitRevTable; /**< points to the bit reversal table. */
-    uint16_t
-        twidCoefModifier; /**< twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table. */
-    uint16_t
-        bitRevFactor; /**< bit reversal modifier that supports different size FFTs with the same bit reversal table. */
-} arm_cfft_radix2_instance_q31;
-
-/* Deprecated */
-arm_status arm_cfft_radix2_init_q31(arm_cfft_radix2_instance_q31 *S, uint16_t fftLen,
-                                    uint8_t ifftFlag, uint8_t bitReverseFlag);
-
-/* Deprecated */
-void arm_cfft_radix2_q31(const arm_cfft_radix2_instance_q31 *S, q31_t *pSrc);
-
-/**
-   * @brief Instance structure for the Q31 CFFT/CIFFT function.
-   */
-typedef struct {
-    uint16_t fftLen; /**< length of the FFT. */
-    uint8_t
-        ifftFlag; /**< flag that selects forward (ifftFlag=0) or inverse (ifftFlag=1) transform. */
-    uint8_t
-        bitReverseFlag; /**< flag that enables (bitReverseFlag=1) or disables (bitReverseFlag=0) bit reversal of output. */
-    const q31_t *pTwiddle; /**< points to the twiddle factor table. */
-    const uint16_t *pBitRevTable; /**< points to the bit reversal table. */
-    uint16_t
-        twidCoefModifier; /**< twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table. */
-    uint16_t
-        bitRevFactor; /**< bit reversal modifier that supports different size FFTs with the same bit reversal table. */
-} arm_cfft_radix4_instance_q31;
-
-/* Deprecated */
-void arm_cfft_radix4_q31(const arm_cfft_radix4_instance_q31 *S, q31_t *pSrc);
-
-/* Deprecated */
-arm_status arm_cfft_radix4_init_q31(arm_cfft_radix4_instance_q31 *S, uint16_t fftLen,
-                                    uint8_t ifftFlag, uint8_t bitReverseFlag);
-
-/**
-   * @brief Instance structure for the floating-point CFFT/CIFFT function.
-   */
-typedef struct {
-    uint16_t fftLen; /**< length of the FFT. */
-    uint8_t
-        ifftFlag; /**< flag that selects forward (ifftFlag=0) or inverse (ifftFlag=1) transform. */
-    uint8_t
-        bitReverseFlag; /**< flag that enables (bitReverseFlag=1) or disables (bitReverseFlag=0) bit reversal of output. */
-    const float32_t *pTwiddle; /**< points to the Twiddle factor table. */
-    const uint16_t *pBitRevTable; /**< points to the bit reversal table. */
-    uint16_t
-        twidCoefModifier; /**< twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table. */
-    uint16_t
-        bitRevFactor; /**< bit reversal modifier that supports different size FFTs with the same bit reversal table. */
-    float32_t onebyfftLen; /**< value of 1/fftLen. */
-} arm_cfft_radix2_instance_f32;
-
-/* Deprecated */
-arm_status arm_cfft_radix2_init_f32(arm_cfft_radix2_instance_f32 *S, uint16_t fftLen,
-                                    uint8_t ifftFlag, uint8_t bitReverseFlag);
-
-/* Deprecated */
-void arm_cfft_radix2_f32(const arm_cfft_radix2_instance_f32 *S, float32_t *pSrc);
-
-/**
-   * @brief Instance structure for the floating-point CFFT/CIFFT function.
-   */
-typedef struct {
-    uint16_t fftLen; /**< length of the FFT. */
-    uint8_t
-        ifftFlag; /**< flag that selects forward (ifftFlag=0) or inverse (ifftFlag=1) transform. */
-    uint8_t
-        bitReverseFlag; /**< flag that enables (bitReverseFlag=1) or disables (bitReverseFlag=0) bit reversal of output. */
-    const float32_t *pTwiddle; /**< points to the Twiddle factor table. */
-    const uint16_t *pBitRevTable; /**< points to the bit reversal table. */
-    uint16_t
-        twidCoefModifier; /**< twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table. */
-    uint16_t
-        bitRevFactor; /**< bit reversal modifier that supports different size FFTs with the same bit reversal table. */
-    float32_t onebyfftLen; /**< value of 1/fftLen. */
-} arm_cfft_radix4_instance_f32;
-
-/* Deprecated */
-arm_status arm_cfft_radix4_init_f32(arm_cfft_radix4_instance_f32 *S, uint16_t fftLen,
-                                    uint8_t ifftFlag, uint8_t bitReverseFlag);
-
-/* Deprecated */
-void arm_cfft_radix4_f32(const arm_cfft_radix4_instance_f32 *S, float32_t *pSrc);
-
-/**
-   * @brief Instance structure for the fixed-point CFFT/CIFFT function.
-   */
-typedef struct {
-    uint16_t fftLen; /**< length of the FFT. */
-    const q15_t *pTwiddle; /**< points to the Twiddle factor table. */
-    const uint16_t *pBitRevTable; /**< points to the bit reversal table. */
-    uint16_t bitRevLength; /**< bit reversal table length. */
-#if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE)
-    const uint32_t
-        *rearranged_twiddle_tab_stride1_arr; /**< Per stage reordered twiddle pointer (offset 1) */
-    const uint32_t
-        *rearranged_twiddle_tab_stride2_arr; /**< Per stage reordered twiddle pointer (offset 2) */
-    const uint32_t
-        *rearranged_twiddle_tab_stride3_arr; /**< Per stage reordered twiddle pointer (offset 3) */
-    const q15_t *rearranged_twiddle_stride1; /**< reordered twiddle offset 1 storage */
-    const q15_t *rearranged_twiddle_stride2; /**< reordered twiddle offset 2 storage */
-    const q15_t *rearranged_twiddle_stride3;
-#endif
-} arm_cfft_instance_q15;
-
-arm_status arm_cfft_init_q15(arm_cfft_instance_q15 *S, uint16_t fftLen);
-
-void arm_cfft_q15(const arm_cfft_instance_q15 *S, q15_t *p1, uint8_t ifftFlag,
-                  uint8_t bitReverseFlag);
-
-/**
-   * @brief Instance structure for the fixed-point CFFT/CIFFT function.
-   */
-typedef struct {
-    uint16_t fftLen; /**< length of the FFT. */
-    const q31_t *pTwiddle; /**< points to the Twiddle factor table. */
-    const uint16_t *pBitRevTable; /**< points to the bit reversal table. */
-    uint16_t bitRevLength; /**< bit reversal table length. */
-#if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE)
-    const uint32_t
-        *rearranged_twiddle_tab_stride1_arr; /**< Per stage reordered twiddle pointer (offset 1) */
-    const uint32_t
-        *rearranged_twiddle_tab_stride2_arr; /**< Per stage reordered twiddle pointer (offset 2) */
-    const uint32_t
-        *rearranged_twiddle_tab_stride3_arr; /**< Per stage reordered twiddle pointer (offset 3) */
-    const q31_t *rearranged_twiddle_stride1; /**< reordered twiddle offset 1 storage */
-    const q31_t *rearranged_twiddle_stride2; /**< reordered twiddle offset 2 storage */
-    const q31_t *rearranged_twiddle_stride3;
-#endif
-} arm_cfft_instance_q31;
-
-arm_status arm_cfft_init_q31(arm_cfft_instance_q31 *S, uint16_t fftLen);
-
-void arm_cfft_q31(const arm_cfft_instance_q31 *S, q31_t *p1, uint8_t ifftFlag,
-                  uint8_t bitReverseFlag);
-
-/**
-   * @brief Instance structure for the floating-point CFFT/CIFFT function.
-   */
-typedef struct {
-    uint16_t fftLen; /**< length of the FFT. */
-    const float32_t *pTwiddle; /**< points to the Twiddle factor table. */
-    const uint16_t *pBitRevTable; /**< points to the bit reversal table. */
-    uint16_t bitRevLength; /**< bit reversal table length. */
-#if defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE)
-    const uint32_t
-        *rearranged_twiddle_tab_stride1_arr; /**< Per stage reordered twiddle pointer (offset 1) */
-    const uint32_t
-        *rearranged_twiddle_tab_stride2_arr; /**< Per stage reordered twiddle pointer (offset 2) */
-    const uint32_t
-        *rearranged_twiddle_tab_stride3_arr; /**< Per stage reordered twiddle pointer (offset 3) */
-    const float32_t *rearranged_twiddle_stride1; /**< reordered twiddle offset 1 storage */
-    const float32_t *rearranged_twiddle_stride2; /**< reordered twiddle offset 2 storage */
-    const float32_t *rearranged_twiddle_stride3;
-#endif
-} arm_cfft_instance_f32;
-
-arm_status arm_cfft_init_f32(arm_cfft_instance_f32 *S, uint16_t fftLen);
-
-void arm_cfft_f32(const arm_cfft_instance_f32 *S, float32_t *p1, uint8_t ifftFlag,
-                  uint8_t bitReverseFlag);
-
-/**
-   * @brief Instance structure for the Double Precision Floating-point CFFT/CIFFT function.
-   */
-typedef struct {
-    uint16_t fftLen; /**< length of the FFT. */
-    const float64_t *pTwiddle; /**< points to the Twiddle factor table. */
-    const uint16_t *pBitRevTable; /**< points to the bit reversal table. */
-    uint16_t bitRevLength; /**< bit reversal table length. */
-} arm_cfft_instance_f64;
-
-arm_status arm_cfft_init_f64(arm_cfft_instance_f64 *S, uint16_t fftLen);
-
-void arm_cfft_f64(const arm_cfft_instance_f64 *S, float64_t *p1, uint8_t ifftFlag,
-                  uint8_t bitReverseFlag);
-
-/**
-   * @brief Instance structure for the Q15 RFFT/RIFFT function.
-   */
-typedef struct {
-    uint32_t fftLenReal; /**< length of the real FFT. */
-    uint8_t
-        ifftFlagR; /**< flag that selects forward (ifftFlagR=0) or inverse (ifftFlagR=1) transform. */
-    uint8_t
-        bitReverseFlagR; /**< flag that enables (bitReverseFlagR=1) or disables (bitReverseFlagR=0) bit reversal of output. */
-    uint32_t
-        twidCoefRModifier; /**< twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table. */
-    const q15_t *pTwiddleAReal; /**< points to the real twiddle factor table. */
-    const q15_t *pTwiddleBReal; /**< points to the imag twiddle factor table. */
-#if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE)
-    arm_cfft_instance_q15 cfftInst;
-#else
-    const arm_cfft_instance_q15 *pCfft; /**< points to the complex FFT instance. */
-#endif
-} arm_rfft_instance_q15;
-
-arm_status arm_rfft_init_q15(arm_rfft_instance_q15 *S, uint32_t fftLenReal, uint32_t ifftFlagR,
-                             uint32_t bitReverseFlag);
-
-void arm_rfft_q15(const arm_rfft_instance_q15 *S, q15_t *pSrc, q15_t *pDst);
-
-/**
-   * @brief Instance structure for the Q31 RFFT/RIFFT function.
-   */
-typedef struct {
-    uint32_t fftLenReal; /**< length of the real FFT. */
-    uint8_t
-        ifftFlagR; /**< flag that selects forward (ifftFlagR=0) or inverse (ifftFlagR=1) transform. */
-    uint8_t
-        bitReverseFlagR; /**< flag that enables (bitReverseFlagR=1) or disables (bitReverseFlagR=0) bit reversal of output. */
-    uint32_t
-        twidCoefRModifier; /**< twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table. */
-    const q31_t *pTwiddleAReal; /**< points to the real twiddle factor table. */
-    const q31_t *pTwiddleBReal; /**< points to the imag twiddle factor table. */
-#if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE)
-    arm_cfft_instance_q31 cfftInst;
-#else
-    const arm_cfft_instance_q31 *pCfft; /**< points to the complex FFT instance. */
-#endif
-} arm_rfft_instance_q31;
-
-arm_status arm_rfft_init_q31(arm_rfft_instance_q31 *S, uint32_t fftLenReal, uint32_t ifftFlagR,
-                             uint32_t bitReverseFlag);
-
-void arm_rfft_q31(const arm_rfft_instance_q31 *S, q31_t *pSrc, q31_t *pDst);
-
-/**
-   * @brief Instance structure for the floating-point RFFT/RIFFT function.
-   */
-typedef struct {
-    uint32_t fftLenReal; /**< length of the real FFT. */
-    uint16_t fftLenBy2; /**< length of the complex FFT. */
-    uint8_t
-        ifftFlagR; /**< flag that selects forward (ifftFlagR=0) or inverse (ifftFlagR=1) transform. */
-    uint8_t
-        bitReverseFlagR; /**< flag that enables (bitReverseFlagR=1) or disables (bitReverseFlagR=0) bit reversal of output. */
-    uint32_t
-        twidCoefRModifier; /**< twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table. */
-    const float32_t *pTwiddleAReal; /**< points to the real twiddle factor table. */
-    const float32_t *pTwiddleBReal; /**< points to the imag twiddle factor table. */
-    arm_cfft_radix4_instance_f32 *pCfft; /**< points to the complex FFT instance. */
-} arm_rfft_instance_f32;
-
-arm_status arm_rfft_init_f32(arm_rfft_instance_f32 *S, arm_cfft_radix4_instance_f32 *S_CFFT,
-                             uint32_t fftLenReal, uint32_t ifftFlagR, uint32_t bitReverseFlag);
-
-void arm_rfft_f32(const arm_rfft_instance_f32 *S, float32_t *pSrc, float32_t *pDst);
-
-/**
-   * @brief Instance structure for the Double Precision Floating-point RFFT/RIFFT function.
-   */
-typedef struct {
-    arm_cfft_instance_f64 Sint; /**< Internal CFFT structure. */
-    uint16_t fftLenRFFT; /**< length of the real sequence */
-    const float64_t *pTwiddleRFFT; /**< Twiddle factors real stage  */
-} arm_rfft_fast_instance_f64;
-
-arm_status arm_rfft_fast_init_f64(arm_rfft_fast_instance_f64 *S, uint16_t fftLen);
-
-void arm_rfft_fast_f64(arm_rfft_fast_instance_f64 *S, float64_t *p, float64_t *pOut,
-                       uint8_t ifftFlag);
-
-/**
-   * @brief Instance structure for the floating-point RFFT/RIFFT function.
-   */
-typedef struct {
-    arm_cfft_instance_f32 Sint; /**< Internal CFFT structure. */
-    uint16_t fftLenRFFT; /**< length of the real sequence */
-    const float32_t *pTwiddleRFFT; /**< Twiddle factors real stage  */
-} arm_rfft_fast_instance_f32;
-
-arm_status arm_rfft_fast_init_f32(arm_rfft_fast_instance_f32 *S, uint16_t fftLen);
-
-void arm_rfft_fast_f32(const arm_rfft_fast_instance_f32 *S, float32_t *p, float32_t *pOut,
-                       uint8_t ifftFlag);
-
-/**
-   * @brief Instance structure for the floating-point DCT4/IDCT4 function.
-   */
-typedef struct {
-    uint16_t N; /**< length of the DCT4. */
-    uint16_t Nby2; /**< half of the length of the DCT4. */
-    float32_t normalize; /**< normalizing factor. */
-    const float32_t *pTwiddle; /**< points to the twiddle factor table. */
-    const float32_t *pCosFactor; /**< points to the cosFactor table. */
-    arm_rfft_instance_f32 *pRfft; /**< points to the real FFT instance. */
-    arm_cfft_radix4_instance_f32 *pCfft; /**< points to the complex FFT instance. */
-} arm_dct4_instance_f32;
-
-/**
-   * @brief  Initialization function for the floating-point DCT4/IDCT4.
-   * @param[in,out] S          points to an instance of floating-point DCT4/IDCT4 structure.
-   * @param[in]     S_RFFT     points to an instance of floating-point RFFT/RIFFT structure.
-   * @param[in]     S_CFFT     points to an instance of floating-point CFFT/CIFFT structure.
-   * @param[in]     N          length of the DCT4.
-   * @param[in]     Nby2       half of the length of the DCT4.
-   * @param[in]     normalize  normalizing factor.
-   * @return      arm_status function returns ARM_MATH_SUCCESS if initialization is successful or ARM_MATH_ARGUMENT_ERROR if <code>fftLenReal</code> is not a supported transform length.
-   */
-arm_status arm_dct4_init_f32(arm_dct4_instance_f32 *S, arm_rfft_instance_f32 *S_RFFT,
-                             arm_cfft_radix4_instance_f32 *S_CFFT, uint16_t N, uint16_t Nby2,
-                             float32_t normalize);
-
-/**
-   * @brief Processing function for the floating-point DCT4/IDCT4.
-   * @param[in]     S              points to an instance of the floating-point DCT4/IDCT4 structure.
-   * @param[in]     pState         points to state buffer.
-   * @param[in,out] pInlineBuffer  points to the in-place input and output buffer.
-   */
-void arm_dct4_f32(const arm_dct4_instance_f32 *S, float32_t *pState, float32_t *pInlineBuffer);
-
-/**
-   * @brief Instance structure for the Q31 DCT4/IDCT4 function.
-   */
-typedef struct {
-    uint16_t N; /**< length of the DCT4. */
-    uint16_t Nby2; /**< half of the length of the DCT4. */
-    q31_t normalize; /**< normalizing factor. */
-    const q31_t *pTwiddle; /**< points to the twiddle factor table. */
-    const q31_t *pCosFactor; /**< points to the cosFactor table. */
-    arm_rfft_instance_q31 *pRfft; /**< points to the real FFT instance. */
-    arm_cfft_radix4_instance_q31 *pCfft; /**< points to the complex FFT instance. */
-} arm_dct4_instance_q31;
-
-/**
-   * @brief  Initialization function for the Q31 DCT4/IDCT4.
-   * @param[in,out] S          points to an instance of Q31 DCT4/IDCT4 structure.
-   * @param[in]     S_RFFT     points to an instance of Q31 RFFT/RIFFT structure
-   * @param[in]     S_CFFT     points to an instance of Q31 CFFT/CIFFT structure
-   * @param[in]     N          length of the DCT4.
-   * @param[in]     Nby2       half of the length of the DCT4.
-   * @param[in]     normalize  normalizing factor.
-   * @return      arm_status function returns ARM_MATH_SUCCESS if initialization is successful or ARM_MATH_ARGUMENT_ERROR if <code>N</code> is not a supported transform length.
-   */
-arm_status arm_dct4_init_q31(arm_dct4_instance_q31 *S, arm_rfft_instance_q31 *S_RFFT,
-                             arm_cfft_radix4_instance_q31 *S_CFFT, uint16_t N, uint16_t Nby2,
-                             q31_t normalize);
-
-/**
-   * @brief Processing function for the Q31 DCT4/IDCT4.
-   * @param[in]     S              points to an instance of the Q31 DCT4 structure.
-   * @param[in]     pState         points to state buffer.
-   * @param[in,out] pInlineBuffer  points to the in-place input and output buffer.
-   */
-void arm_dct4_q31(const arm_dct4_instance_q31 *S, q31_t *pState, q31_t *pInlineBuffer);
-
-/**
-   * @brief Instance structure for the Q15 DCT4/IDCT4 function.
-   */
-typedef struct {
-    uint16_t N; /**< length of the DCT4. */
-    uint16_t Nby2; /**< half of the length of the DCT4. */
-    q15_t normalize; /**< normalizing factor. */
-    const q15_t *pTwiddle; /**< points to the twiddle factor table. */
-    const q15_t *pCosFactor; /**< points to the cosFactor table. */
-    arm_rfft_instance_q15 *pRfft; /**< points to the real FFT instance. */
-    arm_cfft_radix4_instance_q15 *pCfft; /**< points to the complex FFT instance. */
-} arm_dct4_instance_q15;
-
-/**
-   * @brief  Initialization function for the Q15 DCT4/IDCT4.
-   * @param[in,out] S          points to an instance of Q15 DCT4/IDCT4 structure.
-   * @param[in]     S_RFFT     points to an instance of Q15 RFFT/RIFFT structure.
-   * @param[in]     S_CFFT     points to an instance of Q15 CFFT/CIFFT structure.
-   * @param[in]     N          length of the DCT4.
-   * @param[in]     Nby2       half of the length of the DCT4.
-   * @param[in]     normalize  normalizing factor.
-   * @return      arm_status function returns ARM_MATH_SUCCESS if initialization is successful or ARM_MATH_ARGUMENT_ERROR if <code>N</code> is not a supported transform length.
-   */
-arm_status arm_dct4_init_q15(arm_dct4_instance_q15 *S, arm_rfft_instance_q15 *S_RFFT,
-                             arm_cfft_radix4_instance_q15 *S_CFFT, uint16_t N, uint16_t Nby2,
-                             q15_t normalize);
-
-/**
-   * @brief Processing function for the Q15 DCT4/IDCT4.
-   * @param[in]     S              points to an instance of the Q15 DCT4 structure.
-   * @param[in]     pState         points to state buffer.
-   * @param[in,out] pInlineBuffer  points to the in-place input and output buffer.
-   */
-void arm_dct4_q15(const arm_dct4_instance_q15 *S, q15_t *pState, q15_t *pInlineBuffer);
-
-/**
-   * @brief Instance structure for the Floating-point MFCC function.
-   */
-typedef struct {
-    const float32_t *dctCoefs; /**< Internal DCT coefficients */
-    const float32_t *filterCoefs; /**< Internal Mel filter coefficients */
-    const float32_t *windowCoefs; /**< Windowing coefficients */
-    const uint32_t *filterPos; /**< Internal Mel filter positions in spectrum */
-    const uint32_t *filterLengths; /**< Internal Mel filter  lengths */
-    uint32_t fftLen; /**< FFT length */
-    uint32_t nbMelFilters; /**< Number of Mel filters */
-    uint32_t nbDctOutputs; /**< Number of DCT outputs */
-#if defined(ARM_MFCC_CFFT_BASED)
-    /* Implementation of the MFCC is using a CFFT */
-    arm_cfft_instance_f32 cfft; /**< Internal CFFT instance */
-#else
-    /* Implementation of the MFCC is using a RFFT (default) */
-    arm_rfft_fast_instance_f32 rfft;
-#endif
-} arm_mfcc_instance_f32;
-
-arm_status arm_mfcc_init_f32(arm_mfcc_instance_f32 *S, uint32_t fftLen, uint32_t nbMelFilters,
-                             uint32_t nbDctOutputs, const float32_t *dctCoefs,
-                             const uint32_t *filterPos, const uint32_t *filterLengths,
-                             const float32_t *filterCoefs, const float32_t *windowCoefs);
-
-/**
-  @brief         MFCC F32
-  @param[in]    S       points to the mfcc instance structure
-  @param[in]     pSrc points to the input samples
-  @param[out]     pDst  points to the output MFCC values
-  @param[inout]     pTmp  points to a temporary buffer of complex
-  @return        none
- */
-void arm_mfcc_f32(const arm_mfcc_instance_f32 *S, float32_t *pSrc, float32_t *pDst,
-                  float32_t *pTmp);
-
-typedef struct {
-    const q31_t *dctCoefs; /**< Internal DCT coefficients */
-    const q31_t *filterCoefs; /**< Internal Mel filter coefficients */
-    const q31_t *windowCoefs; /**< Windowing coefficients */
-    const uint32_t *filterPos; /**< Internal Mel filter positions in spectrum */
-    const uint32_t *filterLengths; /**< Internal Mel filter  lengths */
-    uint32_t fftLen; /**< FFT length */
-    uint32_t nbMelFilters; /**< Number of Mel filters */
-    uint32_t nbDctOutputs; /**< Number of DCT outputs */
-#if defined(ARM_MFCC_CFFT_BASED)
-    /* Implementation of the MFCC is using a CFFT */
-    arm_cfft_instance_q31 cfft; /**< Internal CFFT instance */
-#else
-    /* Implementation of the MFCC is using a RFFT (default) */
-    arm_rfft_instance_q31 rfft;
-#endif
-} arm_mfcc_instance_q31;
-
-arm_status arm_mfcc_init_q31(arm_mfcc_instance_q31 *S, uint32_t fftLen, uint32_t nbMelFilters,
-                             uint32_t nbDctOutputs, const q31_t *dctCoefs,
-                             const uint32_t *filterPos, const uint32_t *filterLengths,
-                             const q31_t *filterCoefs, const q31_t *windowCoefs);
-
-/**
-  @brief         MFCC Q31
-  @param[in]    S       points to the mfcc instance structure
-  @param[in]     pSrc points to the input samples
-  @param[out]     pDst  points to the output MFCC values
-  @param[inout]     pTmp  points to a temporary buffer of complex
-  @return        none
- */
-arm_status arm_mfcc_q31(const arm_mfcc_instance_q31 *S, q31_t *pSrc, q31_t *pDst, q31_t *pTmp);
-
-typedef struct {
-    const q15_t *dctCoefs; /**< Internal DCT coefficients */
-    const q15_t *filterCoefs; /**< Internal Mel filter coefficients */
-    const q15_t *windowCoefs; /**< Windowing coefficients */
-    const uint32_t *filterPos; /**< Internal Mel filter positions in spectrum */
-    const uint32_t *filterLengths; /**< Internal Mel filter  lengths */
-    uint32_t fftLen; /**< FFT length */
-    uint32_t nbMelFilters; /**< Number of Mel filters */
-    uint32_t nbDctOutputs; /**< Number of DCT outputs */
-#if defined(ARM_MFCC_CFFT_BASED)
-    /* Implementation of the MFCC is using a CFFT */
-    arm_cfft_instance_q15 cfft; /**< Internal CFFT instance */
-#else
-    /* Implementation of the MFCC is using a RFFT (default) */
-    arm_rfft_instance_q15 rfft;
-#endif
-} arm_mfcc_instance_q15;
-
-arm_status arm_mfcc_init_q15(arm_mfcc_instance_q15 *S, uint32_t fftLen, uint32_t nbMelFilters,
-                             uint32_t nbDctOutputs, const q15_t *dctCoefs,
-                             const uint32_t *filterPos, const uint32_t *filterLengths,
-                             const q15_t *filterCoefs, const q15_t *windowCoefs);
-
-/**
-  @brief         MFCC Q15
-  @param[in]    S       points to the mfcc instance structure
-  @param[in]     pSrc points to the input samples
-  @param[out]     pDst  points to the output MFCC values in q8.7 format
-  @param[inout]     pTmp  points to a temporary buffer of complex
-  @return        error status
- */
-arm_status arm_mfcc_q15(const arm_mfcc_instance_q15 *S, q15_t *pSrc, q15_t *pDst, q31_t *pTmp);
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* ifndef _TRANSFORM_FUNCTIONS_H_ */
diff --git a/Libraries/CMSIS/5.9.0/DSP/Include/dsp/transform_functions_f16.h b/Libraries/CMSIS/5.9.0/DSP/Include/dsp/transform_functions_f16.h
deleted file mode 100644
index 53090bc91bb..00000000000
--- a/Libraries/CMSIS/5.9.0/DSP/Include/dsp/transform_functions_f16.h
+++ /dev/null
@@ -1,174 +0,0 @@
-/******************************************************************************
- * @file     transform_functions_f16.h
- * @brief    Public header file for CMSIS DSP Library
- * @version  V1.10.0
- * @date     08 July 2021
- * Target Processor: Cortex-M and Cortex-A cores
- ******************************************************************************/
-/*
- * Copyright (c) 2010-2020 Arm Limited or its affiliates. All rights reserved.
- *
- * SPDX-License-Identifier: Apache-2.0
- *
- * Licensed under the Apache License, Version 2.0 (the License); you may
- * not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an AS IS BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef _TRANSFORM_FUNCTIONS_F16_H_
-#define _TRANSFORM_FUNCTIONS_F16_H_
-
-#include "arm_math_types_f16.h"
-#include "arm_math_memory.h"
-
-#include "dsp/none.h"
-#include "dsp/utils.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-#if defined(ARM_FLOAT16_SUPPORTED)
-
-/**
-   * @brief Instance structure for the floating-point CFFT/CIFFT function.
-   */
-typedef struct {
-    uint16_t fftLen; /**< length of the FFT. */
-    uint8_t
-        ifftFlag; /**< flag that selects forward (ifftFlag=0) or inverse (ifftFlag=1) transform. */
-    uint8_t
-        bitReverseFlag; /**< flag that enables (bitReverseFlag=1) or disables (bitReverseFlag=0) bit reversal of output. */
-    const float16_t *pTwiddle; /**< points to the Twiddle factor table. */
-    const uint16_t *pBitRevTable; /**< points to the bit reversal table. */
-    uint16_t
-        twidCoefModifier; /**< twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table. */
-    uint16_t
-        bitRevFactor; /**< bit reversal modifier that supports different size FFTs with the same bit reversal table. */
-    float16_t onebyfftLen; /**< value of 1/fftLen. */
-} arm_cfft_radix2_instance_f16;
-
-/**
-   * @brief Instance structure for the floating-point CFFT/CIFFT function.
-   */
-typedef struct {
-    uint16_t fftLen; /**< length of the FFT. */
-    uint8_t
-        ifftFlag; /**< flag that selects forward (ifftFlag=0) or inverse (ifftFlag=1) transform. */
-    uint8_t
-        bitReverseFlag; /**< flag that enables (bitReverseFlag=1) or disables (bitReverseFlag=0) bit reversal of output. */
-    const float16_t *pTwiddle; /**< points to the Twiddle factor table. */
-    const uint16_t *pBitRevTable; /**< points to the bit reversal table. */
-    uint16_t
-        twidCoefModifier; /**< twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table. */
-    uint16_t
-        bitRevFactor; /**< bit reversal modifier that supports different size FFTs with the same bit reversal table. */
-    float16_t onebyfftLen; /**< value of 1/fftLen. */
-} arm_cfft_radix4_instance_f16;
-
-/**
-   * @brief Instance structure for the floating-point CFFT/CIFFT function.
-   */
-typedef struct {
-    uint16_t fftLen; /**< length of the FFT. */
-    const float16_t *pTwiddle; /**< points to the Twiddle factor table. */
-    const uint16_t *pBitRevTable; /**< points to the bit reversal table. */
-    uint16_t bitRevLength; /**< bit reversal table length. */
-#if defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE)
-    const uint32_t
-        *rearranged_twiddle_tab_stride1_arr; /**< Per stage reordered twiddle pointer (offset 1) */
-    const uint32_t
-        *rearranged_twiddle_tab_stride2_arr; /**< Per stage reordered twiddle pointer (offset 2) */
-    const uint32_t
-        *rearranged_twiddle_tab_stride3_arr; /**< Per stage reordered twiddle pointer (offset 3) */
-    const float16_t *rearranged_twiddle_stride1; /**< reordered twiddle offset 1 storage */
-    const float16_t *rearranged_twiddle_stride2; /**< reordered twiddle offset 2 storage */
-    const float16_t *rearranged_twiddle_stride3;
-#endif
-} arm_cfft_instance_f16;
-
-arm_status arm_cfft_init_f16(arm_cfft_instance_f16 *S, uint16_t fftLen);
-
-void arm_cfft_f16(const arm_cfft_instance_f16 *S, float16_t *p1, uint8_t ifftFlag,
-                  uint8_t bitReverseFlag);
-
-/**
-   * @brief Instance structure for the floating-point RFFT/RIFFT function.
-   */
-typedef struct {
-    arm_cfft_instance_f16 Sint; /**< Internal CFFT structure. */
-    uint16_t fftLenRFFT; /**< length of the real sequence */
-    const float16_t *pTwiddleRFFT; /**< Twiddle factors real stage  */
-} arm_rfft_fast_instance_f16;
-
-arm_status arm_rfft_fast_init_f16(arm_rfft_fast_instance_f16 *S, uint16_t fftLen);
-
-void arm_rfft_fast_f16(const arm_rfft_fast_instance_f16 *S, float16_t *p, float16_t *pOut,
-                       uint8_t ifftFlag);
-
-/* Deprecated */
-arm_status arm_cfft_radix4_init_f16(arm_cfft_radix4_instance_f16 *S, uint16_t fftLen,
-                                    uint8_t ifftFlag, uint8_t bitReverseFlag);
-
-/* Deprecated */
-void arm_cfft_radix4_f16(const arm_cfft_radix4_instance_f16 *S, float16_t *pSrc);
-
-/* Deprecated */
-arm_status arm_cfft_radix2_init_f16(arm_cfft_radix2_instance_f16 *S, uint16_t fftLen,
-                                    uint8_t ifftFlag, uint8_t bitReverseFlag);
-
-/* Deprecated */
-void arm_cfft_radix2_f16(const arm_cfft_radix2_instance_f16 *S, float16_t *pSrc);
-
-/**
-   * @brief Instance structure for the Floating-point MFCC function.
-   */
-typedef struct {
-    const float16_t *dctCoefs; /**< Internal DCT coefficients */
-    const float16_t *filterCoefs; /**< Internal Mel filter coefficients */
-    const float16_t *windowCoefs; /**< Windowing coefficients */
-    const uint32_t *filterPos; /**< Internal Mel filter positions in spectrum */
-    const uint32_t *filterLengths; /**< Internal Mel filter  lengths */
-    uint32_t fftLen; /**< FFT length */
-    uint32_t nbMelFilters; /**< Number of Mel filters */
-    uint32_t nbDctOutputs; /**< Number of DCT outputs */
-#if defined(ARM_MFCC_CFFT_BASED)
-    /* Implementation of the MFCC is using a CFFT */
-    arm_cfft_instance_f16 cfft; /**< Internal CFFT instance */
-#else
-    /* Implementation of the MFCC is using a RFFT (default) */
-    arm_rfft_fast_instance_f16 rfft;
-#endif
-} arm_mfcc_instance_f16;
-
-arm_status arm_mfcc_init_f16(arm_mfcc_instance_f16 *S, uint32_t fftLen, uint32_t nbMelFilters,
-                             uint32_t nbDctOutputs, const float16_t *dctCoefs,
-                             const uint32_t *filterPos, const uint32_t *filterLengths,
-                             const float16_t *filterCoefs, const float16_t *windowCoefs);
-
-/**
-  @brief         MFCC F16
-  @param[in]    S       points to the mfcc instance structure
-  @param[in]     pSrc points to the input samples
-  @param[out]     pDst  points to the output MFCC values
-  @param[inout]     pTmp  points to a temporary buffer of complex
-  @return        none
- */
-void arm_mfcc_f16(const arm_mfcc_instance_f16 *S, float16_t *pSrc, float16_t *pDst,
-                  float16_t *pTmp);
-
-#endif /* defined(ARM_FLOAT16_SUPPORTED)*/
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* ifndef _TRANSFORM_FUNCTIONS_F16_H_ */
diff --git a/Libraries/CMSIS/5.9.0/DSP/Lib/libarm_cortexM4l_math.a b/Libraries/CMSIS/5.9.0/DSP/Lib/libarm_cortexM4l_math.a
deleted file mode 100644
index f991257c870..00000000000
Binary files a/Libraries/CMSIS/5.9.0/DSP/Lib/libarm_cortexM4l_math.a and /dev/null differ
diff --git a/Libraries/CMSIS/5.9.0/DSP/Lib/libarm_cortexM4lf_math.a b/Libraries/CMSIS/5.9.0/DSP/Lib/libarm_cortexM4lf_math.a
deleted file mode 100644
index c8ff8077daf..00000000000
Binary files a/Libraries/CMSIS/5.9.0/DSP/Lib/libarm_cortexM4lf_math.a and /dev/null differ
diff --git a/Libraries/CMSIS/Device/Maxim/GCC/mxc_version.mk b/Libraries/CMSIS/Device/Maxim/GCC/mxc_version.mk
index a333d8f89ca..a517a803062 100644
--- a/Libraries/CMSIS/Device/Maxim/GCC/mxc_version.mk
+++ b/Libraries/CMSIS/Device/Maxim/GCC/mxc_version.mk
@@ -16,9 +16,9 @@
  #
  ##############################################################################
 # Autogenerated version info for build system.
-MSDK_VERSION_STRING := v2023_10-332-g34a7ef90c9
-MSDK_VERSION_YEAR := 2023
-MSDK_VERSION_MONTH := 10
+MSDK_VERSION_STRING := v2024_02-149-gb9966ca6d8
+MSDK_VERSION_YEAR := 2024
+MSDK_VERSION_MONTH := 2
 
 # Add root MAXIM_PATH to IPATH so compiler can locate msdk_version.h
 IPATH += $(MAXIM_PATH)
diff --git a/USERGUIDE.md b/USERGUIDE.md
index 70fd2d593fb..9c0649fb6d9 100644
--- a/USERGUIDE.md
+++ b/USERGUIDE.md
@@ -2129,12 +2129,20 @@ The following table matches external part numbers to internal die types.  This i
 
 The CMSIS-DSP library provides a suite of common **Digital Signal Processing _(DSP)_** functions that take advantage of hardware accelerated _Floating Point Unit (FPU)_ available on microcontrollers with Arm Cortex-M cores. This library is distributed in the MSDK as a pre-compiled static library file, and the MSDK maintains a port of the official code examples in the **ARM-DSP** [Examples](https://github.com/analogdevicesinc/msdk/tree/main/Examples) folder for each microcontroller.
 
-Please refer to the [CMSIS-DSP official documentation](https://www.keil.com/pack/doc/CMSIS/DSP/html/index.html) for more detailed documentation on the library functions and usage.
+Please refer to the [CMSIS-DSP official documentation](https://arm-software.github.io/CMSIS-DSP/v1.16.2/index.html) for more detailed documentation on the library functions and usage.
 
 #### CMSIS-DSP Supported Parts
 
 - All microcontrollers with a Cortex M4 core are supported.
 
+#### CMSIS-DSP Build Variables
+
+| Configuration Variable | Description                                                | Details                                                      |
+| ---------------------- | ---------------------------------------------------------- | ------------------------------------------------------------ |
+|                        |                                                            |                                                              |
+| `CMSIS_DSP_VERSION`    | (Optional) Set the CMSIS-DSP version to use.               | Defaults to `1.16.2`, which is currently the only supported version. |
+
+
 ---
 
 ### Cordio Bluetooth Low Energy