Skip to content

Commit

Permalink
Merge pull request #3050 from alibaba/feature/sync
Browse files Browse the repository at this point in the history
MNN:Sync: Sync Internal 2.9.6
  • Loading branch information
wangzhaode authored Oct 14, 2024
2 parents f830294 + 860fceb commit a74551b
Show file tree
Hide file tree
Showing 147 changed files with 6,038 additions and 2,816 deletions.
16 changes: 15 additions & 1 deletion CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,14 @@ set(CMAKE_MODULE_PATH
${CMAKE_MODULE_PATH}
"${CMAKE_CURRENT_LIST_DIR}/cmake"
)

# On Windows with a non-MSVC toolchain (e.g. clang targeting the MSVC ABI),
# clear CMake's MSVC runtime-library selection so no /MD- or /MT-style flag
# is injected into the compile line.
# NOTE(review): CMAKE_MSVC_RUNTIME_LIBRARY is the documented CMake variable;
# MSVC_RUNTIME_LIBRARY mirrors the target-property name — presumably cleared
# for the same reason; confirm it is actually consumed anywhere.
if(WIN32)
if(NOT MSVC)
set(CMAKE_MSVC_RUNTIME_LIBRARY "")
set(MSVC_RUNTIME_LIBRARY "")
endif()
endif()

# build options
option(MNN_USE_SYSTEM_LIB "For opencl and vulkan, use system lib or use dlopen" OFF)
option(MNN_BUILD_HARD "Build -mfloat-abi=hard or not" OFF)
Expand Down Expand Up @@ -198,7 +206,7 @@ option(MNN_METAL "Enable Metal" OFF)
option(MNN_OPENCL "Enable OpenCL" OFF)
option(MNN_OPENGL "Enable OpenGL" OFF)
option(MNN_VULKAN "Enable Vulkan" OFF)
option(MNN_ARM82 "Enable ARM82" OFF)
option(MNN_ARM82 "Enable ARMv8.2's FP16 Compute" ON)
option(MNN_ONEDNN "Enable oneDNN" OFF)
option(MNN_AVX512 "Enable AVX512" OFF)
option(MNN_CUDA "Enable CUDA" OFF)
Expand Down Expand Up @@ -452,6 +460,12 @@ set(MNN_EXTRA_DEPENDS "")
# Add Thread dependency
find_package(Threads)
list(APPEND MNN_EXTRA_DEPENDS ${CMAKE_THREAD_LIBS_INIT})
# On Windows with a non-MSVC driver (clang), force the MSVC-compatible
# lld-link linker and explicitly pull in the MSVC C runtime import
# library (msvcrt) for both shared libraries and executables.
# NOTE(review): appending to global CMAKE_*_LINKER_FLAGS is the legacy
# pattern; add_link_options / target_link_options would be the modern,
# target-scoped equivalent — consider migrating.
if(WIN32)
if(NOT MSVC)
set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} -fuse-ld=lld-link -lmsvcrt")
set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -fuse-ld=lld-link -lmsvcrt")
endif()
endif()

if (NOT APPLE)
if(MNN_OPENMP)
Expand Down
36 changes: 0 additions & 36 deletions MNN.sln
Original file line number Diff line number Diff line change
@@ -1,36 +0,0 @@

Microsoft Visual Studio Solution File, Format Version 12.00
# Visual Studio Version 17
VisualStudioVersion = 17.5.002.0
MinimumVisualStudioVersion = 10.0.40219.1
Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "3rd_party", "3rd_party", "{5CD18987-C4CA-49D5-942F-14B15F46B1ED}"
EndProject
Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "flatbuffers", "flatbuffers", "{89B04BB7-86CB-4D4F-B65C-C3D0995DBD36}"
EndProject
Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "tests", "tests", "{797AC14A-1653-469D-A240-76EF0F36E60A}"
EndProject
Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "FlatBuffers.Test", "3rd_party\flatbuffers\tests\FlatBuffers.Test\FlatBuffers.Test.csproj", "{E5A80CC7-62B1-4887-B637-455F34CCC9B3}"
EndProject
Global
GlobalSection(SolutionConfigurationPlatforms) = preSolution
Debug|Any CPU = Debug|Any CPU
Release|Any CPU = Release|Any CPU
EndGlobalSection
GlobalSection(ProjectConfigurationPlatforms) = postSolution
{E5A80CC7-62B1-4887-B637-455F34CCC9B3}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
{E5A80CC7-62B1-4887-B637-455F34CCC9B3}.Debug|Any CPU.Build.0 = Debug|Any CPU
{E5A80CC7-62B1-4887-B637-455F34CCC9B3}.Release|Any CPU.ActiveCfg = Release|Any CPU
{E5A80CC7-62B1-4887-B637-455F34CCC9B3}.Release|Any CPU.Build.0 = Release|Any CPU
EndGlobalSection
GlobalSection(SolutionProperties) = preSolution
HideSolutionNode = FALSE
EndGlobalSection
GlobalSection(NestedProjects) = preSolution
{89B04BB7-86CB-4D4F-B65C-C3D0995DBD36} = {5CD18987-C4CA-49D5-942F-14B15F46B1ED}
{797AC14A-1653-469D-A240-76EF0F36E60A} = {89B04BB7-86CB-4D4F-B65C-C3D0995DBD36}
{E5A80CC7-62B1-4887-B637-455F34CCC9B3} = {797AC14A-1653-469D-A240-76EF0F36E60A}
EndGlobalSection
GlobalSection(ExtensibilityGlobals) = postSolution
SolutionGuid = {11D826E1-518B-4BC2-8E45-03F5F48170D6}
EndGlobalSection
EndGlobal

This file was deleted.

This file was deleted.

19 changes: 0 additions & 19 deletions backupcode/cpubackend/bf16/BF16Functions.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -76,23 +76,6 @@ static void _MNNLowpToFp32(const int16_t* src, float* dst, size_t size) {
::memcpy(dst, dstTemp, sizeRemain * sizeof(float));
}
}
static void MNNConvRunForUnitDepthWiseBF16(float* dst, const float* src, const float* weight, size_t fw, size_t fh,
size_t weight_y_step, size_t dilateX_step, size_t dilateY_step) {
int fx, fy;
BFVec4 dstValue(0.0f);
const int16_t* src_z = (const int16_t*)src;
const int16_t* weight_z = (const int16_t*)weight;
for (fy = 0; fy < fh; ++fy) {
const auto src_y = src_z + fy * dilateY_step;
const auto weight_y = weight_z + fy * weight_y_step;
for (fx = 0; fx < fw; ++fx) {
const auto weight_x = weight_y + 4 * fx;
const auto src_x = src_y + fx * dilateX_step;
dstValue = dstValue + BFVec4::load(src_x) * BFVec4::load(weight_x);
}
}
BFVec4::save((int16_t*)dst, dstValue);
}

static void MNNConvRunForLineDepthwiseBF16(float* dstO, const float* srcO, const float* weightO, size_t width, size_t src_w_setup,
size_t fw, size_t fh, size_t dilateX_step, size_t dilateY_step, size_t height,
Expand Down Expand Up @@ -823,7 +806,6 @@ static CoreFunctions* gInstance = nullptr;
bool BF16Functions::init() {
gInstance = new CoreFunctions;
gInstance->MNNConvRunForLineDepthwise = MNNConvRunForLineDepthwiseBF16;
gInstance->MNNConvRunForUnitDepthWise = MNNConvRunForUnitDepthWiseBF16;
gInstance->MNNAxByClampBroadcastUnit = MNNAxByClampBroadcastUnitBF16;
gInstance->MNNFp32ToLowp = _MNNFp32ToLowp;
gInstance->MNNLowpToFp32 = _MNNLowpToFp32;
Expand Down Expand Up @@ -890,7 +872,6 @@ bool BF16Functions::init() {
gInstance->MNNPackedMatMul = NEON_MNNPackedMatMul_BF16;
gInstance->MNNPackedMatMulRemain = NEON_MNNPackedMatMulRemain_BF16;
gInstance->MNNConvRunForLineDepthwise = NEON_MNNConvRunForLineDepthwise_BF16;
gInstance->MNNConvRunForUnitDepthWise = NEON_MNNConvRunForUnitDepthWise_BF16;
gInstance->MNNAxByClampBroadcastUnit = NEON_MNNAxByClampBroadcastC4_BF16;
#ifdef __aarch64__
cpuinfo_arm_isa gCPUInfo;
Expand Down
2 changes: 1 addition & 1 deletion docs/compile/cmake.md
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ MNN使用CMake构建项目,CMake中的宏定义列表如下:
| MNN_OPENCL | 是否构建`OpenCL`后端,默认为`OFF` |
| MNN_OPENGL | 是否构建`OpenGL`后端,默认为`OFF` |
| MNN_VULKAN | 是否构建`Vulkan`后端,默认为`OFF` |
| MNN_ARM82 | 是否构建`Armv8.2`后端,默认为`OFF` |
| MNN_ARM82 | 编译ARM架构时,是否构建`Armv8.2`后端,以支持FP16计算,默认为`ON` |
| MNN_ONEDNN | 是否使用`oneDNN`,默认为`OFF` |
| MNN_AVX512 | 是否构建`avx512`后端,默认为`OFF` |
| MNN_CUDA | 是否构建`Cuda`后端,默认为`OFF` |
Expand Down
56 changes: 32 additions & 24 deletions docs/compile/engine.md
Original file line number Diff line number Diff line change
Expand Up @@ -22,37 +22,45 @@
```bash
mkdir build && cd build && cmake .. && make -j8
```
## Windows
## Windows(非ARM架构)
- 环境要求
- Microsoft Visual Studio >= 2017
- cmake >= 3.13
- powershell
- Ninja
- 相关编译选项
- 同`Linux/MacOS`
- 具体步骤
1. opencl/vulkan
- *(可选)*下载GPU Caps Viewer,你可以通过这个工具来查看本机设备的详细信息(opencl、opengl、vulkan等)
- sdk和驱动准备
- [opencl sdk](https://github.com/GPUOpen-LibrariesAndSDKs/OCL-SDK/releases),将opencl sdk目录的路径加到AMDAPPSDKROOT环境变量
- [vulkan sdk](https://vulkan.lunarg.com/),将vulkan sdk路径加入VULKAN_SDK环境变量,以备cmake查找
- [AMD opencl驱动](https://www.amd.com/zh-hans/support)
- [NVIDIA opencl驱动](https://developer.nvidia.com/opencl)
- [AMD vulkan驱动](https://community.amd.com/community/gaming/blog/2016/02/16/radeon-gpus-are-ready-for-the-vulkan-graphics-api)
2. 编译
- 64位编译:在设置中找到vcvars64.bat(适用于 VS 2017 的 x64 本机工具命令提示)并单击,打开VS编译x64架构程序的虚拟环境
- 32位编译:在设置中找到vcvarsamd64_x86.bat(VS 2017的 x64_x86 交叉工具命令提示符)并单击,打开VS交叉编译x86架构程序的虚拟环境
- 在虚拟环境中执行如下编译命令:
```bash
cd /path/to/MNN
./schema/generate.ps1 # 非必须
mkdir build && cd build
cmake .. -G Ninja -DCMAKE_BUILD_TYPE=Release -DMNN_BUILD_SHARED_LIBS=OFF -DMNN_WIN_RUNTIME_MT=OFF
ninja
```
- 若需要编译模型转换工具,cmake 命令加上 -DMNN_BUILD_CONVERTER=ON -DMNN_BUILD_SHARED_LIBS=OFF -DMNN_WIN_RUNTIME_MT=ON
- 若需要编译 MNN CUDA,MNN_WIN_RUNTIME_MT 和 MNN_BUILD_SHARED_LIBS 需要设成 ON ,另外加上 -DMNN_CUDA=ON: cmake .. -G Ninja -DCMAKE_BUILD_TYPE=Release -DMNN_BUILD_SHARED_LIBS=ON -DMNN_WIN_RUNTIME_MT=ON -DMNN_CUDA=ON
- Windows 上建议使用 Interpreter::destroy , Tensor::destroy , Module::destroy 等方法进行 MNN 相关内存对象的析构,不要直接使用 delete (直接使用 delete 在 -DMNN_WIN_RUNTIME_MT=ON 时会出问题)
- 64位编译:在设置中找到vcvars64.bat(适用于 VS 2017 的 x64 本机工具命令提示)并单击,打开VS编译x64架构程序的虚拟环境
- 32位编译:在设置中找到vcvarsamd64_x86.bat(VS 2017的 x64_x86 交叉工具命令提示符)并单击,打开VS交叉编译x86架构程序的虚拟环境
- 在虚拟环境中执行如下编译命令:
```bash
cd /path/to/MNN
./schema/generate.ps1 # 非必须
mkdir build && cd build
cmake .. -G Ninja -DCMAKE_BUILD_TYPE=Release -DMNN_BUILD_SHARED_LIBS=OFF -DMNN_WIN_RUNTIME_MT=OFF
ninja
```
- 若需要编译模型转换工具,cmake 命令加上 -DMNN_BUILD_CONVERTER=ON -DMNN_BUILD_SHARED_LIBS=OFF -DMNN_WIN_RUNTIME_MT=ON
- 若需要编译 MNN CUDA,MNN_WIN_RUNTIME_MT 和 MNN_BUILD_SHARED_LIBS 需要设成 ON ,另外加上 -DMNN_CUDA=ON: cmake .. -G Ninja -DCMAKE_BUILD_TYPE=Release -DMNN_BUILD_SHARED_LIBS=ON -DMNN_WIN_RUNTIME_MT=ON -DMNN_CUDA=ON
- Windows 上建议使用 Interpreter::destroy , Tensor::destroy , Module::destroy 等方法进行 MNN 相关内存对象的析构,不要直接使用 delete (直接使用 delete 在 -DMNN_WIN_RUNTIME_MT=ON 时会出问题)

## Windows(ARM架构)
- 环境要求
- Microsoft Visual Studio >= 2017
- cmake >= 3.13
- Ninja
- Clang
- Clang 安装参考: https://learn.microsoft.com/en-us/cpp/build/clang-support-msbuild?view=msvc-170#install-1
- 相关编译选项
- 同`Linux/MacOS`
- 具体步骤
- 打开vs的ARM64命令行工具
- 进入 MNN 根目录
- mkdir build && cd build
- cmake .. -G Ninja -DCMAKE_C_COMPILER="C:\Program Files\Microsoft Visual Studio\2022\Community\VC\Tools\Llvm\ARM64\bin\clang.exe" -DCMAKE_CXX_COMPILER="C:\Program Files\Microsoft Visual Studio\2022\Community\VC\Tools\Llvm\ARM64\bin\clang++.exe"  -DCMAKE_LINKER="C:\Program Files\Microsoft Visual Studio\2022\Community\VC\Tools\Llvm\ARM64\bin\lld.exe" -DCMAKE_BUILD_TYPE=Release
- Visual Studio 安装路径不一致的,可自行修改脚本
- ninja -j16

## Android
- 环境要求
- cmake >= 3.10
Expand Down
39 changes: 37 additions & 2 deletions docs/tools/quant.md
Original file line number Diff line number Diff line change
Expand Up @@ -39,8 +39,43 @@ MNN现已推出基于TensorFlow/Pytorch的模型压缩工具mnncompress,请查
| ADMM | 使用ADMM方法进行权值量化 |

## 多输入模型的参数设置的特别说明(MNN现阶段仅支持输入数据类型是非图片的多输入模型)
| input_type | `str` | 输入数据的类型,"sequence" |
| path | `str` | 存放校正特征量化系数的输入数据目录 |,例如该目录下包含2个输入数据集input_0和input_1,子目录input_0和input_1中包含模型的输入数据和一个input.json文件。input_0和input_1分别是两个输入输出信息文件夹,可使用 testMNNFromOnnx.py 等脚本生成,参考模型转换的正确性校验部分。
| 需要特别指定的参数 | 设置值 |
|--------------------|------|
| input_type | `str`:输入数据的类型,"sequence" |
| path | `str`:存放校正特征量化系数的输入数据目录 |
例如在quant.json文件中 "path": "/home/data/inputs_dir/",你所构造的矫正数据集有两个,分别存放在input_0和input_1子目录下,即"/home/data/inputs_dir/input_0"和"/home/data/inputs_dir/input_1".由GetMNNInfo工具可以得到模型的输入输出名称,例如该模型的输入有三个:data0, data1, data2,输出有两个:out1, out2. 那么在input_0和input_1子目录下分别有六个文件:data0.txt, data1.txt, data2.txt, out1.txt, out2.txt, input.json. 其中的五个文件名要和模型的输入输出名对应,最后一个input.json文件则描述的是输入名和对应的shape内容:
```json
{
"inputs": [
{
"name": "data0",
"shape": [
2,
4,
64,
64
]
},
{
"name": "data1",
"shape": [
1
]
},
{
"name": "data2",
"shape": [
2,
512,
768
]
}
],
"outputs": [
"out1", "out2"
]
}
```

## 量化模型的使用
和浮点模型同样使用方法,输入输出仍然为浮点类型
Expand Down
Loading

0 comments on commit a74551b

Please sign in to comment.