not runnable yet

alibaba · Nov 20, 2024 · 5079f7e · 5079f7e
2 parents a6c6298 + 707b8a4
commit 5079f7e
Show file tree

Hide file tree

Showing 229 changed files with 11,410 additions and 6,100 deletions.
diff --git a/.github/workflows/mnn_release.yml b/.github/workflows/mnn_release.yml
@@ -3,6 +3,8 @@ on:
   push:
     tags:
       - '*'
+  workflow_dispatch:
+
 jobs:
   setup:
     permissions:
@@ -13,7 +15,14 @@ jobs:
     steps:
     - name: get-version
       id: get_version
-      run: echo "VERSION=${GITHUB_REF/refs\/tags\//}" >> $GITHUB_OUTPUT
+      run: |
+        if [[ "${GITHUB_REF}" == refs/tags/* ]]; then
+          # 提取标签版本号
+          echo "VERSION=${GITHUB_REF#refs/tags/}" >> $GITHUB_OUTPUT
+        else
+          # 如果不是标签，则设置版本为 'dev'
+          echo "VERSION=dev" >> $GITHUB_OUTPUT
+        fi
 
   linux-release:
     needs: [setup]
@@ -34,8 +43,9 @@ jobs:
         rm -f ${{ env.PACKAGENAME }}.zip
         zip -9 -y -r ${{ env.PACKAGENAME }}.zip ${{ env.PACKAGENAME }}
     - name: upload-zip
-      uses: actions/upload-artifact@v3
+      uses: actions/upload-artifact@v4
       with:
+        name: artifact-${{ env.PACKAGENAME }}
         path: ${{ env.PACKAGENAME }}.zip
 
   windows-release:
@@ -44,7 +54,7 @@ jobs:
     env:
       PACKAGENAME: mnn_${{ needs.setup.outputs.VERSION }}_windows_x64_cpu_opencl
     steps:
-    - uses: actions/checkout@v3
+    - uses: actions/checkout@v4
       with:
         submodules: true
 
@@ -56,8 +66,9 @@ jobs:
     - name: package
       run: 7z a -r ${{ env.PACKAGENAME }}.zip ${{ env.PACKAGENAME }}
     - name: upload-zip
-      uses: actions/upload-artifact@v3
+      uses: actions/upload-artifact@v4
       with:
+        name: artifact-${{ env.PACKAGENAME }}
         path: ${{ env.PACKAGENAME }}.zip
 
   macos-release:
@@ -79,8 +90,9 @@ jobs:
         rm -f ${{ env.PACKAGENAME }}.zip
         zip -9 -y -r ${{ env.PACKAGENAME }}.zip ${{ env.PACKAGENAME }}
     - name: upload-zip
-      uses: actions/upload-artifact@v3
+      uses: actions/upload-artifact@v4
       with:
+        name: artifact-${{ env.PACKAGENAME }}
         path: ${{ env.PACKAGENAME }}.zip
 
   android-release:
@@ -99,8 +111,9 @@ jobs:
         rm -f ${{ env.PACKAGENAME }}.zip
         zip -9 -y -r ${{ env.PACKAGENAME }}.zip ${{ env.PACKAGENAME }}
     - name: upload-zip
-      uses: actions/upload-artifact@v3
+      uses: actions/upload-artifact@v4
       with:
+        name: artifact-${{ env.PACKAGENAME }}
         path: ${{ env.PACKAGENAME }}.zip
 
   ios-release:
@@ -123,19 +136,21 @@ jobs:
         rm -f ${{ env.PACKAGENAME }}.zip
         zip -9 -y -r ${{ env.PACKAGENAME }}.zip ios_build/Release-iphoneos/MNN.framework
     - name: upload-zip
-      uses: actions/upload-artifact@v3
+      uses: actions/upload-artifact@v4
       with:
+        name: artifact-${{ env.PACKAGENAME }}
         path: ${{ env.PACKAGENAME }}.zip
 
   upload-release:
     name: upload_to_release
     needs: [linux-release, windows-release, macos-release, android-release, ios-release]
     runs-on: ubuntu-latest
     steps:
-    - uses: actions/download-artifact@v4.1.7
+    - uses: actions/download-artifact@v4
       with:
-        name: artifact
+        pattern: artifact-*
         path: assert
+        merge-multiple: true
 
     - name: show file
       run: ls assert
@@ -146,4 +161,4 @@ jobs:
       with:
         file: assert/*.zip
         tags: true
-        draft: true
+        draft: true
diff --git a/.github/workflows/pymnn_release.yml b/.github/workflows/pymnn_release.yml
@@ -16,6 +16,7 @@ jobs:
       matrix:
         include:
           - { os: ubuntu-latest,    arch: x86_64,     build: 'cp*-manylinux*' }
+          - { os: ubuntu-latest,    arch: aarch64,    build: 'cp*-manylinux*' }
           - { os: windows-latest,   arch: AMD64,      build: 'cp*'          }
           - { os: macos-13,         arch: x86_64,     build: 'cp*'          }
           - { os: macos-14,         arch: arm64,      build: 'cp*'          }
@@ -33,6 +34,12 @@ jobs:
       with:
         python-version: '3.12'
 
+    - name: set up qemu
+      if: matrix.os == 'ubuntu-latest' && matrix.arch == 'aarch64'
+      uses: docker/setup-qemu-action@v3
+      with:
+        platforms: all
+
     - name: install pipx
       if: matrix.os == 'macos-14'
       run: python -m pip install pipx
@@ -60,8 +67,9 @@ jobs:
       shell: bash
 
     - name: Upload wheels
-      uses: actions/upload-artifact@v3
+      uses: actions/upload-artifact@v4
       with:
+        name: artifact-${{ matrix.os }}-${{ matrix.arch }}
         path: wheelhouse/*.whl
 
   publish_wheels:
@@ -76,12 +84,13 @@ jobs:
       with:
         python-version: '3.x'
 
-    - uses: actions/download-artifact@v4.1.7
+    - uses: actions/download-artifact@v4
       with:
-        name: artifact
+        pattern: artifact-*
         path: dist
+        merge-multiple: true
 
     - uses: pypa/gh-action-pypi-publish@release/v1
       with:
         password: ${{ secrets.PYPI_API_TOKEN }}
-        skip_existing: true
+        skip_existing: true
diff --git a/.gitignore b/.gitignore
@@ -24,6 +24,7 @@ out/
 .gradle
 .gradle/
 build/
+buildvisionOs/
 
 # Signing files
 .signing/

diff --git a/CMakeLists.txt b/CMakeLists.txt
@@ -75,7 +75,7 @@ option(MNN_SUPPORT_BF16 "Enable MNN's bf16 op" OFF)
 option(MNN_LOW_MEMORY "Build MNN support low memory for weight quant model." OFF)
 option(MNN_CPU_WEIGHT_DEQUANT_GEMM "Build MNN CPU weight dequant related gemm kernels." OFF)
 
-IF (OHOS)
+IF (OHOS AND MNN_INTERNAL)
   include($ENV{NODE_PATH}/@ali/tcpkg/tcpkg.cmake)
   export_headers(DIR ${CMAKE_SOURCE_DIR}/include/MNN)
   IF (MNN_BUILD_OPENCV)
@@ -211,6 +211,7 @@ option(MNN_VULKAN "Enable Vulkan" OFF)
 option(MNN_ARM82 "Enable ARMv8.2's FP16 Compute" ON)
 option(MNN_KLEIDIAI "Enable KLEIDIAI" OFF)
 option(MNN_ONEDNN "Enable oneDNN" OFF)
+option(MNN_AVX2 "Open AVX2 Compile for x86 if possible" ON)
 option(MNN_AVX512 "Enable AVX512" OFF)
 option(MNN_CUDA "Enable CUDA" OFF)
 option(MNN_TENSORRT "Enable TensorRT" OFF)
@@ -314,6 +315,9 @@ IF(MNN_DEBUG_MEMORY)
     set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fsanitize=address")
 endif()
 
+set(MNN_DEPS "")
+set(MNN_EXTRA_DEPENDS "")
+
 IF(CMAKE_BUILD_TYPE MATCHES Debug)
     add_definitions(-DMNN_DEBUG -DDEBUG)
     if(MSVC)
@@ -339,6 +343,13 @@ else()
         endif()
     endif()
 ENDIF(CMAKE_BUILD_TYPE MATCHES Debug)
+if(OHOS)
+    IF(MNN_USE_LOGCAT)
+        add_definitions(-DMNN_USE_LOGCAT)
+        add_definitions(-Wno-format-security)
+        list(APPEND MNN_EXTRA_DEPENDS libhilog_ndk.z.so)
+    ENDIF()
+endif()
 if(CMAKE_SYSTEM_NAME MATCHES "^Android")
     IF(MNN_USE_LOGCAT)
         add_definitions(-DMNN_USE_LOGCAT)
@@ -458,8 +469,6 @@ IF(MNN_BUILD_LLM)
 ENDIF()
 
 
-set(MNN_DEPS "")
-set(MNN_EXTRA_DEPENDS "")
 
 # Add Thread dependency
 find_package(Threads)
@@ -507,13 +516,11 @@ if (NOT MSVC)
     set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fstrict-aliasing -ffunction-sections -fdata-sections -ffast-math -fno-rtti -fno-exceptions ")
     set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fstrict-aliasing -ffunction-sections -fdata-sections -ffast-math")
 else()
-    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /fp:fast")
-    set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} /fp:fast")
+    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /fp:precise")
+    set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} /fp:precise")
 endif()
 
 # Metal
-set(MNN_DEPS "")
-set(MNN_EXTRA_DEPENDS "")
 list(APPEND MNN_DEPS MNN)
 
 # Plugin
@@ -533,14 +540,10 @@ endif()
 # CoreML
 IF(MNN_COREML)
     add_definitions(-DMNN_COREML_ENABLED=1)
-    add_subdirectory(${CMAKE_CURRENT_LIST_DIR}/source/backend/coreml/)
+    include(${CMAKE_CURRENT_LIST_DIR}/source/backend/coreml/CMakeLists.txt)
 
-    IF(MNN_SEP_BUILD)
-      list(APPEND MNN_DEPS MNNCoreML)
-      list(APPEND MNN_EXTRA_DEPENDS MNNCoreML)
-    ELSE()
-      list(APPEND MNN_OBJECTS_TO_LINK $<TARGET_OBJECTS:MNNCoreML>)
-    ENDIF()
+    list(APPEND MNN_TARGETS MNNCoreML)
+    list(APPEND MNN_OBJECTS_TO_LINK $<TARGET_OBJECTS:MNNCoreML>)
 
     find_library(COREML CoreML)
     find_library(FOUNDATION Foundation)
@@ -641,7 +644,7 @@ ELSE()
 ENDIF()
 
 # Model Internal. Enable MNN internal features such as model authentication and metrics logging.
-if (MNN_INTERNAL)
+if (MNN_INTERNAL AND NOT OHOS) # TODO: support OHOS logging
     target_compile_options(MNNCore PRIVATE -DMNN_INTERNAL_ENABLED)
     target_compile_options(MNN_Express PRIVATE -DMNN_INTERNAL_ENABLED)
     include(${CMAKE_CURRENT_LIST_DIR}/source/internal/logging/CMakeLists.txt)

diff --git a/README.md b/README.md
@@ -7,6 +7,10 @@
 ## Intro
 MNN is a highly efficient and lightweight deep learning framework. It supports inference and training of deep learning models and has industry-leading performance for inference and training on-device. At present, MNN has been integrated into more than 30 apps of Alibaba Inc, such as Taobao, Tmall, Youku, DingTalk, Xianyu, etc., covering more than 70 usage scenarios such as live broadcast, short video capture, search recommendation, product searching by image, interactive marketing, equity distribution, security risk control. In addition, MNN is also used on embedded devices, such as IoT.
 
+[MNN-LLM](https://github.com/alibaba/MNN/tree/master/transformers/llm) is a large language model runtime solution developed based on the MNN engine. The mission of this project is to deploy LLM models locally on everyone's platforms (Mobile Phone/PC/IoT). It supports popular large language models such as Qianwen, Baichuan, Zhipu, LLAMA, and others. [MNN-LLM User guide](https://mnn-docs.readthedocs.io/en/latest/transformers/llm.html)
+
+[MNN-Diffusion](https://github.com/alibaba/MNN/tree/master/transformers/diffusion) is a stable diffusion model runtime solution developed based on the MNN engine. The mission of this project is to deploy stable diffusion models locally on everyone's platforms. [MNN-Diffusion User guide](https://mnn-docs.readthedocs.io/en/latest/transformers/diffusion.html)
+
 ![architecture](doc/architecture.png)
 
 Inside Alibaba, [MNN](https://mp.weixin.qq.com/s/5I1ISpx8lQqvCS8tGd6EJw) works as the basic module of the compute container in the [Walle](https://mp.weixin.qq.com/s/qpeCETty0BqqNJV9CMJafA) System, the first end-to-end, general-purpose, and large-scale production system for device-cloud collaborative machine learning, which has been published in the top system conference OSDI’22. The key design principles of MNN and the extensive benchmark testing results (vs. TensorFlow, TensorFlow Lite, PyTorch, PyTorch Mobile, TVM) can be found in the OSDI paper. The scripts and instructions for benchmark testing are put in the path “/benchmark”. If MNN or the design of Walle helps your research or production use, please cite our OSDI paper as follows:
@@ -26,7 +30,9 @@ Inside Alibaba, [MNN](https://mp.weixin.qq.com/s/5I1ISpx8lQqvCS8tGd6EJw) works a
 
 
 ## Documentation and Workbench
-MNN's docs are in place in [Yuque docs here](https://www.yuque.com/mnn/en) and [Read the docs](https://mnn-docs.readthedocs.io/en/latest).
+MNN's docs are in place in [Read the docs](https://mnn-docs.readthedocs.io/en/latest).
+
+You can also read docs/README to build the HTML documentation locally.
 
 MNN Workbench could be downloaded from [MNN's homepage](http://www.mnn.zone), which provides pretrained models, visualized training tools, and one-click deployment of models to devices.
 

diff --git a/README_CN.md b/README_CN.md
@@ -6,6 +6,10 @@
 
 [MNN](https://github.com/alibaba/MNN)是一个轻量级的深度神经网络引擎，支持深度学习的推理与训练。适用于服务器/个人电脑/手机/嵌入式各类设备。目前，MNN已经在阿里巴巴的手机淘宝、手机天猫、优酷等30多个App中使用，覆盖直播、短视频、搜索推荐、商品图像搜索、互动营销、权益发放、安全风控等场景。
 
+[MNN-LLM](https://github.com/alibaba/MNN/tree/master/transformers/llm)是基于MNN引擎开发的大语言模型运行方案，解决大语言模型在本地设备的高效部署问题(手机/个人电脑/嵌入式设备)。支持常见的千问/百川/智谱/LLAMA等大语言模型。使用教程：[MNN-LLM使用教程](https://mnn-docs.readthedocs.io/en/latest/transformers/llm.html)
+
+[MNN-Diffusion](https://github.com/alibaba/MNN/tree/master/transformers/diffusion)是基于MNN引擎开发的Stable Diffusion文生图模型运行方案，解决Stable Diffusion模型在本地设备的高效部署问题。使用教程：[MNN-Diffusion使用教程](https://mnn-docs.readthedocs.io/en/latest/transformers/diffusion.html)
+
 ![架构图](doc/architecture.png)
 
 在阿里巴巴中，[MNN](https://mp.weixin.qq.com/s/5I1ISpx8lQqvCS8tGd6EJw)被用作为[Walle](https://mp.weixin.qq.com/s/qpeCETty0BqqNJV9CMJafA)系统中计算容器的基础模块。Walle是首个端到端、通用型、规模化产业应用的端云协同机器学习系统，发表于操作系统顶会OSDI 2022。Walle的论文中解释了MNN的关键设计理念，并提供了MNN相对于其他深度学习框架（TensorFlow, TensorFlow Lite, PyTorch, PyTorch Mobile, TVM）的benchmark测试结果。相关测试脚本和说明文档被放在“/benchmark”目录下。如果MNN或Walle的设计对你的研究或生产有所助益，欢迎引用我们的OSDI论文：
@@ -26,7 +30,9 @@
 ## 文档与工作台
 MNN文档：
 - [最新文档(readthedocs)](https://mnn-docs.readthedocs.io/en/latest/index.html)
-- [语雀文档](https://www.yuque.com/mnn/cn)
+
+- 也可阅读 docs/README ，编译本地文档
+
 
 [MNN官网](http://www.mnn.zone)上还可以下载MNN团队全新力作MNN工作台，涵盖开箱即用模型、可视化训练等工具，更可以一键部署到多端设备。
 

diff --git a/docs/compile/cmake.md b/docs/compile/cmake.md
@@ -40,7 +40,8 @@ MNN使用CMake构建项目，CMake中的宏定义列表如下：
 | MNN_VULKAN           | 是否构建`Vulkan`后端，默认为`OFF` |
 | MNN_ARM82            | 编译ARM架构时，是否构建`Armv8.2`后端，以支持FP16计算，默认为`ON` |
 | MNN_ONEDNN           | 是否使用`oneDNN`，默认为`OFF` |
-| MNN_AVX512           | 是否构建`avx512`后端，默认为`OFF` |
+| MNN_AVX2             | 在`MNN_USE_SSE`开启的基础上，是否增加AVX2指令的支持，默认为`ON` |
+| MNN_AVX512           | 在`MNN_USE_SSE`和`MNN_AVX2`开启的基础上，是否增加`avx512`指令集的支持，默认为`OFF` |
 | MNN_CUDA             | 是否构建`Cuda`后端，默认为`OFF` |
 | MNN_CUDA_PROFILE     | 是否打开CUDA profile工具，默认为`OFF` |
 | MNN_CUDA_QUANT       | 是否打开CUDA 量化文件编译，默认为`OFF` |
@@ -85,3 +86,4 @@ MNN使用CMake构建项目，CMake中的宏定义列表如下：
 | MNN_SUPPORT_TRANSFORMER_FUSE | 是否支持Fuse Transformer相关OP实现，默认为 `OFF` |
 | MNN_BUILD_LLM        | 是否构建基于MNN的llm库和demo，默认为`OFF` |
 | MNN_BUILD_DIFFUSION  | 是否构建基于MNN的diffusion demo，需要打开MNN_BUILD_OPENCV和MNN_IMGCODECS宏使用 默认为`OFF` |
+| MNN_KLEIDIAI         | 是否集成ARM的KleidiAI加速库【目前处于实验状态，只能跑对称量化的LLM模型】，默认为`OFF` |