diff --git a/.github/workflows/build-wheels-cpu.yaml b/.github/workflows/build-wheels-cpu.yaml
index c0de06bc..f86ec878 100644
--- a/.github/workflows/build-wheels-cpu.yaml
+++ b/.github/workflows/build-wheels-cpu.yaml
@@ -6,47 +6,47 @@ permissions:
contents: write
jobs:
- build_wheels_linux:
- name: Build wheels on ${{ matrix.os }}
- runs-on: ${{ matrix.os }}
- strategy:
- matrix:
- os: [ubuntu-20.04]
-
- steps:
- - uses: actions/checkout@v4
- with:
- submodules: "recursive"
-
- # Used to host cibuildwheel
- - uses: actions/setup-python@v5
- with:
- python-version: "3.8"
- cache: "pip"
-
- - name: Install dependencies
- run: |
- python -m pip install --upgrade pip
- # python -m pip install -e .
- python -m pip install build wheel
-
- - name: Build wheels
- uses: pypa/cibuildwheel@v2.20.0
- env:
- # disable repair
- CIBW_REPAIR_WHEEL_COMMAND: ""
- CIBW_BUILD_FRONTEND: "build"
- CIBW_SKIP: "*musllinux*"
- CIBW_BUILD: "cp310-* cp311-* cp312-*"
- CMAKE_BUILD_PARALLEL_LEVEL: 4
- with:
- package-dir: .
- output-dir: wheelhouse
-
- - uses: actions/upload-artifact@v4
- with:
- name: wheels-${{ matrix.os }}
- path: ./wheelhouse/*.whl
+ # build_wheels_linux:
+ # name: Build wheels on ${{ matrix.os }}
+ # runs-on: ${{ matrix.os }}
+ # strategy:
+ # matrix:
+ # os: [ubuntu-20.04]
+
+ # steps:
+ # - uses: actions/checkout@v4
+ # with:
+ # submodules: "recursive"
+
+ # # Used to host cibuildwheel
+ # - uses: actions/setup-python@v5
+ # with:
+ # python-version: "3.8"
+ # cache: "pip"
+
+ # - name: Install dependencies
+ # run: |
+ # python -m pip install --upgrade pip
+ # # python -m pip install -e .
+ # python -m pip install build wheel
+
+ # - name: Build wheels
+ # uses: pypa/cibuildwheel@v2.20.0
+ # env:
+ # # disable repair
+ # CIBW_REPAIR_WHEEL_COMMAND: ""
+ # CIBW_BUILD_FRONTEND: "build"
+ # CIBW_SKIP: "*musllinux*"
+ # CIBW_BUILD: "cp310-* cp311-* cp312-*"
+ # CMAKE_BUILD_PARALLEL_LEVEL: 4
+ # with:
+ # package-dir: .
+ # output-dir: wheelhouse
+
+ # - uses: actions/upload-artifact@v4
+ # with:
+ # name: wheels-${{ matrix.os }}
+ # path: ./wheelhouse/*.whl
build_wheels_win:
name: Build wheels on ${{ matrix.os }}
@@ -146,7 +146,7 @@ jobs:
release:
name: Release
- needs: [build_wheels_linux, build_wheels_win, build_wheels_arm64, build_sdist]
+ needs: [build_wheels_win, build_wheels_arm64, build_sdist] # build_wheels_linux
runs-on: ubuntu-latest
steps:
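
The Linux wheel job above is commented out rather than deleted. For reference, a rough local equivalent of that job, using the same cibuildwheel settings, might look like the sketch below (not part of the workflow; it assumes Docker is available, since cibuildwheel builds Linux wheels inside manylinux containers):

```bash
# Mirror the disabled Linux wheel job locally (sketch; same env vars as the CI job).
pip install cibuildwheel==2.20.0
CIBW_BUILD="cp310-* cp311-* cp312-*" \
CIBW_SKIP="*musllinux*" \
CIBW_BUILD_FRONTEND="build" \
CIBW_REPAIR_WHEEL_COMMAND="" \
CMAKE_BUILD_PARALLEL_LEVEL=4 \
cibuildwheel --platform linux --output-dir wheelhouse .
```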
diff --git a/.gitignore b/.gitignore
index 9ff7b7f7..047b7b5e 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,9 +1,6 @@
# Files
*.safetensors
*.gguf
-*.png
-*.jpeg
-*.jpg
*.bin
*.mp3
*.wav
diff --git a/README.md b/README.md
index de65264a..40acf915 100644
--- a/README.md
+++ b/README.md
@@ -1,6 +1,27 @@
-# Nexa SDK
+<div align="center">
-The Nexa SDK is a comprehensive toolkit for supporting **ONNX** and **GGML** models. It supports text generation, image generation, vision-language models (VLM), and text-to-speech (TTS) capabilities. Additionally, it offers an OpenAI-compatible API server with JSON schema mode for function calling and streaming support, and a user-friendly Streamlit UI.
+
+  <img src="assets/banner.png" alt="Nexa SDK" width="100%">
+
+[![MacOS][MacOS-image]][release-url] [![Linux][Linux-image]][release-url] [![Windows][Windows-image]][release-url]
+
+[![GitHub Release](https://img.shields.io/github/v/release/NexaAI/nexa-sdk)](https://github.com/NexaAI/nexa-sdk/releases/latest) [![Build workflow](https://img.shields.io/github/actions/workflow/status/NexaAI/nexa-sdk/ci.yaml?label=CI&logo=github)](https://github.com/NexaAI/nexa-sdk/actions/workflows/ci.yaml?query=branch%3Amain) ![GitHub License](https://img.shields.io/github/license/NexaAI/nexa-sdk)
+
+[![Discord](https://dcbadge.limes.pink/api/server/thRu2HaK4D?style=flat&compact=true)](https://discord.gg/thRu2HaK4D)
+
+[On-device Model Hub](https://model-hub.nexa4ai.com/) / [Nexa SDK Documentation](https://docs.nexaai.com/)
+
+[release-url]: https://github.com/NexaAI/nexa-sdk/releases
+[Windows-image]: https://img.shields.io/badge/windows-0078D4?logo=windows
+[MacOS-image]: https://img.shields.io/badge/-MacOS-black?logo=apple
+[Linux-image]: https://img.shields.io/badge/-Linux-333?logo=ubuntu
+
+</div>
+
+Nexa SDK is a comprehensive toolkit for supporting **ONNX** and **GGML** models. It supports text generation, image generation, vision-language models (VLM), and text-to-speech (TTS) capabilities. Additionally, it offers an OpenAI-compatible API server with JSON schema mode for function calling and streaming support, and a user-friendly Streamlit UI.
## Features
@@ -14,7 +35,7 @@ The Nexa SDK is a comprehensive toolkit for supporting **ONNX** and **GGML** mod
- **Vision-Language Models (VLM)**
- **Text-to-Speech (TTS)**
-Detailed API documentation is available [here](docs/index.html).
+Detailed API documentation is available [here](https://docs.nexaai.com/).
- **Server:**
- OpenAI-compatible API
@@ -24,107 +45,83 @@ Detailed API documentation is available [here](docs/index.html).
## Installation
-### GPU version(optional)
-
-check if you have GPU acceleration (torch required)
+### Pre-built Wheels (Recommended)
-<details>
-  <summary>CUDA:</summary>
+We publish pre-built wheels for various Python versions, platforms, and backends; browse the full list on our [index page](https://nexaai.github.io/nexa-sdk/whl/).
-```
-import torch
-torch.cuda.is_available()
-```
-
-if True
-
-```
-CMAKE_ARGS="-DGGML_CUDA=on -DSD_CUBLAS=ON" pip install nexaai
-```
-
-Or you prefer to install our pre-built wheel:
-
-```bash
-pip install nexaai --index-url https://nexaai.github.io/nexa-sdk/whl/cu124 --extra-index-url https://pypi.org/simple
-```
-
-Optionally, you can install onnx supported version:
+#### CPU
```bash
-pip install nexaai[onnx] --index-url https://nexaai.github.io/nexa-sdk/whl/cu124 --extra-index-url https://pypi.org/simple
+pip install nexaai --index-url https://nexaai.github.io/nexa-sdk/whl/cpu --extra-index-url https://pypi.org/simple
```
-</details>
-<details>
-  <summary>Apple M Chip:</summary>
-  Apple icon -> about this mac -> Graphics
-
-if True:
+#### GPU (Metal)
-```
-CMAKE_ARGS="-DGGML_METAL=on -DSD_METAL=ON" pip install nexaai
-```
-
-Or you prefer to install our pre-built wheel:
+For the GPU version supporting **Metal (macOS)**:
```bash
pip install nexaai --index-url https://nexaai.github.io/nexa-sdk/whl/metal --extra-index-url https://pypi.org/simple
```
-Optionally, you can install onnx supported version:
-
-```bash
-pip install nexaai[onnx] --index-url https://nexaai.github.io/nexa-sdk/whl/metal --extra-index-url https://pypi.org/simple
-```
-
-</details>
-
-### CPU version
-
-<details>
-  <summary>Mac with Intel Chips</summary>
+#### GPU (CUDA)
-To install the `nexaai` package on a Mac with Intel chips, use the following command:
+For the GPU version supporting **CUDA (Linux/Windows)**:
```bash
-CMAKE_ARGS="-DCMAKE_CXX_FLAGS=-fopenmp" pip install nexaai
+pip install nexaai --index-url https://nexaai.github.io/nexa-sdk/whl/cu124 --extra-index-url https://pypi.org/simple
```
-**Optional:** To install the version with ONNX support, use:
-
-```bash
-CMAKE_ARGS="-DCMAKE_CXX_FLAGS=-fopenmp" pip install nexaai[onnx]
-```
+> [!NOTE]
+> The CUDA wheels are built with CUDA 12.4 but should be compatible with all CUDA 12.x releases.
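+>
+> You can check the highest CUDA runtime version your driver supports with `nvidia-smi`:
+>
+> ```bash
+> nvidia-smi | head -n 4   # the "CUDA Version" field is the highest runtime the driver supports
+> ```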
-</details>
+### Install from source code distribution
-<details>
-  <summary>Mac with M Chips or Other Operating Systems</summary>
+If the pre-built wheels don't meet your requirements, you can build and install Nexa SDK from source via CMake.
-To install the `nexaai` package on a Mac with M chips or other operating systems, use the following command:
+#### CPU
```bash
pip install nexaai
```
-**Optional:** To install the version with ONNX support, use:
+> [!IMPORTANT]
+> If you are using a Mac with Intel chips, run the following command:
+>
+> ```bash
+> CMAKE_ARGS="-DCMAKE_CXX_FLAGS=-fopenmp" pip install nexaai
+> ```
-```bash
-pip install nexaai[onnx]
-```
+#### GPU (Metal)
-</details>
-If you prefer to install the pre-built wheel for CPU versions:
+For the GPU version supporting Metal (macOS):
```bash
-pip install nexaai --index-url https://nexaai.github.io/nexa-sdk/whl/cpu --extra-index-url https://pypi.org/simple
+CMAKE_ARGS="-DGGML_METAL=ON -DSD_METAL=ON" pip install nexaai
```
-To include ONNX support:
+#### GPU (CUDA)
+
+For the GPU version supporting CUDA (Linux/Windows), run the following command:
```bash
-pip install nexaai[onnx] --index-url https://nexaai.github.io/nexa-sdk/whl/cpu --extra-index-url https://pypi.org/simple
-```
+CMAKE_ARGS="-DGGML_CUDA=ON -DSD_CUBLAS=ON" pip install nexaai
+```
+
+> [!TIP]
+> You can speed up the build by running CMake jobs in parallel; prefix the commands above with the following environment variable:
+>
+> ```bash
+> CMAKE_BUILD_PARALLEL_LEVEL=$(nproc)
+> ```
+>
+> For example:
+>
+> ```bash
+> CMAKE_BUILD_PARALLEL_LEVEL=$(nproc) CMAKE_ARGS="-DGGML_METAL=ON -DSD_METAL=ON" pip install nexaai
+> ```
+
+> [!TIP]
+> For Windows users, we recommend running the installation command in Git Bash to avoid unexpected behavior.
### Docker Usage
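
Whichever install path is used, a quick post-install sanity check is the one-liner below (a minimal sketch; it assumes only that the package exposes `nexa.__version__`, as set in `nexa/__init__.py` later in this diff):

```bash
# Verify the package imports and report its version (0.0.6 for this release).
python -c "import nexa; print(nexa.__version__)"
```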
diff --git a/assets/banner.png b/assets/banner.png
new file mode 100644
index 00000000..fcaa3859
Binary files /dev/null and b/assets/banner.png differ
diff --git a/nexa/__init__.py b/nexa/__init__.py
index c1336bdb..d85b28ed 100644
--- a/nexa/__init__.py
+++ b/nexa/__init__.py
@@ -1 +1 @@
-__version__ = "0.0.5"
\ No newline at end of file
+__version__ = "0.0.6"
\ No newline at end of file
diff --git a/pyproject.toml b/pyproject.toml
index 16aea040..adee2ae4 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -24,6 +24,7 @@ dependencies = [
"tqdm", # Shared dependencies
"tabulate",
"streamlit",
+ "python-multipart",
]
classifiers = [
"Programming Language :: Python :: 3",
diff --git a/requirements.txt b/requirements.txt
index 5dfbadee..41a9b86e 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -16,6 +16,7 @@ fastapi
uvicorn
pydantic
pillow
+python-multipart
# For onnx
optimum[onnxruntime] # for CPU version
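
The `python-multipart` dependency, added to both `pyproject.toml` and `requirements.txt`, is the parser FastAPI requires for `multipart/form-data` requests; without it, file-upload endpoints fail at request time. A hedged way to exercise such an endpoint against a locally running server (the URL path and field name below are hypothetical, not taken from this diff):

```bash
# Hypothetical multipart upload; substitute the server's real endpoint and field name.
curl -F "file=@sample.wav" http://localhost:8000/v1/audio/transcriptions
```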