diff --git a/android/gradle.properties b/android/gradle.properties
index 99d0efd3..9e3fc850 100644
--- a/android/gradle.properties
+++ b/android/gradle.properties
@@ -6,7 +6,7 @@
 # http://www.gradle.org/docs/current/userguide/build_environment.html
 # Specifies the JVM arguments used for the daemon process.
 # The setting is particularly useful for tweaking memory settings.
-org.gradle.jvmargs=-Xms1024m -Xmx4g -XX:MaxDirectMemorySize=3g -Dsun.nio.MaxDirectMemorySize=3g -Dfile.encoding=UTF-8 -Dorg.gradle.parallel=true -Dorg.gradle.workers.max=4
+org.gradle.jvmargs=-Xms2048m -Xmx5g -XX:MaxDirectMemorySize=4g -Dsun.nio.MaxDirectMemorySize=4g -Dfile.encoding=UTF-8 -Dorg.gradle.parallel=true -Dorg.gradle.workers.max=4
 # When configured, Gradle will run in incubating parallel mode.
 # This option should only be used with decoupled projects. More details, visit
 # http://www.gradle.org/docs/current/userguide/multi_project_builds.html#sec:decoupled_projects
diff --git a/android/llama/src/main/cpp/CMakeLists.txt b/android/llama/src/main/cpp/CMakeLists.txt
index adb370e5..2fab4775 100644
--- a/android/llama/src/main/cpp/CMakeLists.txt
+++ b/android/llama/src/main/cpp/CMakeLists.txt
@@ -1,54 +1,68 @@
-# Sets the minimum CMake version required for this project.
 cmake_minimum_required(VERSION 3.22.1)
-set(CMAKE_BUILD_TYPE  Release)
-# Declares the project name
+set(CMAKE_BUILD_TYPE Release)
+
 project("llama-android")
 
-# Enable FetchContent module
 include(FetchContent)
 
 FetchContent_Declare(
         json
         GIT_REPOSITORY https://github.com/nlohmann/json
-        GIT_TAG        v3.11.3
+        GIT_TAG v3.11.3
 )
-FetchContent_MakeAvailable(json)
 
-# Declare llama.cpp repository
+# set(SOURCE_BASE_DIR /nexa-ai/llama.cpp)
+##### from local #####
+# FetchContent_Declare(
+#         llama
+#         SOURCE_DIR ${SOURCE_BASE_DIR}
+# )
+# FetchContent_Declare(
+#         llava
+#         SOURCE_DIR ${SOURCE_BASE_DIR}/examples/llava
+# )
+# FetchContent_Declare(
+#         omni_vlm
+#         SOURCE_DIR ${SOURCE_BASE_DIR}/examples/omni-vlm
+# )
+# FetchContent_Declare(
+#         omni_audio
+#         SOURCE_DIR ${SOURCE_BASE_DIR}/examples/nexa-omni-audio
+# )
+
+##### from remote #####
+
 FetchContent_Declare(
-        llama
-        GIT_REPOSITORY https://github.com/NexaAI/llama.cpp.git
-        GIT_TAG release
-        # SOURCE_SUBDIR llama.cpp_74d73dc
+       llama
+       GIT_REPOSITORY https://github.com/NexaAI/llama.cpp.git
+       GIT_TAG master
 )
-
-# Declare llama.cpp repository
 FetchContent_Declare(
-        llava
-        GIT_REPOSITORY https://github.com/NexaAI/llama.cpp.git
-        GIT_TAG release
-        SOURCE_SUBDIR examples/llava
+       llava
+       GIT_REPOSITORY https://github.com/NexaAI/llama.cpp.git
+       GIT_TAG master
+       SOURCE_SUBDIR examples/llava
 )
-
 FetchContent_Declare(
-        omni_vlm
-        GIT_REPOSITORY https://github.com/NexaAI/llama.cpp.git
-        GIT_TAG release
-        SOURCE_SUBDIR examples/omni-vlm
+       omni_vlm
+       GIT_REPOSITORY https://github.com/NexaAI/llama.cpp.git
+       GIT_TAG master
+       SOURCE_SUBDIR examples/omni-vlm
+)
+FetchContent_Declare(
+       omni_audio
+       GIT_REPOSITORY https://github.com/NexaAI/llama.cpp.git
+       GIT_TAG T/dev
+       SOURCE_SUBDIR examples/nexa-omni-audio
 )
 
-# Make the content available
-FetchContent_MakeAvailable(llama llava omni_vlm)
+FetchContent_MakeAvailable(json llama llava omni_vlm omni_audio)
 
-# Create the main library
 add_library(${CMAKE_PROJECT_NAME} SHARED
         llama-android.cpp
         common.cpp
         llava-android.cpp
 )
-
-
-# Link the required libraries
 target_link_libraries(${CMAKE_PROJECT_NAME}
         nlohmann_json
         llama
@@ -59,14 +73,11 @@ target_link_libraries(${CMAKE_PROJECT_NAME}
 )
 
 
+##### vision  #####
 add_library(omni-android SHARED
-        llama-android.cpp
         common.cpp
         omni-android.cpp
 )
-
-
-# Link the required libraries
 target_link_libraries(omni-android
         nlohmann_json
         llama
@@ -76,3 +87,18 @@ target_link_libraries(omni-android
         omni_vlm
 )
 
+
+##### audio  #####
+add_library(audio-android SHARED
+        audio-android.cpp
+        common.cpp
+)
+target_link_libraries(audio-android
+        nlohmann_json
+        llama
+        common
+        omni_audio
+        android
+        log
+)
+
diff --git a/android/llama/src/main/cpp/audio-android.cpp b/android/llama/src/main/cpp/audio-android.cpp
new file mode 100644
index 00000000..307766d7
--- /dev/null
+++ b/android/llama/src/main/cpp/audio-android.cpp
@@ -0,0 +1,173 @@
+#include <android/log.h>
+#include <jni.h>
+#include <iomanip>
+#include <math.h>
+#include <string>
+#include <unistd.h>
+#include "omni.cpp"
+#include <nlohmann/json.hpp>
+#include <jni.h>
+#include <string>
+#include <iostream>
+#include <thread>
+
+#define TAG "audio-android.cpp"
+#define LOGi(...) __android_log_print(ANDROID_LOG_INFO, TAG, __VA_ARGS__)
+#define LOGe(...) __android_log_print(ANDROID_LOG_ERROR, TAG, __VA_ARGS__)
+
+extern bool is_valid_utf8(const char* str);
+
+extern std::string jstring2str(JNIEnv* env, jstring jstr);
+
+void redirect_output_to_logcat(const char* tag, int fd) {
+    char buffer[1024];
+    while (true) {
+        ssize_t count = read(fd, buffer, sizeof(buffer) - 1);
+        if (count <= 0) break;
+        buffer[count] = '\0';
+        __android_log_print(ANDROID_LOG_DEBUG, tag, "%s", buffer);
+    }
+}
+
+void setup_redirect_stdout_stderr() {
+    int stdout_pipe[2];
+    int stderr_pipe[2];
+
+    pipe(stdout_pipe);
+    pipe(stderr_pipe);
+
+    // 重定向 stdout
+    dup2(stdout_pipe[1], STDOUT_FILENO);
+    close(stdout_pipe[1]);
+    std::thread(redirect_output_to_logcat, "STDOUT", stdout_pipe[0]).detach();
+
+    // 重定向 stderr
+    dup2(stderr_pipe[1], STDERR_FILENO);
+    close(stderr_pipe[1]);
+    std::thread(redirect_output_to_logcat, "STDERR", stderr_pipe[0]).detach();
+}
+
+JNIEXPORT jint JNICALL JNI_OnLoad(JavaVM* vm, void* reserved) {
+    setup_redirect_stdout_stderr();
+    return JNI_VERSION_1_6;
+}
+
+extern "C" JNIEXPORT jlong JNICALL
+Java_com_nexa_NexaAudioInference_init_1ctx_1params(JNIEnv *env, jobject /* this */, jstring jmodel, jstring jprojector, jstring jaudio) {
+    const char* model = env->GetStringUTFChars(jmodel, nullptr);
+    const char* projector = env->GetStringUTFChars(jprojector, nullptr);
+    const char* audio = env->GetStringUTFChars(jaudio, nullptr);
+    const char* argv[] = {"-t", "1"};
+    int argc = 1;
+    omni_context_params* ctx_params = new omni_context_params(omni_context_default_params());
+    omni_context_params_parse(argc, const_cast<char**>(argv), *ctx_params);
+    ctx_params->model = model;
+    ctx_params->mmproj = projector;
+    ctx_params->file = audio;
+
+    return reinterpret_cast<jlong>(ctx_params);
+}
+
+extern "C" JNIEXPORT jlong JNICALL
+Java_com_nexa_NexaAudioInference_init_1ctx(JNIEnv *env, jobject /* this */, jlong jctx_params) {
+    auto* ctx_params = reinterpret_cast<omni_context_params *>(jctx_params);
+    std::cout << ctx_params->n_gpu_layers << std::endl;
+    std::cout << ctx_params->model << std::endl;
+    std::cout << ctx_params->mmproj << std::endl;
+    std::cout << ctx_params->file << std::endl;
+    omni_context *ctx_omni = omni_init_context(*ctx_params);
+    return reinterpret_cast<jlong>(ctx_omni);
+}
+
+extern "C" JNIEXPORT void JNICALL
+Java_com_nexa_NexaAudioInference_free_1ctx(JNIEnv *env, jobject /* this */, jlong jctx_omni) {
+    auto* ctx_omni = reinterpret_cast<omni_context *>(jctx_omni);
+    omni_free(ctx_omni);
+}
+
+extern "C" JNIEXPORT jlong JNICALL
+Java_com_nexa_NexaAudioInference_init_1npast(JNIEnv *env, jobject /* this */) {
+    int* n_past = new int(0);
+    return reinterpret_cast<jlong>(n_past);
+}
+
+extern "C" JNIEXPORT jlong JNICALL
+Java_com_nexa_NexaAudioInference_init_1params(JNIEnv *env, jobject /* this */, jlong jctx_params) {
+    auto* ctx_params = reinterpret_cast<omni_context_params *>(jctx_params);
+
+    if (ctx_params == nullptr) {
+        std::cerr << "Error: jctx_params is null!" << std::endl;
+        return 0;  // Return 0 (null) if the context parameter is invalid.
+    }
+
+    // Step 2: Call the function to extract omni_params from ctx_params.
+    omni_params extracted_params;
+    try {
+        extracted_params = get_omni_params_from_context_params(*ctx_params);
+    } catch (const std::exception& e) {
+        std::cerr << "Error in get_omni_params_from_context_params: " << e.what() << std::endl;
+        return 0;  // Return 0 (null) if an exception is thrown during the extraction.
+    }
+
+    // Step 3: Allocate memory for omni_params and ensure it's successful.
+    omni_params* all_params = nullptr;
+    try {
+        all_params = new omni_params(extracted_params);
+    } catch (const std::bad_alloc& e) {
+        std::cerr << "Error: Failed to allocate memory for omni_params: " << e.what() << std::endl;
+        return 0;  // Return 0 (null) if memory allocation fails.
+    }
+
+    std::cout << " fname_inp size: " << all_params->whisper.fname_inp.size() << std::endl;
+
+    // Step 4: Return the pointer to the newly allocated omni_params object.
+    std::cout << "all_params address: " << all_params << std::endl;
+    return reinterpret_cast<jlong>(all_params);
+}
+
+
+//val sampler = init_sampler(allParamsPointer, ctxParamsPointer, prompt, audiuo_path, npastPointer)
+extern "C" JNIEXPORT jlong JNICALL
+Java_com_nexa_NexaAudioInference_init_1sampler(JNIEnv *env, jobject /* this */, jlong jctx_omni, jlong jctx_params, jstring jprompt, jstring jaudio_path, jlong jnpast) {
+    auto* n_past = reinterpret_cast<int*>(jnpast);
+    if (n_past == nullptr) {
+    std::cout << "n_past is null!" << std::endl;
+    }
+    const char* prompt = env->GetStringUTFChars(jprompt, nullptr);
+    auto* all_params = reinterpret_cast<omni_params *>(jctx_params);
+    auto* ctx_omni = reinterpret_cast<omni_context *>(jctx_omni);
+
+    ggml_tensor *audio_embed = omni_process_audio(ctx_omni, *all_params);
+    std::string system_prompt, user_prompt;
+    system_prompt = "<start_of_turn>user\nAudio 1: <|audio_bos|>";
+    user_prompt = "<|audio_eos|>\n" + std::string(prompt) + "<end_of_turn>\n<start_of_turn>model\n";
+
+    eval_string(ctx_omni->ctx_llama, system_prompt.c_str(), all_params->gpt.n_batch, n_past, true);
+    omni_eval_audio_embed(ctx_omni->ctx_llama, audio_embed, all_params->gpt.n_batch, n_past);
+    eval_string(ctx_omni->ctx_llama, user_prompt.c_str(), all_params->gpt.n_batch, n_past, false);
+
+    struct common_sampler * ctx_sampling = common_sampler_init(ctx_omni->model, all_params->gpt.sparams);
+
+    return reinterpret_cast<jlong>(ctx_sampling);
+}
+
+
+extern "C" JNIEXPORT jstring JNICALL
+Java_com_nexa_NexaAudioInference_sampler(JNIEnv *env, jobject /* this */, jlong jctx_omni, jlong jsampler, jlong jnpast) {
+    auto* ctx_omni = reinterpret_cast<omni_context *>(jctx_omni);
+    auto* sampler = reinterpret_cast<common_sampler *>(jsampler);
+    auto* n_past = reinterpret_cast<int*>(jnpast);
+
+    const char * tmp = sample(sampler, ctx_omni->ctx_llama, n_past);
+
+    jstring new_token = nullptr;
+    new_token = env->NewStringUTF(tmp);
+    return new_token;
+}
+
+
+extern "C" JNIEXPORT jstring JNICALL
+Java_com_nexa_NexaAudioInference_free_1sampler(JNIEnv *env, jobject /* this */, jlong jsampler) {
+    auto* sampler = reinterpret_cast<common_sampler *>(jsampler);
+    common_sampler_free(sampler);
+}
diff --git a/android/llama/src/main/java/com/nexa/NexaAudioInference.kt b/android/llama/src/main/java/com/nexa/NexaAudioInference.kt
new file mode 100644
index 00000000..5d0a68c9
--- /dev/null
+++ b/android/llama/src/main/java/com/nexa/NexaAudioInference.kt
@@ -0,0 +1,114 @@
+package com.nexa
+import kotlinx.coroutines.Dispatchers
+import kotlinx.coroutines.flow.Flow
+import kotlinx.coroutines.flow.flow
+import kotlinx.coroutines.flow.flowOn
+
+class NexaAudioInference(
+    private val modelPath: String,
+    private val projectorPath: String,
+    private var imagePath: String,
+    private var stopWords: List<String> = emptyList(),
+    private var temperature: Float = 0.8f,
+    private var maxNewTokens: Int = 64,
+    private var topK: Int = 40,
+    private var topP: Float = 0.95f
+) {
+    init {
+        System.loadLibrary("audio-android")
+    }
+
+    private var ctxParamsPointer: Long = 0
+    private var ctxPointer: Long = 0
+    private var generatedTokenNum: Int = 0
+    private var generatedText: String = ""
+    private var isModelLoaded: Boolean = false
+
+    private external fun init_ctx_params( model: String, project: String, audio_path:String): Long
+    private external fun init_ctx(ctxParamsPointer: Long): Long
+    private external fun free_ctx(ctxPointer: Long)
+    private external fun init_npast():Long
+    private external fun init_params(ctxParamsPointer: Long): Long
+    private external fun init_sampler(ctxPointer:Long, omniParamsPointer: Long, prompt: String, audioPath: String, npastPointer: Long): Long
+    private external fun sampler(ctxOmniPointer :Long , samplerPointer: Long, npastPointer: Long): String
+    private external fun free_sampler(samplerPointer: Long)
+
+    @Synchronized
+    fun loadModel() {
+        if(isModelLoaded){
+            throw RuntimeException("Model is already loaded.")
+        }
+        try {
+            val audiuo_path  = "/storage/emulated/0/Android/data/ai.nexa.app_java/files/jfk.wav"
+            ctxParamsPointer = init_ctx_params(modelPath, projectorPath, audiuo_path)
+            ctxPointer = init_ctx(ctxParamsPointer)
+            isModelLoaded = true
+        } catch (e: Exception) {
+            println(e)
+        } catch (e: UnsatisfiedLinkError) {
+            throw RuntimeException("Native method not found: ${e.message}")
+        }catch (e:Error){
+            println(e)
+        }
+    }
+
+    fun dispose() {
+        if(ctxParamsPointer!=0L){
+            ctxParamsPointer = 0;
+        }
+        if (ctxPointer != 0L) {
+            free_ctx(ctxPointer)
+            ctxPointer = 0;
+        }
+    }
+
+    private fun shouldStop(): Boolean {
+        if(this.generatedTokenNum >= this.maxNewTokens){
+            return true
+        }
+
+        return stopWords.any { generatedText.contains(it, ignoreCase = true) }
+    }
+
+    private fun resetGeneration() {
+        generatedTokenNum = 0
+        generatedText = ""
+    }
+
+    @Synchronized
+    fun createCompletionStream(
+        prompt: String,
+        imagePath: String? = null,
+        stopWords: List<String>? = null,
+        temperature: Float? = null,
+        maxNewTokens: Int? = null,
+        topK: Int? = null,
+        topP: Float? = null
+    ): Flow<String> = flow {
+        if(!isModelLoaded){
+            throw RuntimeException("Model is not loaded.")
+        }
+        resetGeneration()
+        val imagePathToUse = imagePath ?: this@NexaAudioInference.imagePath
+
+        val audiuo_path  = "/storage/emulated/0/Android/data/ai.nexa.app_java/files/jfk.wav"
+        val npastPointer = init_npast()
+        val allParamsPointer = init_params(ctxParamsPointer)
+        val sampler = init_sampler(ctxPointer, allParamsPointer, prompt, audiuo_path, npastPointer)
+
+        try {
+            while (true) {
+                val sampledText = sampler(ctxPointer, sampler, npastPointer)
+                generatedTokenNum += 1
+                generatedText += sampledText
+                if(shouldStop()){
+                    break
+                }
+                emit(sampledText)
+            }
+        } finally {
+            resetGeneration()
+            free_sampler(sampler)
+        }
+    }.flowOn(Dispatchers.IO)
+}