patch.minigpt4.diff
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 087fc8e..408bb32 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -3,6 +3,7 @@ project(minigpt4.cpp C CXX)
set(CMAKE_EXPORT_COMPILE_COMMANDS ON)
set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/bin)
+set(CMAKE_POLICY_DEFAULT_CMP0077 NEW)
include(FetchContent)
@@ -33,6 +34,7 @@ option(MINIGPT4_FMA "minigpt4: enable FMA"
option(MINIGPT4_ACCELERATE "minigpt4: enable Accelerate framework" ON)
option(MINIGPT4_OPENBLAS "minigpt4: use OpenBLAS" OFF)
option(MINIGPT4_CUBLAS "minigpt4: use cuBLAS" OFF)
+option(MINIGPT4_CUDA_FP16 "minigpt4: use CUDA FP16 for some operations" OFF)
# Build only shared library without building tests and extras
option(MINIGPT4_STANDALONE "minigpt4: build only MINIGPT4 library" OFF)
@@ -44,7 +46,7 @@ option(MINIGPT4_STANDALONE "minigpt4: build only MINIGPT4 library"
set(CMAKE_C_FLAGS_DEBUG "-g -DDEBUG")
set(CMAKE_CXX_FLAGS_DEBUG "-g -DDEBUG")
-set(CMAKE_CXX_STANDARD 23)
+set(CMAKE_CXX_STANDARD 20)
set(CMAKE_CXX_STANDARD_REQUIRED true)
set(CMAKE_C_STANDARD 11)
set(CMAKE_C_STANDARD_REQUIRED true)
@@ -314,8 +316,11 @@ set(LLAMA_AVX512_VBMI ${MINIGPT4_AVX512_VBMI})
set(LLAMA_AVX512_VNNI ${MINIGPT4_AVX512_VNNI})
set(LLAMA_FMA ${MINIGPT4_FMA})
set(LLAMA_ACCELERATE ${MINIGPT4_ACCELERATE})
+set(LLAMA_CUBLAS ${MINIGPT4_CUBLAS})
+set(LLAMA_CUDA_FP16 ${MINIGPT4_CUDA_FP16})
set(GGML_USE_K_QUANTS ON)
-add_dependency(llama_cpp https://github.com/ggerganov/llama.cpp master-31cfbb1 TRUE)
+add_dependency(llama_cpp https://github.com/dusty-nv/llama.cpp ggml TRUE)
+# add_dependency(llama_cpp https://github.com/ggerganov/llama.cpp master-31cfbb1 TRUE)
set(OPENCV_INCLUDE_DIRS "")
set(OPENCV_LIBS "")
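
Notes on the CMakeLists.txt hunks above: setting policy CMP0077 to NEW lets the plain set(LLAMA_CUBLAS ...) calls override the option() defaults declared inside the fetched llama.cpp subproject; the C++ standard is lowered to 20 for toolchains without C++23 support; and the dependency is repointed at dusty-nv's ggml branch so the new MINIGPT4_CUBLAS and MINIGPT4_CUDA_FP16 switches can be forwarded as LLAMA_CUBLAS and LLAMA_CUDA_FP16. A minimal C++ sketch of how such a switch typically surfaces at compile time, assuming the llama.cpp convention of that era in which LLAMA_CUBLAS adds a GGML_USE_CUBLAS compile definition (the define name is an assumption, not taken from this patch):

    #include <cstdio>

    int main() {
    #ifdef GGML_USE_CUBLAS  // assumed define; historically set when LLAMA_CUBLAS=ON
        std::puts("llama.cpp built with cuBLAS GPU offload");
    #else
        std::puts("llama.cpp built CPU-only");
    #endif
        return 0;
    }

To take the GPU path, the top-level build would be configured with -DMINIGPT4_CUBLAS=ON (and optionally -DMINIGPT4_CUDA_FP16=ON).
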
diff --git a/minigpt4.cpp b/minigpt4.cpp
index f8287f1..62efeac 100644
--- a/minigpt4.cpp
+++ b/minigpt4.cpp
@@ -1773,12 +1773,13 @@ public:
{
LoggingTimer timer("LLM model init");
- llama_init_backend(numa);
+ llama_backend_init(numa);
llm_params = llama_context_default_params();
llm_params.n_ctx = n_ctx;
llm_params.n_batch = n_batch;
llm_params.seed = seed;
llm_params.use_mmap = true;
+ llm_params.n_gpu_layers = 999;
// llm_params.use_mlock = true;
llm_model = LLMModel(llama_load_model_from_file(llm_path.string().c_str(), llm_params));
llm_ctx = LLMContext(llama_new_context_with_model(&*llm_model, llm_params));
@@ -2117,7 +2118,7 @@ public:
auto ctx0 = ctx;
struct ggml_cgraph gf = {};
- gf.n_threads = n_threads;
+ //gf.n_threads = n_threads;
ggml_tensor *cur;
@@ -2347,7 +2348,7 @@ public:
use_scratch(-1);
ggml_build_forward_expand(&gf, cur);
- ggml_graph_compute(ctx0, &gf);
+ ggml_graph_compute_with_ctx(ctx0, &gf, n_threads);
INFO("Compute buffer uses {} MB", bytes_to_mb(get_memory_usage(-1)));
INFO("Scratch buffer uses {} MB", bytes_to_mb(get_memory_usage(0)));
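
The minigpt4.cpp hunks above track API changes between the previously pinned master-31cfbb1 snapshot and the newer ggml branch: llama_init_backend was renamed to llama_backend_init, the n_threads field moved off ggml_cgraph and is now passed to ggml_graph_compute_with_ctx, and n_gpu_layers = 999 asks llama.cpp to offload every layer to the GPU (any value larger than the model's layer count means "all"). A minimal sketch of the resulting initialization sequence, with illustrative values and a placeholder model path (neither is taken from the patch; error handling omitted):

    #include "llama.h"

    int main() {
        llama_backend_init(/*numa=*/false);

        llama_context_params params = llama_context_default_params();
        params.n_ctx        = 2048;   // illustrative context length
        params.n_batch      = 512;    // illustrative batch size
        params.use_mmap     = true;
        params.n_gpu_layers = 999;    // offload all layers that exist

        llama_model   *model = llama_load_model_from_file("/models/llama.bin", params);
        llama_context *ctx   = llama_new_context_with_model(model, params);

        // ... run inference here ...

        llama_free(ctx);
        llama_free_model(model);
        llama_backend_free();
        return 0;
    }
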
diff --git a/minigpt4/requirements.txt b/minigpt4/requirements.txt
index 7d536c1..0f73551 100644
--- a/minigpt4/requirements.txt
+++ b/minigpt4/requirements.txt
@@ -1,4 +1,4 @@
-Pillow==10.0.0
-torch==2.0.1
-torchvision==0.15.2
+Pillow
+torch
+torchvision
gradio
\ No newline at end of file
diff --git a/minigpt4/webui.py b/minigpt4/webui.py
index fbd2017..b3e5ff7 100644
--- a/minigpt4/webui.py
+++ b/minigpt4/webui.py
@@ -11,7 +11,7 @@ import gradio as gr
import minigpt4_library
title = """<h1 align="center">MiniGPT-4.cpp Demo</h1>"""
-description = """<h3>This is the demo of MiniGPT-4 with ggml (cpu only!). Upload your images and start chatting!</h3>"""
+description = """<h3>This is the demo of MiniGPT-4 with ggml. Upload your images and start chatting!</h3>"""
article = """<div style='display:flex; gap: 0.25rem; '><a href='https://github.com/Vision-CAIR/MiniGPT-4'><img src='https://img.shields.io/badge/Github-Code-blue'></a></div>
"""
image_ready = False
@@ -104,7 +104,7 @@ def start(share: bool):
)
# stop.click(fn=None, inputs=None, outputs=None, cancels=[submit_click_event, message_submit_event], queue=False)
- demo.launch(enable_queue=True, share=share)
+ demo.launch(enable_queue=True, share=share, server_name=os.environ.get('GRADIO_SERVER_NAME', '0.0.0.0'), server_port=int(os.environ.get('GRADIO_SERVER_PORT', 7860)))
if __name__ == "__main__":
parser = argparse.ArgumentParser(description='Test loading minigpt4')