patch.minigpt4.diff
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 087fc8e..408bb32 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -3,6 +3,7 @@ project(minigpt4.cpp C CXX)
set(CMAKE_EXPORT_COMPILE_COMMANDS ON)
set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/bin)
+set(CMAKE_POLICY_DEFAULT_CMP0077 NEW)
include(FetchContent)
@@ -33,6 +34,7 @@ option(MINIGPT4_FMA "minigpt4: enable FMA"
option(MINIGPT4_ACCELERATE "minigpt4: enable Accelerate framework" ON)
option(MINIGPT4_OPENBLAS "minigpt4: use OpenBLAS" OFF)
option(MINIGPT4_CUBLAS "minigpt4: use cuBLAS" OFF)
+option(MINIGPT4_CUDA_FP16 "minigpt4: use CUDA FP16 for some operations" OFF)
# Build only shared library without building tests and extras
option(MINIGPT4_STANDALONE "minigpt4: build only MINIGPT4 library" OFF)
@@ -44,7 +46,7 @@ option(MINIGPT4_STANDALONE "minigpt4: build only MINIGPT4 library"
set(CMAKE_C_FLAGS_DEBUG "-g -DDEBUG")
set(CMAKE_CXX_FLAGS_DEBUG "-g -DDEBUG")
-set(CMAKE_CXX_STANDARD 23)
+set(CMAKE_CXX_STANDARD 20)
set(CMAKE_CXX_STANDARD_REQUIRED true)
set(CMAKE_C_STANDARD 11)
set(CMAKE_C_STANDARD_REQUIRED true)
@@ -314,8 +316,11 @@ set(LLAMA_AVX512_VBMI ${MINIGPT4_AVX512_VBMI})
set(LLAMA_AVX512_VNNI ${MINIGPT4_AVX512_VNNI})
set(LLAMA_FMA ${MINIGPT4_FMA})
set(LLAMA_ACCELERATE ${MINIGPT4_ACCELERATE})
+set(LLAMA_CUBLAS ${MINIGPT4_CUBLAS})
+set(LLAMA_CUDA_FP16 ${MINIGPT4_CUDA_FP16})
set(GGML_USE_K_QUANTS ON)
-add_dependency(llama_cpp https://github.com/ggerganov/llama.cpp master-31cfbb1 TRUE)
+add_dependency(llama_cpp https://github.com/dusty-nv/llama.cpp ggml TRUE)
+# add_dependency(llama_cpp https://github.com/ggerganov/llama.cpp master-31cfbb1 TRUE)
set(OPENCV_INCLUDE_DIRS "")
set(OPENCV_LIBS "")
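
Notes on the CMakeLists.txt hunks above: setting policy CMP0077 to NEW lets the plain set(LLAMA_CUBLAS ...) calls override the option() defaults declared inside the fetched llama.cpp subproject; the C++ standard is lowered to 20 for toolchains without C++23 support; and the dependency is repointed at dusty-nv's ggml branch so the new MINIGPT4_CUBLAS and MINIGPT4_CUDA_FP16 switches can be forwarded as LLAMA_CUBLAS and LLAMA_CUDA_FP16. A minimal C++ sketch of how such a switch typically surfaces at compile time, assuming the llama.cpp convention of that era in which LLAMA_CUBLAS adds a GGML_USE_CUBLAS compile definition (the define name is an assumption, not taken from this patch):

    #include <cstdio>

    int main() {
    #ifdef GGML_USE_CUBLAS  // assumed define; historically set when LLAMA_CUBLAS=ON
        std::puts("llama.cpp built with cuBLAS GPU offload");
    #else
        std::puts("llama.cpp built CPU-only");
    #endif
        return 0;
    }

To take the GPU path, the top-level build would be configured with -DMINIGPT4_CUBLAS=ON (and optionally -DMINIGPT4_CUDA_FP16=ON).
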
diff --git a/minigpt4.cpp b/minigpt4.cpp
index f8287f1..62efeac 100644
--- a/minigpt4.cpp
+++ b/minigpt4.cpp
@@ -1773,12 +1773,13 @@ public:
{
LoggingTimer timer("LLM model init");
- llama_init_backend(numa);
+ llama_backend_init(numa);
llm_params = llama_context_default_params();
llm_params.n_ctx = n_ctx;
llm_params.n_batch = n_batch;
llm_params.seed = seed;
llm_params.use_mmap = true;
+ llm_params.n_gpu_layers = 999;
// llm_params.use_mlock = true;
llm_model = LLMModel(llama_load_model_from_file(llm_path.string().c_str(), llm_params));
llm_ctx = LLMContext(llama_new_context_with_model(&*llm_model, llm_params));
@@ -2117,7 +2118,7 @@ public:
auto ctx0 = ctx;
struct ggml_cgraph gf = {};
- gf.n_threads = n_threads;
+ //gf.n_threads = n_threads;
ggml_tensor *cur;
@@ -2347,7 +2348,7 @@ public:
use_scratch(-1);
ggml_build_forward_expand(&gf, cur);
- ggml_graph_compute(ctx0, &gf);
+ ggml_graph_compute_with_ctx(ctx0, &gf, n_threads);
INFO("Compute buffer uses {} MB", bytes_to_mb(get_memory_usage(-1)));
INFO("Scratch buffer uses {} MB", bytes_to_mb(get_memory_usage(0)));
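
The minigpt4.cpp hunks above track API changes between the previously pinned master-31cfbb1 snapshot and the newer ggml branch: llama_init_backend was renamed to llama_backend_init, the n_threads field moved off ggml_cgraph and is now passed to ggml_graph_compute_with_ctx, and n_gpu_layers = 999 asks llama.cpp to offload every layer to the GPU (any value larger than the model's layer count means "all"). A minimal sketch of the resulting initialization sequence, with illustrative values and a placeholder model path (neither is taken from the patch; error handling omitted):

    #include "llama.h"

    int main() {
        llama_backend_init(/*numa=*/false);

        llama_context_params params = llama_context_default_params();
        params.n_ctx        = 2048;   // illustrative context length
        params.n_batch      = 512;    // illustrative batch size
        params.use_mmap     = true;
        params.n_gpu_layers = 999;    // offload all layers that exist

        llama_model   *model = llama_load_model_from_file("/models/llama.bin", params);
        llama_context *ctx   = llama_new_context_with_model(model, params);

        // ... run inference here ...

        llama_free(ctx);
        llama_free_model(model);
        llama_backend_free();
        return 0;
    }
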
diff --git a/minigpt4/requirements.txt b/minigpt4/requirements.txt
index 7d536c1..0f73551 100644
--- a/minigpt4/requirements.txt
+++ b/minigpt4/requirements.txt
@@ -1,4 +1,4 @@
-Pillow==10.0.0
-torch==2.0.1
-torchvision==0.15.2
+Pillow
+torch
+torchvision
gradio
\ No newline at end of file
diff --git a/minigpt4/webui.py b/minigpt4/webui.py
index fbd2017..b3e5ff7 100644
--- a/minigpt4/webui.py
+++ b/minigpt4/webui.py
@@ -11,7 +11,7 @@ import gradio as gr
import minigpt4_library
title = """<h1 align="center">MiniGPT-4.cpp Demo</h1>"""
-description = """<h3>This is the demo of MiniGPT-4 with ggml (cpu only!). Upload your images and start chatting!</h3>"""
+description = """<h3>This is the demo of MiniGPT-4 with ggml. Upload your images and start chatting!</h3>"""
article = """<div style='display:flex; gap: 0.25rem; '><a href='https://github.com/Vision-CAIR/MiniGPT-4'><img src='https://img.shields.io/badge/Github-Code-blue'></a></div>
"""
image_ready = False
@@ -104,7 +104,7 @@ def start(share: bool):
)
# stop.click(fn=None, inputs=None, outputs=None, cancels=[submit_click_event, message_submit_event], queue=False)
- demo.launch(enable_queue=True, share=share)
+ demo.launch(enable_queue=True, share=share, server_name=os.environ.get('GRADIO_SERVER_NAME', '0.0.0.0'), server_port=int(os.environ.get('GRADIO_SERVER_PORT', 7860)))
if __name__ == "__main__":
parser = argparse.ArgumentParser(description='Test loading minigpt4')