feat: add cuBLAS backend #26

Merged 2 commits on Oct 23, 2023
4 changes: 4 additions & 0 deletions CMakeLists.txt
@@ -39,6 +39,10 @@ target_link_libraries(${ENCODEC_LIB} PUBLIC ggml)
target_include_directories(${ENCODEC_LIB} PUBLIC .)
target_compile_features(${ENCODEC_LIB} PUBLIC cxx_std_11)

if (GGML_CUBLAS)
add_compile_definitions(GGML_USE_CUBLAS)
endif()

if (GGML_METAL)
add_compile_definitions(GGML_USE_METAL)
endif()
11 changes: 10 additions & 1 deletion README.md
@@ -21,7 +21,7 @@ https://github.com/PABannier/encodec.cpp/assets/12958149/d11561be-98e9-4504-bba7
- [x] Mixed F16 / F32 precision
- [ ] 4-bit and 8-bit quantization
- [x] Metal support
- - [ ] cuBLAS support
+ - [x] cuBLAS support

## Implementation details

@@ -61,3 +61,12 @@ the power consumption and CPU activity is reduced.
cmake -DGGML_METAL=ON -DBUILD_SHARED_LIBS=Off ..
cmake --build . --config Release
```

### Using cuBLAS

Inference can be offloaded to an NVIDIA GPU through the CUDA backend with cuBLAS. Building with cuBLAS requires the CUDA toolkit to be installed.

```bash
cmake -DGGML_CUBLAS=ON -DBUILD_SHARED_LIBS=Off ..
cmake --build . --config Release
```
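
If the CUDA backend initializes successfully, the model loader prints `using CUDA backend` at load time (see the change to `encodec_load_model_weights` below).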
14 changes: 14 additions & 0 deletions encodec.cpp
@@ -2,6 +2,10 @@
#include "ggml-alloc.h"
#include "ggml-backend.h"

#ifdef GGML_USE_CUBLAS
#include "ggml-cuda.h"
#endif

#ifdef GGML_USE_METAL
#include "ggml-metal.h"
#endif
@@ -446,6 +450,16 @@ bool encodec_load_model_weights(const std::string & fname, encodec_model & model
}
}

#ifdef GGML_USE_CUBLAS
if (n_gpu_layers > 0) {
fprintf(stderr, "%s: using CUDA backend\n", __func__);
model.backend = ggml_backend_cuda_init();
if (!model.backend) {
fprintf(stderr, "%s: ggml_backend_cuda_init() failed\n", __func__);
}
}
#endif

#ifdef GGML_USE_METAL
if (n_gpu_layers > 0) {
fprintf(stderr, "%s: using Metal backend\n", __func__);
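
For context on the `encodec_load_model_weights` hunk above: when neither the CUDA nor the Metal backend is initialized, ggml-based loaders typically fall back to the CPU backend. A minimal sketch of that pattern, assuming the standard `ggml_backend_cpu_init()` from `ggml-backend.h` (this fallback lies outside the lines shown in this diff):

```cpp
// Sketch only: the usual fallback when no GPU backend was created above.
// ggml_backend_cpu_init() is declared in ggml-backend.h.
if (!model.backend) {
    fprintf(stderr, "%s: using CPU backend\n", __func__);
    model.backend = ggml_backend_cpu_init();
}
```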