diff --git a/cpp/ggml-metal.metal b/cpp/ggml-metal-whisper.metal
similarity index 100%
rename from cpp/ggml-metal.metal
rename to cpp/ggml-metal-whisper.metal
diff --git a/cpp/ggml-metal.m b/cpp/ggml-metal.m
index cf823bf..cbcc6f8 100644
--- a/cpp/ggml-metal.m
+++ b/cpp/ggml-metal.m
@@ -115,9 +115,9 @@
 static NSString * const msl_library_source = @"see metal.metal";
 
 // Here to assist with NSBundle Path Hack
-@interface GGMLMetalClass : NSObject
+@interface WSPGGMLMetalClass : NSObject
 @end
-@implementation GGMLMetalClass
+@implementation WSPGGMLMetalClass
 @end
 
 struct wsp_ggml_metal_context * wsp_ggml_metal_init(int n_cb) {
@@ -155,7 +155,7 @@ @implementation GGMLMetalClass
     {
         NSError * error = nil;
 
-        NSBundle * bundle = [NSBundle bundleForClass:[GGMLMetalClass class]];
+        NSBundle * bundle = [NSBundle bundleForClass:[WSPGGMLMetalClass class]];
         NSString * llamaBundlePath = [bundle pathForResource:@"llama_llama" ofType:@"bundle"];
         NSBundle * llamaBundle = [NSBundle bundleWithPath:llamaBundlePath];
         NSString * libPath = [llamaBundle pathForResource:@"default" ofType:@"metallib"];
@@ -177,8 +177,8 @@ @implementation GGMLMetalClass
         NSError * error = nil;
 
         //NSString * path = [[NSBundle mainBundle] pathForResource:@"../../examples/metal/metal" ofType:@"metal"];
-        NSBundle * bundle = [NSBundle bundleForClass:[GGMLMetalClass class]];
-        NSString * path   = [bundle pathForResource:@"ggml-metal" ofType:@"metal"];
+        NSBundle * bundle = [NSBundle bundleForClass:[WSPGGMLMetalClass class]];
+        NSString * path   = [bundle pathForResource:@"ggml-metal-whisper" ofType:@"metal"];
         metal_printf("%s: loading '%s'\n", __func__, [path UTF8String]);
 
         NSString * src  = [NSString stringWithContentsOfFile:path encoding:NSUTF8StringEncoding error:&error];
diff --git a/cpp/ggml.c b/cpp/ggml.c
index 0f6b36e..ded2caf 100644
--- a/cpp/ggml.c
+++ b/cpp/ggml.c
@@ -19589,46 +19589,46 @@ size_t wsp_ggml_quantize_chunk(enum wsp_ggml_type type, const float * src, void
 
 ////////////////////////////////////////////////////////////////////////////////
 
-struct gguf_str {
+struct wsp_gguf_str {
     uint64_t n;  // GGUFv2
     char * data;
 };
 
-static const size_t GGUF_TYPE_SIZE[GGUF_TYPE_COUNT] = {
-    [GGUF_TYPE_UINT8]   = sizeof(uint8_t),
-    [GGUF_TYPE_INT8]    = sizeof(int8_t),
-    [GGUF_TYPE_UINT16]  = sizeof(uint16_t),
-    [GGUF_TYPE_INT16]   = sizeof(int16_t),
-    [GGUF_TYPE_UINT32]  = sizeof(uint32_t),
-    [GGUF_TYPE_INT32]   = sizeof(int32_t),
-    [GGUF_TYPE_FLOAT32] = sizeof(float),
-    [GGUF_TYPE_BOOL]    = sizeof(bool),
-    [GGUF_TYPE_STRING]  = sizeof(struct gguf_str),
-    [GGUF_TYPE_UINT64]  = sizeof(uint64_t),
-    [GGUF_TYPE_INT64]   = sizeof(int64_t),
-    [GGUF_TYPE_FLOAT64] = sizeof(double),
-    [GGUF_TYPE_ARRAY]   = 0, // undefined
+static const size_t WSP_GGUF_TYPE_SIZE[WSP_GGUF_TYPE_COUNT] = {
+    [WSP_GGUF_TYPE_UINT8]   = sizeof(uint8_t),
+    [WSP_GGUF_TYPE_INT8]    = sizeof(int8_t),
+    [WSP_GGUF_TYPE_UINT16]  = sizeof(uint16_t),
+    [WSP_GGUF_TYPE_INT16]   = sizeof(int16_t),
+    [WSP_GGUF_TYPE_UINT32]  = sizeof(uint32_t),
+    [WSP_GGUF_TYPE_INT32]   = sizeof(int32_t),
+    [WSP_GGUF_TYPE_FLOAT32] = sizeof(float),
+    [WSP_GGUF_TYPE_BOOL]    = sizeof(bool),
+    [WSP_GGUF_TYPE_STRING]  = sizeof(struct wsp_gguf_str),
+    [WSP_GGUF_TYPE_UINT64]  = sizeof(uint64_t),
+    [WSP_GGUF_TYPE_INT64]   = sizeof(int64_t),
+    [WSP_GGUF_TYPE_FLOAT64] = sizeof(double),
+    [WSP_GGUF_TYPE_ARRAY]   = 0, // undefined
 };
-static_assert(GGUF_TYPE_COUNT == 13, "GGUF_TYPE_COUNT != 13");
-
-static const char * GGUF_TYPE_NAME[GGUF_TYPE_COUNT] = {
-    [GGUF_TYPE_UINT8]   = "u8",
-    [GGUF_TYPE_INT8]    = "i8",
-    [GGUF_TYPE_UINT16]  = "u16",
-    [GGUF_TYPE_INT16]   = "i16",
-    [GGUF_TYPE_UINT32]  = "u32",
-    [GGUF_TYPE_INT32]   = "i32",
-    [GGUF_TYPE_FLOAT32] = "f32",
-    [GGUF_TYPE_BOOL]    = "bool",
-    [GGUF_TYPE_STRING]  = "str",
-    [GGUF_TYPE_ARRAY]   = "arr",
-    [GGUF_TYPE_UINT64]  = "u64",
-    [GGUF_TYPE_INT64]   = "i64",
-    [GGUF_TYPE_FLOAT64] = "f64",
+static_assert(WSP_GGUF_TYPE_COUNT == 13, "WSP_GGUF_TYPE_COUNT != 13");
+
+static const char * WSP_GGUF_TYPE_NAME[WSP_GGUF_TYPE_COUNT] = {
+    [WSP_GGUF_TYPE_UINT8]   = "u8",
+    [WSP_GGUF_TYPE_INT8]    = "i8",
+    [WSP_GGUF_TYPE_UINT16]  = "u16",
+    [WSP_GGUF_TYPE_INT16]   = "i16",
+    [WSP_GGUF_TYPE_UINT32]  = "u32",
+    [WSP_GGUF_TYPE_INT32]   = "i32",
+    [WSP_GGUF_TYPE_FLOAT32] = "f32",
+    [WSP_GGUF_TYPE_BOOL]    = "bool",
+    [WSP_GGUF_TYPE_STRING]  = "str",
+    [WSP_GGUF_TYPE_ARRAY]   = "arr",
+    [WSP_GGUF_TYPE_UINT64]  = "u64",
+    [WSP_GGUF_TYPE_INT64]   = "i64",
+    [WSP_GGUF_TYPE_FLOAT64] = "f64",
 };
-static_assert(GGUF_TYPE_COUNT == 13, "GGUF_TYPE_COUNT != 13");
+static_assert(WSP_GGUF_TYPE_COUNT == 13, "WSP_GGUF_TYPE_COUNT != 13");
 
-union gguf_value {
+union wsp_gguf_value {
     uint8_t  uint8;
     int8_t   int8;
     uint16_t uint16;
@@ -19641,32 +19641,32 @@ union gguf_value {
     double   float64;
     bool     bool_;
 
-    struct gguf_str str;
+    struct wsp_gguf_str str;
 
     struct {
-        enum gguf_type type;
+        enum wsp_gguf_type type;
 
         uint64_t n;  // GGUFv2
         void * data;
     } arr;
 };
 
-struct gguf_kv {
-    struct gguf_str key;
+struct wsp_gguf_kv {
+    struct wsp_gguf_str key;
 
-    enum  gguf_type  type;
-    union gguf_value value;
+    enum  wsp_gguf_type  type;
+    union wsp_gguf_value value;
 };
 
-struct gguf_header {
+struct wsp_gguf_header {
     uint32_t magic;
     uint32_t version;
     uint64_t n_tensors; // GGUFv2
     uint64_t n_kv;      // GGUFv2
 };
 
-struct gguf_tensor_info {
-    struct gguf_str name;
+struct wsp_gguf_tensor_info {
+    struct wsp_gguf_str name;
 
     uint32_t n_dims;
     uint64_t ne[WSP_GGML_MAX_DIMS];
@@ -19680,11 +19680,11 @@ struct gguf_tensor_info {
     size_t size;
 };
 
-struct gguf_context {
-    struct gguf_header header;
+struct wsp_gguf_context {
+    struct wsp_gguf_header header;
 
-    struct gguf_kv          * kv;
-    struct gguf_tensor_info * infos;
+    struct wsp_gguf_kv          * kv;
+    struct wsp_gguf_tensor_info * infos;
 
     size_t alignment;
     size_t offset;    // offset of `data` from beginning of file
@@ -19694,50 +19694,50 @@ struct gguf_context {
     void * data;
 };
 
-static bool gguf_fread_el(FILE * file, void * dst, size_t size, size_t * offset) {
+static bool wsp_gguf_fread_el(FILE * file, void * dst, size_t size, size_t * offset) {
     const size_t n = fread(dst, 1, size, file);
     *offset += n;
     return n == size;
 }
 
 // NOTE: temporary handling of GGUFv1 >> remove after Oct 2023
-static bool gguf_fread_str_cur(FILE * file, struct gguf_str * p, size_t * offset) {
+static bool wsp_gguf_fread_str_cur(FILE * file, struct wsp_gguf_str * p, size_t * offset) {
     p->n    = 0;
     p->data = NULL;
 
     bool ok = true;
 
-    ok = ok && gguf_fread_el(file, &p->n,    sizeof(p->n), offset); p->data = calloc(p->n + 1, 1);
-    ok = ok && gguf_fread_el(file,  p->data, p->n,         offset);
+    ok = ok && wsp_gguf_fread_el(file, &p->n,    sizeof(p->n), offset); p->data = calloc(p->n + 1, 1);
+    ok = ok && wsp_gguf_fread_el(file,  p->data, p->n,         offset);
 
     return ok;
 }
 
-static bool gguf_fread_str_v1(FILE * file, struct gguf_str * p, size_t * offset) {
+static bool wsp_gguf_fread_str_v1(FILE * file, struct wsp_gguf_str * p, size_t * offset) {
     p->n    = 0;
     p->data = NULL;
 
     bool ok = true;
 
     uint32_t n = 0;
-    ok = ok && gguf_fread_el(file, &n,       sizeof(n), offset); p->data = calloc(n + 1, 1); p->n = n;
-    ok = ok && gguf_fread_el(file,  p->data, p->n,      offset);
+    ok = ok && wsp_gguf_fread_el(file, &n,       sizeof(n), offset); p->data = calloc(n + 1, 1); p->n = n;
+    ok = ok && wsp_gguf_fread_el(file,  p->data, p->n,      offset);
 
     return ok;
 }
 
-struct gguf_context * gguf_init_empty(void) {
-    struct gguf_context * ctx = WSP_GGML_ALIGNED_MALLOC(sizeof(struct gguf_context));
+struct wsp_gguf_context * wsp_gguf_init_empty(void) {
+    struct wsp_gguf_context * ctx = WSP_GGML_ALIGNED_MALLOC(sizeof(struct wsp_gguf_context));
 
-    ctx->header.magic     = GGUF_MAGIC;
-    ctx->header.version   = GGUF_VERSION;
+    ctx->header.magic     = WSP_GGUF_MAGIC;
+    ctx->header.version   = WSP_GGUF_VERSION;
     ctx->header.n_tensors = 0;
     ctx->header.n_kv      = 0;
 
     ctx->kv    = NULL;
     ctx->infos = NULL;
 
-    ctx->alignment = GGUF_DEFAULT_ALIGNMENT;
+    ctx->alignment = WSP_GGUF_DEFAULT_ALIGNMENT;
     ctx->offset    = 0;
     ctx->size      = 0;
 
@@ -19746,7 +19746,7 @@ struct gguf_context * gguf_init_empty(void) {
     return ctx;
 }
 
-struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_params params) {
+struct wsp_gguf_context * wsp_gguf_init_from_file(const char * fname, struct wsp_gguf_init_params params) {
     FILE * file = fopen(fname, "rb");
     if (!file) {
         return NULL;
@@ -19759,9 +19759,9 @@ struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_p
 
     // check the magic before making allocations
     {
-        gguf_fread_el(file, &magic, sizeof(magic), &offset);
+        wsp_gguf_fread_el(file, &magic, sizeof(magic), &offset);
 
-        if (magic != GGUF_MAGIC) {
+        if (magic != WSP_GGUF_MAGIC) {
             fprintf(stderr, "%s: invalid magic number %08x\n", __func__, magic);
             fclose(file);
             return NULL;
@@ -19770,7 +19770,7 @@ struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_p
 
     bool ok = true;
 
-    struct gguf_context * ctx = WSP_GGML_ALIGNED_MALLOC(sizeof(struct gguf_context));
+    struct wsp_gguf_context * ctx = WSP_GGML_ALIGNED_MALLOC(sizeof(struct wsp_gguf_context));
 
     // read the header
     {
@@ -19780,105 +19780,105 @@ struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_p
         ctx->infos = NULL;
         ctx->data  = NULL;
 
-        ok = ok && gguf_fread_el(file, &ctx->header.version,   sizeof(ctx->header.version),   &offset);
+        ok = ok && wsp_gguf_fread_el(file, &ctx->header.version,   sizeof(ctx->header.version),   &offset);
 
         if (ctx->header.version == 1) {
             // NOTE: temporary handling of GGUFv1 >> remove after Oct 2023
             uint32_t n_tensors = 0;
             uint32_t n_kv      = 0;
 
-            ok = ok && gguf_fread_el(file, &n_tensors, sizeof(n_tensors), &offset);
-            ok = ok && gguf_fread_el(file, &n_kv,      sizeof(n_kv),      &offset);
+            ok = ok && wsp_gguf_fread_el(file, &n_tensors, sizeof(n_tensors), &offset);
+            ok = ok && wsp_gguf_fread_el(file, &n_kv,      sizeof(n_kv),      &offset);
 
             ctx->header.n_tensors = n_tensors;
             ctx->header.n_kv      = n_kv;
         } else {
-            ok = ok && gguf_fread_el(file, &ctx->header.n_tensors, sizeof(ctx->header.n_tensors), &offset);
-            ok = ok && gguf_fread_el(file, &ctx->header.n_kv,      sizeof(ctx->header.n_kv),      &offset);
+            ok = ok && wsp_gguf_fread_el(file, &ctx->header.n_tensors, sizeof(ctx->header.n_tensors), &offset);
+            ok = ok && wsp_gguf_fread_el(file, &ctx->header.n_kv,      sizeof(ctx->header.n_kv),      &offset);
         }
 
         if (!ok) {
             fprintf(stderr, "%s: failed to read header\n", __func__);
             fclose(file);
-            gguf_free(ctx);
+            wsp_gguf_free(ctx);
             return NULL;
         }
     }
 
     // NOTE: temporary handling of GGUFv1 >> remove after Oct 2023
-    bool (* gguf_fread_str)(FILE *, struct gguf_str *, size_t *) = gguf_fread_str_cur;
+    bool (* wsp_gguf_fread_str)(FILE *, struct wsp_gguf_str *, size_t *) = wsp_gguf_fread_str_cur;
     if (ctx->header.version == 1) {
-        gguf_fread_str = gguf_fread_str_v1;
+        wsp_gguf_fread_str = wsp_gguf_fread_str_v1;
     }
 
     // read the kv pairs
     {
-        ctx->kv = malloc(ctx->header.n_kv * sizeof(struct gguf_kv));
+        ctx->kv = malloc(ctx->header.n_kv * sizeof(struct wsp_gguf_kv));
 
         for (uint32_t i = 0; i < ctx->header.n_kv; ++i) {
-            struct gguf_kv * kv = &ctx->kv[i];
+            struct wsp_gguf_kv * kv = &ctx->kv[i];
 
             //fprintf(stderr, "%s: reading kv %d\n", __func__, i);
 
-            ok = ok && gguf_fread_str(file, &kv->key,                    &offset);
-            ok = ok && gguf_fread_el (file, &kv->type, sizeof(kv->type), &offset);
+            ok = ok && wsp_gguf_fread_str(file, &kv->key,                    &offset);
+            ok = ok && wsp_gguf_fread_el (file, &kv->type, sizeof(kv->type), &offset);
 
             //fprintf(stderr, "%s: reading kv with key %s\n", __func__, kv->key.data);
 
             switch (kv->type) {
-                case GGUF_TYPE_UINT8:   ok = ok && gguf_fread_el (file, &kv->value.uint8,   sizeof(kv->value.uint8),   &offset); break;
-                case GGUF_TYPE_INT8:    ok = ok && gguf_fread_el (file, &kv->value.int8,    sizeof(kv->value.int8),    &offset); break;
-                case GGUF_TYPE_UINT16:  ok = ok && gguf_fread_el (file, &kv->value.uint16,  sizeof(kv->value.uint16),  &offset); break;
-                case GGUF_TYPE_INT16:   ok = ok && gguf_fread_el (file, &kv->value.int16,   sizeof(kv->value.int16),   &offset); break;
-                case GGUF_TYPE_UINT32:  ok = ok && gguf_fread_el (file, &kv->value.uint32,  sizeof(kv->value.uint32),  &offset); break;
-                case GGUF_TYPE_INT32:   ok = ok && gguf_fread_el (file, &kv->value.int32,   sizeof(kv->value.int32),   &offset); break;
-                case GGUF_TYPE_FLOAT32: ok = ok && gguf_fread_el (file, &kv->value.float32, sizeof(kv->value.float32), &offset); break;
-                case GGUF_TYPE_UINT64:  ok = ok && gguf_fread_el (file, &kv->value.uint64,  sizeof(kv->value.uint64),  &offset); break;
-                case GGUF_TYPE_INT64:   ok = ok && gguf_fread_el (file, &kv->value.int64,   sizeof(kv->value.int64),   &offset); break;
-                case GGUF_TYPE_FLOAT64: ok = ok && gguf_fread_el (file, &kv->value.float64, sizeof(kv->value.float64), &offset); break;
-                case GGUF_TYPE_BOOL:    ok = ok && gguf_fread_el (file, &kv->value.bool_,   sizeof(kv->value.bool_),   &offset); break;
-                case GGUF_TYPE_STRING:  ok = ok && gguf_fread_str(file, &kv->value.str,                                &offset); break;
-                case GGUF_TYPE_ARRAY:
+                case WSP_GGUF_TYPE_UINT8:   ok = ok && wsp_gguf_fread_el (file, &kv->value.uint8,   sizeof(kv->value.uint8),   &offset); break;
+                case WSP_GGUF_TYPE_INT8:    ok = ok && wsp_gguf_fread_el (file, &kv->value.int8,    sizeof(kv->value.int8),    &offset); break;
+                case WSP_GGUF_TYPE_UINT16:  ok = ok && wsp_gguf_fread_el (file, &kv->value.uint16,  sizeof(kv->value.uint16),  &offset); break;
+                case WSP_GGUF_TYPE_INT16:   ok = ok && wsp_gguf_fread_el (file, &kv->value.int16,   sizeof(kv->value.int16),   &offset); break;
+                case WSP_GGUF_TYPE_UINT32:  ok = ok && wsp_gguf_fread_el (file, &kv->value.uint32,  sizeof(kv->value.uint32),  &offset); break;
+                case WSP_GGUF_TYPE_INT32:   ok = ok && wsp_gguf_fread_el (file, &kv->value.int32,   sizeof(kv->value.int32),   &offset); break;
+                case WSP_GGUF_TYPE_FLOAT32: ok = ok && wsp_gguf_fread_el (file, &kv->value.float32, sizeof(kv->value.float32), &offset); break;
+                case WSP_GGUF_TYPE_UINT64:  ok = ok && wsp_gguf_fread_el (file, &kv->value.uint64,  sizeof(kv->value.uint64),  &offset); break;
+                case WSP_GGUF_TYPE_INT64:   ok = ok && wsp_gguf_fread_el (file, &kv->value.int64,   sizeof(kv->value.int64),   &offset); break;
+                case WSP_GGUF_TYPE_FLOAT64: ok = ok && wsp_gguf_fread_el (file, &kv->value.float64, sizeof(kv->value.float64), &offset); break;
+                case WSP_GGUF_TYPE_BOOL:    ok = ok && wsp_gguf_fread_el (file, &kv->value.bool_,   sizeof(kv->value.bool_),   &offset); break;
+                case WSP_GGUF_TYPE_STRING:  ok = ok && wsp_gguf_fread_str(file, &kv->value.str,                                &offset); break;
+                case WSP_GGUF_TYPE_ARRAY:
                     {
-                        ok = ok && gguf_fread_el(file, &kv->value.arr.type, sizeof(kv->value.arr.type), &offset);
+                        ok = ok && wsp_gguf_fread_el(file, &kv->value.arr.type, sizeof(kv->value.arr.type), &offset);
 
                         if (ctx->header.version == 1) {
                             // NOTE: temporary handling of GGUFv1 >> remove after Oct 2023
                             uint32_t n = 0;
-                            ok = ok && gguf_fread_el(file, &n, sizeof(n), &offset);
+                            ok = ok && wsp_gguf_fread_el(file, &n, sizeof(n), &offset);
                             kv->value.arr.n = n;
                         } else {
-                            ok = ok && gguf_fread_el(file, &kv->value.arr.n, sizeof(kv->value.arr.n), &offset);
+                            ok = ok && wsp_gguf_fread_el(file, &kv->value.arr.n, sizeof(kv->value.arr.n), &offset);
                         }
 
                         switch (kv->value.arr.type) {
-                            case GGUF_TYPE_UINT8:
-                            case GGUF_TYPE_INT8:
-                            case GGUF_TYPE_UINT16:
-                            case GGUF_TYPE_INT16:
-                            case GGUF_TYPE_UINT32:
-                            case GGUF_TYPE_INT32:
-                            case GGUF_TYPE_FLOAT32:
-                            case GGUF_TYPE_UINT64:
-                            case GGUF_TYPE_INT64:
-                            case GGUF_TYPE_FLOAT64:
-                            case GGUF_TYPE_BOOL:
+                            case WSP_GGUF_TYPE_UINT8:
+                            case WSP_GGUF_TYPE_INT8:
+                            case WSP_GGUF_TYPE_UINT16:
+                            case WSP_GGUF_TYPE_INT16:
+                            case WSP_GGUF_TYPE_UINT32:
+                            case WSP_GGUF_TYPE_INT32:
+                            case WSP_GGUF_TYPE_FLOAT32:
+                            case WSP_GGUF_TYPE_UINT64:
+                            case WSP_GGUF_TYPE_INT64:
+                            case WSP_GGUF_TYPE_FLOAT64:
+                            case WSP_GGUF_TYPE_BOOL:
                                 {
-                                    kv->value.arr.data = malloc(kv->value.arr.n * GGUF_TYPE_SIZE[kv->value.arr.type]);
-                                    ok = ok && gguf_fread_el(file, kv->value.arr.data, kv->value.arr.n * GGUF_TYPE_SIZE[kv->value.arr.type], &offset);
+                                    kv->value.arr.data = malloc(kv->value.arr.n * WSP_GGUF_TYPE_SIZE[kv->value.arr.type]);
+                                    ok = ok && wsp_gguf_fread_el(file, kv->value.arr.data, kv->value.arr.n * WSP_GGUF_TYPE_SIZE[kv->value.arr.type], &offset);
                                 } break;
-                            case GGUF_TYPE_STRING:
+                            case WSP_GGUF_TYPE_STRING:
                                 {
-                                    kv->value.arr.data = malloc(kv->value.arr.n * sizeof(struct gguf_str));
+                                    kv->value.arr.data = malloc(kv->value.arr.n * sizeof(struct wsp_gguf_str));
                                     for (uint32_t j = 0; j < kv->value.arr.n; ++j) {
-                                        ok = ok && gguf_fread_str(file, &((struct gguf_str *) kv->value.arr.data)[j], &offset);
+                                        ok = ok && wsp_gguf_fread_str(file, &((struct wsp_gguf_str *) kv->value.arr.data)[j], &offset);
                                     }
                                 } break;
-                            case GGUF_TYPE_ARRAY:
-                            case GGUF_TYPE_COUNT: WSP_GGML_ASSERT(false && "invalid type"); break;
+                            case WSP_GGUF_TYPE_ARRAY:
+                            case WSP_GGUF_TYPE_COUNT: WSP_GGML_ASSERT(false && "invalid type"); break;
                         };
                     } break;
-                case GGUF_TYPE_COUNT: WSP_GGML_ASSERT(false && "invalid type");
+                case WSP_GGUF_TYPE_COUNT: WSP_GGML_ASSERT(false && "invalid type");
             };
 
             if (!ok) {
@@ -19889,51 +19889,51 @@ struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_p
         if (!ok) {
             fprintf(stderr, "%s: failed to read key-value pairs\n", __func__);
             fclose(file);
-            gguf_free(ctx);
+            wsp_gguf_free(ctx);
             return NULL;
         }
     }
 
     // read the tensor infos
     {
-        ctx->infos = malloc(ctx->header.n_tensors * sizeof(struct gguf_tensor_info));
+        ctx->infos = malloc(ctx->header.n_tensors * sizeof(struct wsp_gguf_tensor_info));
 
         for (uint32_t i = 0; i < ctx->header.n_tensors; ++i) {
-            struct gguf_tensor_info * info = &ctx->infos[i];
+            struct wsp_gguf_tensor_info * info = &ctx->infos[i];
 
             for (int j = 0; j < WSP_GGML_MAX_DIMS; ++j) {
                 info->ne[j] = 1;
             }
 
-            ok = ok && gguf_fread_str(file, &info->name,                          &offset);
-            ok = ok && gguf_fread_el (file, &info->n_dims, sizeof(info->n_dims),  &offset);
+            ok = ok && wsp_gguf_fread_str(file, &info->name,                          &offset);
+            ok = ok && wsp_gguf_fread_el (file, &info->n_dims, sizeof(info->n_dims),  &offset);
             for (uint32_t j = 0; j < info->n_dims; ++j) {
                 if (ctx->header.version == 1) {
                     // NOTE: temporary handling of GGUFv1 >> remove after Oct 2023
                     uint32_t t = 0;
-                    ok = ok && gguf_fread_el(file, &t, sizeof(t), &offset);
+                    ok = ok && wsp_gguf_fread_el(file, &t, sizeof(t), &offset);
                     info->ne[j] = t;
                 } else {
-                    ok = ok && gguf_fread_el(file, &info->ne[j], sizeof(info->ne[j]), &offset);
+                    ok = ok && wsp_gguf_fread_el(file, &info->ne[j], sizeof(info->ne[j]), &offset);
                 }
             }
-            ok = ok && gguf_fread_el (file, &info->type,   sizeof(info->type),    &offset);
-            ok = ok && gguf_fread_el (file, &info->offset, sizeof(info->offset),  &offset);
+            ok = ok && wsp_gguf_fread_el (file, &info->type,   sizeof(info->type),    &offset);
+            ok = ok && wsp_gguf_fread_el (file, &info->offset, sizeof(info->offset),  &offset);
 
             if (!ok) {
                 fprintf(stderr, "%s: failed to read tensor info\n", __func__);
                 fclose(file);
-                gguf_free(ctx);
+                wsp_gguf_free(ctx);
                 return NULL;
             }
         }
     }
 
-    ctx->alignment = GGUF_DEFAULT_ALIGNMENT;
+    ctx->alignment = WSP_GGUF_DEFAULT_ALIGNMENT;
 
-    int alignment_idx = gguf_find_key(ctx, "general.alignment");
+    int alignment_idx = wsp_gguf_find_key(ctx, "general.alignment");
     if (alignment_idx != -1) {
-        ctx->alignment = gguf_get_val_u32(ctx, alignment_idx);
+        ctx->alignment = wsp_gguf_get_val_u32(ctx, alignment_idx);
     }
 
     // we require the data section to be aligned, so take into account any padding
@@ -19953,7 +19953,7 @@ struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_p
     {
         ctx->size = 0;
         for (uint32_t i = 0; i < ctx->header.n_tensors; ++i) {
-            struct gguf_tensor_info * info = &ctx->infos[i];
+            struct wsp_gguf_tensor_info * info = &ctx->infos[i];
 
             const int64_t ne =
                 (int64_t) info->ne[0] *
@@ -19965,7 +19965,7 @@ struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_p
                 fprintf(stderr, "%s: tensor '%s' number of elements (%" PRId64 ") is not a multiple of block size (%d)\n",
                         __func__, info->name.data, ne, wsp_ggml_blck_size(info->type));
                 fclose(file);
-                gguf_free(ctx);
+                wsp_gguf_free(ctx);
                 return NULL;
             }
 
@@ -19977,7 +19977,7 @@ struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_p
 
     // load the tensor data only if requested
     if (params.ctx != NULL) {
-        // if the provided gguf_context is no_alloc, then we create "empty" tensors and do not read the binary blob
+        // if the provided wsp_gguf_context is no_alloc, then we create "empty" tensors and do not read the binary blob
         // otherwise, we load the binary blob into the created wsp_ggml_context as well, and point the "data" members of
         // the wsp_ggml_tensor structs to the appropriate locations in the binary blob
 
@@ -20005,13 +20005,13 @@ struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_p
             ok = ok && data != NULL;
 
             // read the binary blob with the tensor data
-            ok = ok && gguf_fread_el(file, data->data, ctx->size, &offset);
+            ok = ok && wsp_gguf_fread_el(file, data->data, ctx->size, &offset);
 
             if (!ok) {
                 fprintf(stderr, "%s: failed to read tensor data\n", __func__);
                 fclose(file);
                 wsp_ggml_free(ctx_data);
-                gguf_free(ctx);
+                wsp_gguf_free(ctx);
                 return NULL;
             }
 
@@ -20050,7 +20050,7 @@ struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_p
             fprintf(stderr, "%s: failed to read the tensor data\n", __func__);
             fclose(file);
             wsp_ggml_free(ctx_data);
-            gguf_free(ctx);
+            wsp_gguf_free(ctx);
             return NULL;
         }
 
@@ -20062,7 +20062,7 @@ struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_p
     return ctx;
 }
 
-void gguf_free(struct gguf_context * ctx) {
+void wsp_gguf_free(struct wsp_gguf_context * ctx) {
     if (ctx == NULL) {
         return;
     }
@@ -20070,23 +20070,23 @@ void gguf_free(struct gguf_context * ctx) {
     if (ctx->kv) {
         // free string memory - not great..
         for (uint32_t i = 0; i < ctx->header.n_kv; ++i) {
-            struct gguf_kv * kv = &ctx->kv[i];
+            struct wsp_gguf_kv * kv = &ctx->kv[i];
 
             if (kv->key.data) {
                 free(kv->key.data);
             }
 
-            if (kv->type == GGUF_TYPE_STRING) {
+            if (kv->type == WSP_GGUF_TYPE_STRING) {
                 if (kv->value.str.data) {
                     free(kv->value.str.data);
                 }
             }
 
-            if (kv->type == GGUF_TYPE_ARRAY) {
+            if (kv->type == WSP_GGUF_TYPE_ARRAY) {
                 if (kv->value.arr.data) {
-                    if (kv->value.arr.type == GGUF_TYPE_STRING) {
+                    if (kv->value.arr.type == WSP_GGUF_TYPE_STRING) {
                         for (uint32_t j = 0; j < kv->value.arr.n; ++j) {
-                            struct gguf_str * str = &((struct gguf_str *) kv->value.arr.data)[j];
+                            struct wsp_gguf_str * str = &((struct wsp_gguf_str *) kv->value.arr.data)[j];
                             if (str->data) {
                                 free(str->data);
                             }
@@ -20102,7 +20102,7 @@ void gguf_free(struct gguf_context * ctx) {
 
     if (ctx->infos) {
         for (uint32_t i = 0; i < ctx->header.n_tensors; ++i) {
-            struct gguf_tensor_info * info = &ctx->infos[i];
+            struct wsp_gguf_tensor_info * info = &ctx->infos[i];
 
             if (info->name.data) {
                 free(info->name.data);
@@ -20115,38 +20115,38 @@ void gguf_free(struct gguf_context * ctx) {
     WSP_GGML_ALIGNED_FREE(ctx);
 }
 
-const char * gguf_type_name(enum gguf_type type) {
-    return GGUF_TYPE_NAME[type];
+const char * wsp_gguf_type_name(enum wsp_gguf_type type) {
+    return WSP_GGUF_TYPE_NAME[type];
 }
 
-int gguf_get_version(const struct gguf_context * ctx) {
+int wsp_gguf_get_version(const struct wsp_gguf_context * ctx) {
     return ctx->header.version;
 }
 
-size_t gguf_get_alignment(const struct gguf_context * ctx) {
+size_t wsp_gguf_get_alignment(const struct wsp_gguf_context * ctx) {
     return ctx->alignment;
 }
 
-size_t gguf_get_data_offset(const struct gguf_context * ctx) {
+size_t wsp_gguf_get_data_offset(const struct wsp_gguf_context * ctx) {
     return ctx->offset;
 }
 
-void * gguf_get_data(const struct gguf_context * ctx) {
+void * wsp_gguf_get_data(const struct wsp_gguf_context * ctx) {
     return ctx->data;
 }
 
-int gguf_get_n_kv(const struct gguf_context * ctx) {
+int wsp_gguf_get_n_kv(const struct wsp_gguf_context * ctx) {
     return ctx->header.n_kv;
 }
 
-int gguf_find_key(const struct gguf_context * ctx, const char * key) {
+int wsp_gguf_find_key(const struct wsp_gguf_context * ctx, const char * key) {
     // return -1 if key not found
     int keyfound = -1;
 
-    const int n_kv = gguf_get_n_kv(ctx);
+    const int n_kv = wsp_gguf_get_n_kv(ctx);
 
     for (int i = 0; i < n_kv; ++i) {
-        if (strcmp(key, gguf_get_key(ctx, i)) == 0) {
+        if (strcmp(key, wsp_gguf_get_key(ctx, i)) == 0) {
             keyfound = i;
             break;
         }
@@ -20155,92 +20155,92 @@ int gguf_find_key(const struct gguf_context * ctx, const char * key) {
     return keyfound;
 }
 
-const char * gguf_get_key(const struct gguf_context * ctx, int i) {
+const char * wsp_gguf_get_key(const struct wsp_gguf_context * ctx, int i) {
     return ctx->kv[i].key.data;
 }
 
-enum gguf_type gguf_get_kv_type(const struct gguf_context * ctx, int i) {
+enum wsp_gguf_type wsp_gguf_get_kv_type(const struct wsp_gguf_context * ctx, int i) {
     return ctx->kv[i].type;
 }
 
-enum gguf_type gguf_get_arr_type(const struct gguf_context * ctx, int i) {
+enum wsp_gguf_type wsp_gguf_get_arr_type(const struct wsp_gguf_context * ctx, int i) {
     return ctx->kv[i].value.arr.type;
 }
 
-const void * gguf_get_arr_data(const struct gguf_context * ctx, int i) {
+const void * wsp_gguf_get_arr_data(const struct wsp_gguf_context * ctx, int i) {
     return ctx->kv[i].value.arr.data;
 }
 
-const char * gguf_get_arr_str(const struct gguf_context * ctx, int key_id, int i) {
-    struct gguf_kv * kv = &ctx->kv[key_id];
-    struct gguf_str * str = &((struct gguf_str *) kv->value.arr.data)[i];
+const char * wsp_gguf_get_arr_str(const struct wsp_gguf_context * ctx, int key_id, int i) {
+    struct wsp_gguf_kv * kv = &ctx->kv[key_id];
+    struct wsp_gguf_str * str = &((struct wsp_gguf_str *) kv->value.arr.data)[i];
     return str->data;
 }
 
-int gguf_get_arr_n(const struct gguf_context * ctx, int i) {
+int wsp_gguf_get_arr_n(const struct wsp_gguf_context * ctx, int i) {
     return ctx->kv[i].value.arr.n;
 }
 
-uint8_t gguf_get_val_u8(const struct gguf_context * ctx, int i) {
+uint8_t wsp_gguf_get_val_u8(const struct wsp_gguf_context * ctx, int i) {
     return ctx->kv[i].value.uint8;
 }
 
-int8_t gguf_get_val_i8(const struct gguf_context * ctx, int i) {
+int8_t wsp_gguf_get_val_i8(const struct wsp_gguf_context * ctx, int i) {
     return ctx->kv[i].value.int8;
 }
 
-uint16_t gguf_get_val_u16(const struct gguf_context * ctx, int i) {
+uint16_t wsp_gguf_get_val_u16(const struct wsp_gguf_context * ctx, int i) {
     return ctx->kv[i].value.uint16;
 }
 
-int16_t gguf_get_val_i16(const struct gguf_context * ctx, int i) {
+int16_t wsp_gguf_get_val_i16(const struct wsp_gguf_context * ctx, int i) {
     return ctx->kv[i].value.int16;
 }
 
-uint32_t gguf_get_val_u32(const struct gguf_context * ctx, int i) {
+uint32_t wsp_gguf_get_val_u32(const struct wsp_gguf_context * ctx, int i) {
     return ctx->kv[i].value.uint32;
 }
 
-int32_t gguf_get_val_i32(const struct gguf_context * ctx, int i) {
+int32_t wsp_gguf_get_val_i32(const struct wsp_gguf_context * ctx, int i) {
     return ctx->kv[i].value.int32;
 }
 
-float gguf_get_val_f32(const struct gguf_context * ctx, int i) {
+float wsp_gguf_get_val_f32(const struct wsp_gguf_context * ctx, int i) {
     return ctx->kv[i].value.float32;
 }
 
-uint64_t gguf_get_val_u64(const struct gguf_context * ctx, int i) {
+uint64_t wsp_gguf_get_val_u64(const struct wsp_gguf_context * ctx, int i) {
     return ctx->kv[i].value.uint64;
 }
 
-int64_t gguf_get_val_i64(const struct gguf_context * ctx, int i) {
+int64_t wsp_gguf_get_val_i64(const struct wsp_gguf_context * ctx, int i) {
     return ctx->kv[i].value.int64;
 }
 
-double gguf_get_val_f64(const struct gguf_context * ctx, int i) {
+double wsp_gguf_get_val_f64(const struct wsp_gguf_context * ctx, int i) {
     return ctx->kv[i].value.float64;
 }
 
-bool gguf_get_val_bool(const struct gguf_context * ctx, int i) {
+bool wsp_gguf_get_val_bool(const struct wsp_gguf_context * ctx, int i) {
     return ctx->kv[i].value.bool_;
 }
 
-const char * gguf_get_val_str (const struct gguf_context * ctx, int i) {
+const char * wsp_gguf_get_val_str (const struct wsp_gguf_context * ctx, int i) {
     return ctx->kv[i].value.str.data;
 }
 
-int gguf_get_n_tensors(const struct gguf_context * ctx) {
+int wsp_gguf_get_n_tensors(const struct wsp_gguf_context * ctx) {
     return ctx->header.n_tensors;
 }
 
-int gguf_find_tensor(const struct gguf_context * ctx, const char * name) {
+int wsp_gguf_find_tensor(const struct wsp_gguf_context * ctx, const char * name) {
     // return -1 if tensor not found
     int tensorfound = -1;
 
-    const int n_tensors = gguf_get_n_tensors(ctx);
+    const int n_tensors = wsp_gguf_get_n_tensors(ctx);
 
     for (int i = 0; i < n_tensors; ++i) {
-        if (strcmp(name, gguf_get_tensor_name(ctx, i)) == 0) {
+        if (strcmp(name, wsp_gguf_get_tensor_name(ctx, i)) == 0) {
             tensorfound = i;
             break;
         }
@@ -20249,24 +20249,24 @@ int gguf_find_tensor(const struct gguf_context * ctx, const char * name) {
     return tensorfound;
 }
 
-size_t gguf_get_tensor_offset(const struct gguf_context * ctx, int i) {
+size_t wsp_gguf_get_tensor_offset(const struct wsp_gguf_context * ctx, int i) {
     return ctx->infos[i].offset;
 }
 
-char * gguf_get_tensor_name(const struct gguf_context * ctx, int i) {
+char * wsp_gguf_get_tensor_name(const struct wsp_gguf_context * ctx, int i) {
     return ctx->infos[i].name.data;
 }
 
 // returns the index
-static int gguf_get_or_add_key(struct gguf_context * ctx, const char * key) {
-    const int idx = gguf_find_key(ctx, key);
+static int wsp_gguf_get_or_add_key(struct wsp_gguf_context * ctx, const char * key) {
+    const int idx = wsp_gguf_find_key(ctx, key);
     if (idx >= 0) {
         return idx;
     }
 
-    const int n_kv = gguf_get_n_kv(ctx);
+    const int n_kv = wsp_gguf_get_n_kv(ctx);
 
-    ctx->kv = realloc(ctx->kv, (n_kv + 1) * sizeof(struct gguf_kv));
+    ctx->kv = realloc(ctx->kv, (n_kv + 1) * sizeof(struct wsp_gguf_kv));
     ctx->kv[n_kv].key.n    = strlen(key);
     ctx->kv[n_kv].key.data = strdup(key);
     ctx->header.n_kv++;
@@ -20274,156 +20274,156 @@ static int gguf_get_or_add_key(struct gguf_context * ctx, const char * key) {
     return n_kv;
 }
 
-void gguf_set_val_u8(struct gguf_context * ctx, const char * key, uint8_t val) {
-    const int idx = gguf_get_or_add_key(ctx, key);
+void wsp_gguf_set_val_u8(struct wsp_gguf_context * ctx, const char * key, uint8_t val) {
+    const int idx = wsp_gguf_get_or_add_key(ctx, key);
 
-    ctx->kv[idx].type        = GGUF_TYPE_UINT8;
+    ctx->kv[idx].type        = WSP_GGUF_TYPE_UINT8;
     ctx->kv[idx].value.uint8 = val;
 }
 
-void gguf_set_val_i8(struct gguf_context * ctx, const char * key, int8_t val) {
-    const int idx = gguf_get_or_add_key(ctx, key);
+void wsp_gguf_set_val_i8(struct wsp_gguf_context * ctx, const char * key, int8_t val) {
+    const int idx = wsp_gguf_get_or_add_key(ctx, key);
 
-    ctx->kv[idx].type       = GGUF_TYPE_INT8;
+    ctx->kv[idx].type       = WSP_GGUF_TYPE_INT8;
     ctx->kv[idx].value.int8 = val;
 }
 
-void gguf_set_val_u16(struct gguf_context * ctx, const char * key, uint16_t val) {
-    const int idx = gguf_get_or_add_key(ctx, key);
+void wsp_gguf_set_val_u16(struct wsp_gguf_context * ctx, const char * key, uint16_t val) {
+    const int idx = wsp_gguf_get_or_add_key(ctx, key);
 
-    ctx->kv[idx].type         = GGUF_TYPE_UINT16;
+    ctx->kv[idx].type         = WSP_GGUF_TYPE_UINT16;
     ctx->kv[idx].value.uint16 = val;
 }
 
-void gguf_set_val_i16(struct gguf_context * ctx, const char * key, int16_t val) {
-    const int idx = gguf_get_or_add_key(ctx, key);
+void wsp_gguf_set_val_i16(struct wsp_gguf_context * ctx, const char * key, int16_t val) {
+    const int idx = wsp_gguf_get_or_add_key(ctx, key);
 
-    ctx->kv[idx].type        = GGUF_TYPE_INT16;
+    ctx->kv[idx].type        = WSP_GGUF_TYPE_INT16;
     ctx->kv[idx].value.int16 = val;
 }
 
-void gguf_set_val_u32(struct gguf_context * ctx, const char * key, uint32_t val) {
-    const int idx = gguf_get_or_add_key(ctx, key);
+void wsp_gguf_set_val_u32(struct wsp_gguf_context * ctx, const char * key, uint32_t val) {
+    const int idx = wsp_gguf_get_or_add_key(ctx, key);
 
-    ctx->kv[idx].type         = GGUF_TYPE_UINT32;
+    ctx->kv[idx].type         = WSP_GGUF_TYPE_UINT32;
     ctx->kv[idx].value.uint32 = val;
 }
 
-void gguf_set_val_i32(struct gguf_context * ctx, const char * key, int32_t val) {
-    const int idx = gguf_get_or_add_key(ctx, key);
+void wsp_gguf_set_val_i32(struct wsp_gguf_context * ctx, const char * key, int32_t val) {
+    const int idx = wsp_gguf_get_or_add_key(ctx, key);
 
-    ctx->kv[idx].type        = GGUF_TYPE_INT32;
+    ctx->kv[idx].type        = WSP_GGUF_TYPE_INT32;
     ctx->kv[idx].value.int32 = val;
 }
 
-void gguf_set_val_f32(struct gguf_context * ctx, const char * key, float val) {
-    const int idx = gguf_get_or_add_key(ctx, key);
+void wsp_gguf_set_val_f32(struct wsp_gguf_context * ctx, const char * key, float val) {
+    const int idx = wsp_gguf_get_or_add_key(ctx, key);
 
-    ctx->kv[idx].type          = GGUF_TYPE_FLOAT32;
+    ctx->kv[idx].type          = WSP_GGUF_TYPE_FLOAT32;
     ctx->kv[idx].value.float32 = val;
 }
 
-void gguf_set_val_u64(struct gguf_context * ctx, const char * key, uint64_t val) {
-    const int idx = gguf_get_or_add_key(ctx, key);
+void wsp_gguf_set_val_u64(struct wsp_gguf_context * ctx, const char * key, uint64_t val) {
+    const int idx = wsp_gguf_get_or_add_key(ctx, key);
 
-    ctx->kv[idx].type         = GGUF_TYPE_UINT64;
+    ctx->kv[idx].type         = WSP_GGUF_TYPE_UINT64;
     ctx->kv[idx].value.uint64 = val;
 }
 
-void gguf_set_val_i64(struct gguf_context * ctx, const char * key, int64_t val) {
-    const int idx = gguf_get_or_add_key(ctx, key);
+void wsp_gguf_set_val_i64(struct wsp_gguf_context * ctx, const char * key, int64_t val) {
+    const int idx = wsp_gguf_get_or_add_key(ctx, key);
 
-    ctx->kv[idx].type        = GGUF_TYPE_INT64;
+    ctx->kv[idx].type        = WSP_GGUF_TYPE_INT64;
     ctx->kv[idx].value.int64 = val;
 }
 
-void gguf_set_val_f64(struct gguf_context * ctx, const char * key, double val) {
-    const int idx = gguf_get_or_add_key(ctx, key);
+void wsp_gguf_set_val_f64(struct wsp_gguf_context * ctx, const char * key, double val) {
+    const int idx = wsp_gguf_get_or_add_key(ctx, key);
 
-    ctx->kv[idx].type          = GGUF_TYPE_FLOAT64;
+    ctx->kv[idx].type          = WSP_GGUF_TYPE_FLOAT64;
     ctx->kv[idx].value.float64 = val;
 }
 
-void gguf_set_val_bool(struct gguf_context * ctx, const char * key, bool val) {
-    const int idx = gguf_get_or_add_key(ctx, key);
+void wsp_gguf_set_val_bool(struct wsp_gguf_context * ctx, const char * key, bool val) {
+    const int idx = wsp_gguf_get_or_add_key(ctx, key);
 
-    ctx->kv[idx].type        = GGUF_TYPE_BOOL;
+    ctx->kv[idx].type        = WSP_GGUF_TYPE_BOOL;
     ctx->kv[idx].value.bool_ = val;
 }
 
-void gguf_set_val_str(struct gguf_context * ctx, const char * key, const char * val) {
-    const int idx = gguf_get_or_add_key(ctx, key);
+void wsp_gguf_set_val_str(struct wsp_gguf_context * ctx, const char * key, const char * val) {
+    const int idx = wsp_gguf_get_or_add_key(ctx, key);
 
-    ctx->kv[idx].type           = GGUF_TYPE_STRING;
+    ctx->kv[idx].type           = WSP_GGUF_TYPE_STRING;
     ctx->kv[idx].value.str.n    = strlen(val);
     ctx->kv[idx].value.str.data = strdup(val);
 }
 
-void gguf_set_arr_data(struct gguf_context * ctx, const char * key, enum gguf_type type, const void * data, int n) {
-    const int idx = gguf_get_or_add_key(ctx, key);
+void wsp_gguf_set_arr_data(struct wsp_gguf_context * ctx, const char * key, enum wsp_gguf_type type, const void * data, int n) {
+    const int idx = wsp_gguf_get_or_add_key(ctx, key);
 
-    ctx->kv[idx].type           = GGUF_TYPE_ARRAY;
+    ctx->kv[idx].type           = WSP_GGUF_TYPE_ARRAY;
     ctx->kv[idx].value.arr.type = type;
     ctx->kv[idx].value.arr.n    = n;
-    ctx->kv[idx].value.arr.data = malloc(n*GGUF_TYPE_SIZE[type]);
-    memcpy(ctx->kv[idx].value.arr.data, data, n*GGUF_TYPE_SIZE[type]);
+    ctx->kv[idx].value.arr.data = malloc(n*WSP_GGUF_TYPE_SIZE[type]);
+    memcpy(ctx->kv[idx].value.arr.data, data, n*WSP_GGUF_TYPE_SIZE[type]);
 }
 
-void gguf_set_arr_str(struct gguf_context * ctx, const char * key, const char ** data, int n) {
-    const int idx = gguf_get_or_add_key(ctx, key);
+void wsp_gguf_set_arr_str(struct wsp_gguf_context * ctx, const char * key, const char ** data, int n) {
+    const int idx = wsp_gguf_get_or_add_key(ctx, key);
 
-    ctx->kv[idx].type           = GGUF_TYPE_ARRAY;
-    ctx->kv[idx].value.arr.type = GGUF_TYPE_STRING;
+    ctx->kv[idx].type           = WSP_GGUF_TYPE_ARRAY;
+    ctx->kv[idx].value.arr.type = WSP_GGUF_TYPE_STRING;
     ctx->kv[idx].value.arr.n    = n;
-    ctx->kv[idx].value.arr.data = malloc(n*sizeof(struct gguf_str));
+    ctx->kv[idx].value.arr.data = malloc(n*sizeof(struct wsp_gguf_str));
     for (int i = 0; i < n; i++) {
-        struct gguf_str * str = &((struct gguf_str *)ctx->kv[idx].value.arr.data)[i];
+        struct wsp_gguf_str * str = &((struct wsp_gguf_str *)ctx->kv[idx].value.arr.data)[i];
         str->n    = strlen(data[i]);
         str->data = strdup(data[i]);
     }
 }
 
 // set or add KV pairs from another context
-void gguf_set_kv(struct gguf_context * ctx, struct gguf_context * src) {
+void wsp_gguf_set_kv(struct wsp_gguf_context * ctx, struct wsp_gguf_context * src) {
     for (uint32_t i = 0; i < src->header.n_kv; i++) {
         switch (src->kv[i].type) {
-            case GGUF_TYPE_UINT8:   gguf_set_val_u8  (ctx, src->kv[i].key.data, src->kv[i].value.uint8);    break;
-            case GGUF_TYPE_INT8:    gguf_set_val_i8  (ctx, src->kv[i].key.data, src->kv[i].value.int8);     break;
-            case GGUF_TYPE_UINT16:  gguf_set_val_u16 (ctx, src->kv[i].key.data, src->kv[i].value.uint16);   break;
-            case GGUF_TYPE_INT16:   gguf_set_val_i16 (ctx, src->kv[i].key.data, src->kv[i].value.int16);    break;
-            case GGUF_TYPE_UINT32:  gguf_set_val_u32 (ctx, src->kv[i].key.data, src->kv[i].value.uint32);   break;
-            case GGUF_TYPE_INT32:   gguf_set_val_i32 (ctx, src->kv[i].key.data, src->kv[i].value.int32);    break;
-            case GGUF_TYPE_FLOAT32: gguf_set_val_f32 (ctx, src->kv[i].key.data, src->kv[i].value.float32);  break;
-            case GGUF_TYPE_UINT64:  gguf_set_val_u64 (ctx, src->kv[i].key.data, src->kv[i].value.uint64);   break;
-            case GGUF_TYPE_INT64:   gguf_set_val_i64 (ctx, src->kv[i].key.data, src->kv[i].value.int64);    break;
-            case GGUF_TYPE_FLOAT64: gguf_set_val_f64 (ctx, src->kv[i].key.data, src->kv[i].value.float64);  break;
-            case GGUF_TYPE_BOOL:    gguf_set_val_bool(ctx, src->kv[i].key.data, src->kv[i].value.bool_);    break;
-            case GGUF_TYPE_STRING:  gguf_set_val_str (ctx, src->kv[i].key.data, src->kv[i].value.str.data); break;
-            case GGUF_TYPE_ARRAY:
+            case WSP_GGUF_TYPE_UINT8:   wsp_gguf_set_val_u8  (ctx, src->kv[i].key.data, src->kv[i].value.uint8);    break;
+            case WSP_GGUF_TYPE_INT8:    wsp_gguf_set_val_i8  (ctx, src->kv[i].key.data, src->kv[i].value.int8);     break;
+            case WSP_GGUF_TYPE_UINT16:  wsp_gguf_set_val_u16 (ctx, src->kv[i].key.data, src->kv[i].value.uint16);   break;
+            case WSP_GGUF_TYPE_INT16:   wsp_gguf_set_val_i16 (ctx, src->kv[i].key.data, src->kv[i].value.int16);    break;
+            case WSP_GGUF_TYPE_UINT32:  wsp_gguf_set_val_u32 (ctx, src->kv[i].key.data, src->kv[i].value.uint32);   break;
+            case WSP_GGUF_TYPE_INT32:   wsp_gguf_set_val_i32 (ctx, src->kv[i].key.data, src->kv[i].value.int32);    break;
+            case WSP_GGUF_TYPE_FLOAT32: wsp_gguf_set_val_f32 (ctx, src->kv[i].key.data, src->kv[i].value.float32);  break;
+            case WSP_GGUF_TYPE_UINT64:  wsp_gguf_set_val_u64 (ctx, src->kv[i].key.data, src->kv[i].value.uint64);   break;
+            case WSP_GGUF_TYPE_INT64:   wsp_gguf_set_val_i64 (ctx, src->kv[i].key.data, src->kv[i].value.int64);    break;
+            case WSP_GGUF_TYPE_FLOAT64: wsp_gguf_set_val_f64 (ctx, src->kv[i].key.data, src->kv[i].value.float64);  break;
+            case WSP_GGUF_TYPE_BOOL:    wsp_gguf_set_val_bool(ctx, src->kv[i].key.data, src->kv[i].value.bool_);    break;
+            case WSP_GGUF_TYPE_STRING:  wsp_gguf_set_val_str (ctx, src->kv[i].key.data, src->kv[i].value.str.data); break;
+            case WSP_GGUF_TYPE_ARRAY:
                 {
-                    if (src->kv[i].value.arr.type == GGUF_TYPE_STRING) {
+                    if (src->kv[i].value.arr.type == WSP_GGUF_TYPE_STRING) {
                         const char ** data = malloc(src->kv[i].value.arr.n*sizeof(char *));
                         for (uint32_t j = 0; j < src->kv[i].value.arr.n; j++) {
-                            data[j] = ((struct gguf_str *)src->kv[i].value.arr.data)[j].data;
+                            data[j] = ((struct wsp_gguf_str *)src->kv[i].value.arr.data)[j].data;
                         }
-                        gguf_set_arr_str(ctx, src->kv[i].key.data, data, src->kv[i].value.arr.n);
+                        wsp_gguf_set_arr_str(ctx, src->kv[i].key.data, data, src->kv[i].value.arr.n);
                         free(data);
-                    } else if (src->kv[i].value.arr.type == GGUF_TYPE_ARRAY) {
+                    } else if (src->kv[i].value.arr.type == WSP_GGUF_TYPE_ARRAY) {
                         WSP_GGML_ASSERT(false && "nested arrays not supported");
                     } else {
-                        gguf_set_arr_data(ctx, src->kv[i].key.data, src->kv[i].value.arr.type, src->kv[i].value.arr.data, src->kv[i].value.arr.n);
+                        wsp_gguf_set_arr_data(ctx, src->kv[i].key.data, src->kv[i].value.arr.type, src->kv[i].value.arr.data, src->kv[i].value.arr.n);
                     }
                 } break;
-            case GGUF_TYPE_COUNT:  WSP_GGML_ASSERT(false && "invalid type"); break;
+            case WSP_GGUF_TYPE_COUNT:  WSP_GGML_ASSERT(false && "invalid type"); break;
         }
     }
 }
 
-void gguf_add_tensor(
-             struct gguf_context * ctx,
+void wsp_gguf_add_tensor(
+             struct wsp_gguf_context * ctx,
         const struct wsp_ggml_tensor * tensor) {
     const int idx = ctx->header.n_tensors;
-    ctx->infos = realloc(ctx->infos, (idx + 1)*sizeof(struct gguf_tensor_info));
+    ctx->infos = realloc(ctx->infos, (idx + 1)*sizeof(struct wsp_gguf_tensor_info));
 
     ctx->infos[idx].name.n    = strlen(tensor->name);
     ctx->infos[idx].name.data = strdup(tensor->name);
@@ -20449,8 +20449,8 @@ void gguf_add_tensor(
     ctx->header.n_tensors++;
 }
 
-void gguf_set_tensor_type(struct gguf_context * ctx, const char * name, enum wsp_ggml_type type) {
-    const int idx = gguf_find_tensor(ctx, name);
+void wsp_gguf_set_tensor_type(struct wsp_gguf_context * ctx, const char * name, enum wsp_ggml_type type) {
+    const int idx = wsp_gguf_find_tensor(ctx, name);
     if (idx < 0) {
         WSP_GGML_ASSERT(false && "tensor not found");
     }
@@ -20458,8 +20458,8 @@ void gguf_set_tensor_type(struct gguf_context * ctx, const char * name, enum wsp
     ctx->infos[idx].type = type;
 }
 
-void gguf_set_tensor_data(struct gguf_context * ctx, const char * name, const void * data, size_t size) {
-    const int idx = gguf_find_tensor(ctx, name);
+void wsp_gguf_set_tensor_data(struct wsp_gguf_context * ctx, const char * name, const void * data, size_t size) {
+    const int idx = wsp_gguf_find_tensor(ctx, name);
     if (idx < 0) {
         WSP_GGML_ASSERT(false && "tensor not found");
     }
@@ -20473,23 +20473,23 @@ void gguf_set_tensor_data(struct gguf_context * ctx, const char * name, const vo
     }
 }
 
-//static void gguf_fwrite_str(FILE * file, const struct gguf_str * val) {
+//static void wsp_gguf_fwrite_str(FILE * file, const struct wsp_gguf_str * val) {
 //    fwrite(&val->n,   sizeof(val->n),    1, file);
 //    fwrite(val->data, sizeof(char), val->n, file);
 //}
 //
-//static void gguf_fwrite_el(FILE * file, const void * val, size_t size) {
+//static void wsp_gguf_fwrite_el(FILE * file, const void * val, size_t size) {
 //    fwrite(val, sizeof(char), size, file);
 //}
 
-struct gguf_buf {
+struct wsp_gguf_buf {
     void * data;
     size_t size;
     size_t offset;
 };
 
-static struct gguf_buf gguf_buf_init(size_t size) {
-    struct gguf_buf buf = {
+static struct wsp_gguf_buf wsp_gguf_buf_init(size_t size) {
+    struct wsp_gguf_buf buf = {
         /*buf.data   =*/ size == 0 ? NULL : malloc(size),
         /*buf.size   =*/ size,
         /*buf.offset =*/ 0,
@@ -20498,13 +20498,13 @@ static struct gguf_buf gguf_buf_init(size_t size) {
     return buf;
 }
 
-static void gguf_buf_free(struct gguf_buf buf) {
+static void wsp_gguf_buf_free(struct wsp_gguf_buf buf) {
     if (buf.data) {
         free(buf.data);
     }
 }
 
-static void gguf_buf_grow(struct gguf_buf * buf, size_t size) {
+static void wsp_gguf_buf_grow(struct wsp_gguf_buf * buf, size_t size) {
     if (buf->offset + size > buf->size) {
         buf->size = 1.5*(buf->offset + size);
         if (buf->data) {
@@ -20513,8 +20513,8 @@ static void gguf_buf_grow(struct gguf_buf * buf, size_t size) {
     }
 }
 
-static void gguf_bwrite_str(struct gguf_buf * buf, const struct gguf_str * val) {
-    gguf_buf_grow(buf, sizeof(val->n) + val->n);
+static void wsp_gguf_bwrite_str(struct wsp_gguf_buf * buf, const struct wsp_gguf_str * val) {
+    wsp_gguf_buf_grow(buf, sizeof(val->n) + val->n);
 
     if (buf->data) {
         memcpy((char *) buf->data + buf->offset, &val->n, sizeof(val->n));
@@ -20527,8 +20527,8 @@ static void gguf_bwrite_str(struct gguf_buf * buf, const struct gguf_str * val)
     buf->offset += val->n;
 }
 
-static void gguf_bwrite_el(struct gguf_buf * buf, const void * val, size_t el_size) {
-    gguf_buf_grow(buf, el_size);
+static void wsp_gguf_bwrite_el(struct wsp_gguf_buf * buf, const void * val, size_t el_size) {
+    wsp_gguf_buf_grow(buf, el_size);
 
     if (buf->data) {
         memcpy((char *) buf->data + buf->offset, val, el_size);
@@ -20536,78 +20536,78 @@ static void gguf_bwrite_el(struct gguf_buf * buf, const void * val, size_t el_si
     buf->offset += el_size;
 }
 
-static void gguf_write_to_buf(const struct gguf_context * ctx, struct gguf_buf * buf, bool only_meta) {
+static void wsp_gguf_write_to_buf(const struct wsp_gguf_context * ctx, struct wsp_gguf_buf * buf, bool only_meta) {
     // write header
-    gguf_bwrite_el(buf, &ctx->header.magic,     sizeof(ctx->header.magic));
-    gguf_bwrite_el(buf, &ctx->header.version,   sizeof(ctx->header.version));
-    gguf_bwrite_el(buf, &ctx->header.n_tensors, sizeof(ctx->header.n_tensors));
-    gguf_bwrite_el(buf, &ctx->header.n_kv,      sizeof(ctx->header.n_kv));
+    wsp_gguf_bwrite_el(buf, &ctx->header.magic,     sizeof(ctx->header.magic));
+    wsp_gguf_bwrite_el(buf, &ctx->header.version,   sizeof(ctx->header.version));
+    wsp_gguf_bwrite_el(buf, &ctx->header.n_tensors, sizeof(ctx->header.n_tensors));
+    wsp_gguf_bwrite_el(buf, &ctx->header.n_kv,      sizeof(ctx->header.n_kv));
 
     // write key-value pairs
     for (uint32_t i = 0; i < ctx->header.n_kv; ++i) {
-        struct gguf_kv * kv = &ctx->kv[i];
+        struct wsp_gguf_kv * kv = &ctx->kv[i];
 
-        gguf_bwrite_str(buf, &kv->key);
-        gguf_bwrite_el (buf, &kv->type, sizeof(kv->type));
+        wsp_gguf_bwrite_str(buf, &kv->key);
+        wsp_gguf_bwrite_el (buf, &kv->type, sizeof(kv->type));
 
         switch (kv->type) {
-            case GGUF_TYPE_UINT8:   gguf_bwrite_el( buf, &kv->value.uint8,   sizeof(kv->value.uint8)  ); break;
-            case GGUF_TYPE_INT8:    gguf_bwrite_el (buf, &kv->value.int8,    sizeof(kv->value.int8)   ); break;
-            case GGUF_TYPE_UINT16:  gguf_bwrite_el (buf, &kv->value.uint16,  sizeof(kv->value.uint16) ); break;
-            case GGUF_TYPE_INT16:   gguf_bwrite_el (buf, &kv->value.int16,   sizeof(kv->value.int16)  ); break;
-            case GGUF_TYPE_UINT32:  gguf_bwrite_el (buf, &kv->value.uint32,  sizeof(kv->value.uint32) ); break;
-            case GGUF_TYPE_INT32:   gguf_bwrite_el (buf, &kv->value.int32,   sizeof(kv->value.int32)  ); break;
-            case GGUF_TYPE_FLOAT32: gguf_bwrite_el (buf, &kv->value.float32, sizeof(kv->value.float32)); break;
-            case GGUF_TYPE_UINT64:  gguf_bwrite_el (buf, &kv->value.uint64,  sizeof(kv->value.uint64) ); break;
-            case GGUF_TYPE_INT64:   gguf_bwrite_el (buf, &kv->value.int64,   sizeof(kv->value.int64)  ); break;
-            case GGUF_TYPE_FLOAT64: gguf_bwrite_el (buf, &kv->value.float64, sizeof(kv->value.float64)); break;
-            case GGUF_TYPE_BOOL:    gguf_bwrite_el (buf, &kv->value.bool_,   sizeof(kv->value.bool_)  ); break;
-            case GGUF_TYPE_STRING:  gguf_bwrite_str(buf, &kv->value.str                               ); break;
-            case GGUF_TYPE_ARRAY:
+            case WSP_GGUF_TYPE_UINT8:   wsp_gguf_bwrite_el( buf, &kv->value.uint8,   sizeof(kv->value.uint8)  ); break;
+            case WSP_GGUF_TYPE_INT8:    wsp_gguf_bwrite_el (buf, &kv->value.int8,    sizeof(kv->value.int8)   ); break;
+            case WSP_GGUF_TYPE_UINT16:  wsp_gguf_bwrite_el (buf, &kv->value.uint16,  sizeof(kv->value.uint16) ); break;
+            case WSP_GGUF_TYPE_INT16:   wsp_gguf_bwrite_el (buf, &kv->value.int16,   sizeof(kv->value.int16)  ); break;
+            case WSP_GGUF_TYPE_UINT32:  wsp_gguf_bwrite_el (buf, &kv->value.uint32,  sizeof(kv->value.uint32) ); break;
+            case WSP_GGUF_TYPE_INT32:   wsp_gguf_bwrite_el (buf, &kv->value.int32,   sizeof(kv->value.int32)  ); break;
+            case WSP_GGUF_TYPE_FLOAT32: wsp_gguf_bwrite_el (buf, &kv->value.float32, sizeof(kv->value.float32)); break;
+            case WSP_GGUF_TYPE_UINT64:  wsp_gguf_bwrite_el (buf, &kv->value.uint64,  sizeof(kv->value.uint64) ); break;
+            case WSP_GGUF_TYPE_INT64:   wsp_gguf_bwrite_el (buf, &kv->value.int64,   sizeof(kv->value.int64)  ); break;
+            case WSP_GGUF_TYPE_FLOAT64: wsp_gguf_bwrite_el (buf, &kv->value.float64, sizeof(kv->value.float64)); break;
+            case WSP_GGUF_TYPE_BOOL:    wsp_gguf_bwrite_el (buf, &kv->value.bool_,   sizeof(kv->value.bool_)  ); break;
+            case WSP_GGUF_TYPE_STRING:  wsp_gguf_bwrite_str(buf, &kv->value.str                               ); break;
+            case WSP_GGUF_TYPE_ARRAY:
                 {
-                    gguf_bwrite_el(buf, &kv->value.arr.type, sizeof(kv->value.arr.type));
-                    gguf_bwrite_el(buf, &kv->value.arr.n,    sizeof(kv->value.arr.n)   );
+                    wsp_gguf_bwrite_el(buf, &kv->value.arr.type, sizeof(kv->value.arr.type));
+                    wsp_gguf_bwrite_el(buf, &kv->value.arr.n,    sizeof(kv->value.arr.n)   );
 
                     switch (kv->value.arr.type) {
-                        case GGUF_TYPE_UINT8:
-                        case GGUF_TYPE_INT8:
-                        case GGUF_TYPE_UINT16:
-                        case GGUF_TYPE_INT16:
-                        case GGUF_TYPE_UINT32:
-                        case GGUF_TYPE_INT32:
-                        case GGUF_TYPE_FLOAT32:
-                        case GGUF_TYPE_UINT64:
-                        case GGUF_TYPE_INT64:
-                        case GGUF_TYPE_FLOAT64:
-                        case GGUF_TYPE_BOOL:
+                        case WSP_GGUF_TYPE_UINT8:
+                        case WSP_GGUF_TYPE_INT8:
+                        case WSP_GGUF_TYPE_UINT16:
+                        case WSP_GGUF_TYPE_INT16:
+                        case WSP_GGUF_TYPE_UINT32:
+                        case WSP_GGUF_TYPE_INT32:
+                        case WSP_GGUF_TYPE_FLOAT32:
+                        case WSP_GGUF_TYPE_UINT64:
+                        case WSP_GGUF_TYPE_INT64:
+                        case WSP_GGUF_TYPE_FLOAT64:
+                        case WSP_GGUF_TYPE_BOOL:
                             {
-                                gguf_bwrite_el(buf, kv->value.arr.data, kv->value.arr.n * GGUF_TYPE_SIZE[kv->value.arr.type]);
+                                wsp_gguf_bwrite_el(buf, kv->value.arr.data, kv->value.arr.n * WSP_GGUF_TYPE_SIZE[kv->value.arr.type]);
                             } break;
-                        case GGUF_TYPE_STRING:
+                        case WSP_GGUF_TYPE_STRING:
                             {
                                 for (uint32_t j = 0; j < kv->value.arr.n; ++j) {
-                                    gguf_bwrite_str(buf, &((struct gguf_str *) kv->value.arr.data)[j]);
+                                    wsp_gguf_bwrite_str(buf, &((struct wsp_gguf_str *) kv->value.arr.data)[j]);
                                 }
                             } break;
-                        case GGUF_TYPE_ARRAY:
-                        case GGUF_TYPE_COUNT: WSP_GGML_ASSERT(false && "invalid type"); break;
+                        case WSP_GGUF_TYPE_ARRAY:
+                        case WSP_GGUF_TYPE_COUNT: WSP_GGML_ASSERT(false && "invalid type"); break;
                     };
                 } break;
-            case GGUF_TYPE_COUNT: WSP_GGML_ASSERT(false && "invalid type");
+            case WSP_GGUF_TYPE_COUNT: WSP_GGML_ASSERT(false && "invalid type");
         };
     }
 
     // write tensor infos
     for (uint32_t i = 0; i < ctx->header.n_tensors; ++i) {
-        struct gguf_tensor_info * info = &ctx->infos[i];
+        struct wsp_gguf_tensor_info * info = &ctx->infos[i];
 
-        gguf_bwrite_str(buf, &info->name);
-        gguf_bwrite_el (buf, &info->n_dims, sizeof(info->n_dims));
+        wsp_gguf_bwrite_str(buf, &info->name);
+        wsp_gguf_bwrite_el (buf, &info->n_dims, sizeof(info->n_dims));
         for (uint32_t j = 0; j < info->n_dims; ++j) {
-            gguf_bwrite_el(buf, &info->ne[j], sizeof(info->ne[j]));
+            wsp_gguf_bwrite_el(buf, &info->ne[j], sizeof(info->ne[j]));
         }
-        gguf_bwrite_el(buf, &info->type,   sizeof(info->type));
-        gguf_bwrite_el(buf, &info->offset, sizeof(info->offset));
+        wsp_gguf_bwrite_el(buf, &info->type,   sizeof(info->type));
+        wsp_gguf_bwrite_el(buf, &info->offset, sizeof(info->offset));
     }
 
     // we require the data section to be aligned, so take into account any padding
@@ -20618,7 +20618,7 @@ static void gguf_write_to_buf(const struct gguf_context * ctx, struct gguf_buf *
         if (offset_pad != offset) {
             uint8_t pad = 0;
             for (size_t i = 0; i < offset_pad - offset; ++i) {
-                gguf_bwrite_el(buf, &pad, sizeof(pad));
+                wsp_gguf_bwrite_el(buf, &pad, sizeof(pad));
             }
         }
     }
@@ -20631,17 +20631,17 @@ static void gguf_write_to_buf(const struct gguf_context * ctx, struct gguf_buf *
 
     // write tensor data
     for (uint32_t i = 0; i < ctx->header.n_tensors; ++i) {
-        struct gguf_tensor_info * info = &ctx->infos[i];
+        struct wsp_gguf_tensor_info * info = &ctx->infos[i];
 
         const size_t size     = info->size;
         const size_t size_pad = WSP_GGML_PAD(size, ctx->alignment);
 
-        gguf_bwrite_el(buf, info->data, size);
+        wsp_gguf_bwrite_el(buf, info->data, size);
 
         if (size_pad != size) {
             uint8_t pad = 0;
             for (size_t j = 0; j < size_pad - size; ++j) {
-                gguf_bwrite_el(buf, &pad, sizeof(pad));
+                wsp_gguf_bwrite_el(buf, &pad, sizeof(pad));
             }
         }
 
@@ -20651,40 +20651,40 @@ static void gguf_write_to_buf(const struct gguf_context * ctx, struct gguf_buf *
     }
 }
 
-void gguf_write_to_file(const struct gguf_context * ctx, const char * fname, bool only_meta) {
+void wsp_gguf_write_to_file(const struct wsp_gguf_context * ctx, const char * fname, bool only_meta) {
     FILE * file = fopen(fname, "wb");
     if (!file) {
         WSP_GGML_ASSERT(false && "failed to open file for writing");
     }
 
-    struct gguf_buf buf = gguf_buf_init(16*1024);
+    struct wsp_gguf_buf buf = wsp_gguf_buf_init(16*1024);
 
-    gguf_write_to_buf(ctx, &buf, only_meta);
+    wsp_gguf_write_to_buf(ctx, &buf, only_meta);
 
     fwrite(buf.data, 1, buf.offset, file);
 
-    gguf_buf_free(buf);
+    wsp_gguf_buf_free(buf);
 
     fclose(file);
 }
 
-size_t gguf_get_meta_size(const struct gguf_context * ctx) {
+size_t wsp_gguf_get_meta_size(const struct wsp_gguf_context * ctx) {
     // no allocs - only compute size
-    struct gguf_buf buf = gguf_buf_init(0);
+    struct wsp_gguf_buf buf = wsp_gguf_buf_init(0);
 
-    gguf_write_to_buf(ctx, &buf, true);
+    wsp_gguf_write_to_buf(ctx, &buf, true);
 
     return buf.offset;
 }
 
-void gguf_get_meta_data(const struct gguf_context * ctx, void * data) {
-    struct gguf_buf buf = gguf_buf_init(16*1024);
+void wsp_gguf_get_meta_data(const struct wsp_gguf_context * ctx, void * data) {
+    struct wsp_gguf_buf buf = wsp_gguf_buf_init(16*1024);
 
-    gguf_write_to_buf(ctx, &buf, true);
+    wsp_gguf_write_to_buf(ctx, &buf, true);
 
     memcpy(data, buf.data, buf.offset);
 
-    gguf_buf_free(buf);
+    wsp_gguf_buf_free(buf);
 }
 
 ////////////////////////////////////////////////////////////////////////////////
diff --git a/cpp/ggml.h b/cpp/ggml.h
index a165aae..f1bbd88 100644
--- a/cpp/ggml.h
+++ b/cpp/ggml.h
@@ -231,10 +231,10 @@
 #define WSP_GGML_EXIT_SUCCESS 0
 #define WSP_GGML_EXIT_ABORTED 1
 
-#define GGUF_MAGIC   0x46554747 // "GGUF"
-#define GGUF_VERSION 2
+#define WSP_GGUF_MAGIC   0x46554747 // "GGUF"
+#define WSP_GGUF_VERSION 2
 
-#define GGUF_DEFAULT_ALIGNMENT 32
+#define WSP_GGUF_DEFAULT_ALIGNMENT 32
 
 #define WSP_GGML_UNUSED(x) (void)(x)
 
@@ -1841,122 +1841,122 @@ extern "C" {
     // gguf
     //
 
-    enum gguf_type {
-        GGUF_TYPE_UINT8   = 0,
-        GGUF_TYPE_INT8    = 1,
-        GGUF_TYPE_UINT16  = 2,
-        GGUF_TYPE_INT16   = 3,
-        GGUF_TYPE_UINT32  = 4,
-        GGUF_TYPE_INT32   = 5,
-        GGUF_TYPE_FLOAT32 = 6,
-        GGUF_TYPE_BOOL    = 7,
-        GGUF_TYPE_STRING  = 8,
-        GGUF_TYPE_ARRAY   = 9,
-        GGUF_TYPE_UINT64  = 10,
-        GGUF_TYPE_INT64   = 11,
-        GGUF_TYPE_FLOAT64 = 12,
-        GGUF_TYPE_COUNT,       // marks the end of the enum
+    enum wsp_gguf_type {
+        WSP_GGUF_TYPE_UINT8   = 0,
+        WSP_GGUF_TYPE_INT8    = 1,
+        WSP_GGUF_TYPE_UINT16  = 2,
+        WSP_GGUF_TYPE_INT16   = 3,
+        WSP_GGUF_TYPE_UINT32  = 4,
+        WSP_GGUF_TYPE_INT32   = 5,
+        WSP_GGUF_TYPE_FLOAT32 = 6,
+        WSP_GGUF_TYPE_BOOL    = 7,
+        WSP_GGUF_TYPE_STRING  = 8,
+        WSP_GGUF_TYPE_ARRAY   = 9,
+        WSP_GGUF_TYPE_UINT64  = 10,
+        WSP_GGUF_TYPE_INT64   = 11,
+        WSP_GGUF_TYPE_FLOAT64 = 12,
+        WSP_GGUF_TYPE_COUNT,       // marks the end of the enum
     };
 
-    struct gguf_context;
+    struct wsp_gguf_context;
 
-    struct gguf_init_params {
+    struct wsp_gguf_init_params {
         bool no_alloc;
 
         // if not NULL, create a wsp_ggml_context and allocate the tensor data in it
         struct wsp_ggml_context ** ctx;
     };
 
-    WSP_GGML_API struct gguf_context * gguf_init_empty(void);
-    WSP_GGML_API struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_params params);
-    //WSP_GGML_API struct gguf_context * gguf_init_from_buffer(..);
+    WSP_GGML_API struct wsp_gguf_context * wsp_gguf_init_empty(void);
+    WSP_GGML_API struct wsp_gguf_context * wsp_gguf_init_from_file(const char * fname, struct wsp_gguf_init_params params);
+    //WSP_GGML_API struct wsp_gguf_context * wsp_gguf_init_from_buffer(..);
 
-    WSP_GGML_API void gguf_free(struct gguf_context * ctx);
+    WSP_GGML_API void wsp_gguf_free(struct wsp_gguf_context * ctx);
 
-    WSP_GGML_API const char * gguf_type_name(enum gguf_type type);
+    WSP_GGML_API const char * wsp_gguf_type_name(enum wsp_gguf_type type);
 
-    WSP_GGML_API int    gguf_get_version    (const struct gguf_context * ctx);
-    WSP_GGML_API size_t gguf_get_alignment  (const struct gguf_context * ctx);
-    WSP_GGML_API size_t gguf_get_data_offset(const struct gguf_context * ctx);
-    WSP_GGML_API void * gguf_get_data       (const struct gguf_context * ctx);
+    WSP_GGML_API int    wsp_gguf_get_version    (const struct wsp_gguf_context * ctx);
+    WSP_GGML_API size_t wsp_gguf_get_alignment  (const struct wsp_gguf_context * ctx);
+    WSP_GGML_API size_t wsp_gguf_get_data_offset(const struct wsp_gguf_context * ctx);
+    WSP_GGML_API void * wsp_gguf_get_data       (const struct wsp_gguf_context * ctx);
 
-    WSP_GGML_API int          gguf_get_n_kv(const struct gguf_context * ctx);
-    WSP_GGML_API int          gguf_find_key(const struct gguf_context * ctx, const char * key);
-    WSP_GGML_API const char * gguf_get_key (const struct gguf_context * ctx, int i);
+    WSP_GGML_API int          wsp_gguf_get_n_kv(const struct wsp_gguf_context * ctx);
+    WSP_GGML_API int          wsp_gguf_find_key(const struct wsp_gguf_context * ctx, const char * key);
+    WSP_GGML_API const char * wsp_gguf_get_key (const struct wsp_gguf_context * ctx, int i);
 
-    WSP_GGML_API enum gguf_type gguf_get_kv_type (const struct gguf_context * ctx, int i);
-    WSP_GGML_API enum gguf_type gguf_get_arr_type(const struct gguf_context * ctx, int i);
+    WSP_GGML_API enum wsp_gguf_type wsp_gguf_get_kv_type (const struct wsp_gguf_context * ctx, int i);
+    WSP_GGML_API enum wsp_gguf_type wsp_gguf_get_arr_type(const struct wsp_gguf_context * ctx, int i);
 
     // results are undefined if the wrong type is used for the key
-    WSP_GGML_API uint8_t      gguf_get_val_u8  (const struct gguf_context * ctx, int i);
-    WSP_GGML_API int8_t       gguf_get_val_i8  (const struct gguf_context * ctx, int i);
-    WSP_GGML_API uint16_t     gguf_get_val_u16 (const struct gguf_context * ctx, int i);
-    WSP_GGML_API int16_t      gguf_get_val_i16 (const struct gguf_context * ctx, int i);
-    WSP_GGML_API uint32_t     gguf_get_val_u32 (const struct gguf_context * ctx, int i);
-    WSP_GGML_API int32_t      gguf_get_val_i32 (const struct gguf_context * ctx, int i);
-    WSP_GGML_API float        gguf_get_val_f32 (const struct gguf_context * ctx, int i);
-    WSP_GGML_API uint64_t     gguf_get_val_u64 (const struct gguf_context * ctx, int i);
-    WSP_GGML_API int64_t      gguf_get_val_i64 (const struct gguf_context * ctx, int i);
-    WSP_GGML_API double       gguf_get_val_f64 (const struct gguf_context * ctx, int i);
-    WSP_GGML_API bool         gguf_get_val_bool(const struct gguf_context * ctx, int i);
-    WSP_GGML_API const char * gguf_get_val_str (const struct gguf_context * ctx, int i);
-    WSP_GGML_API int          gguf_get_arr_n   (const struct gguf_context * ctx, int i);
-    WSP_GGML_API const void * gguf_get_arr_data(const struct gguf_context * ctx, int i);
-    WSP_GGML_API const char * gguf_get_arr_str (const struct gguf_context * ctx, int key_id, int i);
-
-    WSP_GGML_API int    gguf_get_n_tensors    (const struct gguf_context * ctx);
-    WSP_GGML_API int    gguf_find_tensor      (const struct gguf_context * ctx, const char * name);
-    WSP_GGML_API size_t gguf_get_tensor_offset(const struct gguf_context * ctx, int i);
-    WSP_GGML_API char * gguf_get_tensor_name  (const struct gguf_context * ctx, int i);
+    WSP_GGML_API uint8_t      wsp_gguf_get_val_u8  (const struct wsp_gguf_context * ctx, int i);
+    WSP_GGML_API int8_t       wsp_gguf_get_val_i8  (const struct wsp_gguf_context * ctx, int i);
+    WSP_GGML_API uint16_t     wsp_gguf_get_val_u16 (const struct wsp_gguf_context * ctx, int i);
+    WSP_GGML_API int16_t      wsp_gguf_get_val_i16 (const struct wsp_gguf_context * ctx, int i);
+    WSP_GGML_API uint32_t     wsp_gguf_get_val_u32 (const struct wsp_gguf_context * ctx, int i);
+    WSP_GGML_API int32_t      wsp_gguf_get_val_i32 (const struct wsp_gguf_context * ctx, int i);
+    WSP_GGML_API float        wsp_gguf_get_val_f32 (const struct wsp_gguf_context * ctx, int i);
+    WSP_GGML_API uint64_t     wsp_gguf_get_val_u64 (const struct wsp_gguf_context * ctx, int i);
+    WSP_GGML_API int64_t      wsp_gguf_get_val_i64 (const struct wsp_gguf_context * ctx, int i);
+    WSP_GGML_API double       wsp_gguf_get_val_f64 (const struct wsp_gguf_context * ctx, int i);
+    WSP_GGML_API bool         wsp_gguf_get_val_bool(const struct wsp_gguf_context * ctx, int i);
+    WSP_GGML_API const char * wsp_gguf_get_val_str (const struct wsp_gguf_context * ctx, int i);
+    WSP_GGML_API int          wsp_gguf_get_arr_n   (const struct wsp_gguf_context * ctx, int i);
+    WSP_GGML_API const void * wsp_gguf_get_arr_data(const struct wsp_gguf_context * ctx, int i);
+    WSP_GGML_API const char * wsp_gguf_get_arr_str (const struct wsp_gguf_context * ctx, int key_id, int i);
+
+    WSP_GGML_API int    wsp_gguf_get_n_tensors    (const struct wsp_gguf_context * ctx);
+    WSP_GGML_API int    wsp_gguf_find_tensor      (const struct wsp_gguf_context * ctx, const char * name);
+    WSP_GGML_API size_t wsp_gguf_get_tensor_offset(const struct wsp_gguf_context * ctx, int i);
+    WSP_GGML_API char * wsp_gguf_get_tensor_name  (const struct wsp_gguf_context * ctx, int i);
 
     // overrides existing values or adds a new one
-    WSP_GGML_API void gguf_set_val_u8  (struct gguf_context * ctx, const char * key, uint8_t  val);
-    WSP_GGML_API void gguf_set_val_i8  (struct gguf_context * ctx, const char * key, int8_t   val);
-    WSP_GGML_API void gguf_set_val_u16 (struct gguf_context * ctx, const char * key, uint16_t val);
-    WSP_GGML_API void gguf_set_val_i16 (struct gguf_context * ctx, const char * key, int16_t  val);
-    WSP_GGML_API void gguf_set_val_u32 (struct gguf_context * ctx, const char * key, uint32_t val);
-    WSP_GGML_API void gguf_set_val_i32 (struct gguf_context * ctx, const char * key, int32_t  val);
-    WSP_GGML_API void gguf_set_val_f32 (struct gguf_context * ctx, const char * key, float    val);
-    WSP_GGML_API void gguf_set_val_u64 (struct gguf_context * ctx, const char * key, uint64_t val);
-    WSP_GGML_API void gguf_set_val_i64 (struct gguf_context * ctx, const char * key, int64_t  val);
-    WSP_GGML_API void gguf_set_val_f64 (struct gguf_context * ctx, const char * key, double   val);
-    WSP_GGML_API void gguf_set_val_bool(struct gguf_context * ctx, const char * key, bool     val);
-    WSP_GGML_API void gguf_set_val_str (struct gguf_context * ctx, const char * key, const char * val);
-    WSP_GGML_API void gguf_set_arr_data(struct gguf_context * ctx, const char * key, enum gguf_type type, const void * data, int n);
-    WSP_GGML_API void gguf_set_arr_str (struct gguf_context * ctx, const char * key, const char ** data, int n);
+    WSP_GGML_API void wsp_gguf_set_val_u8  (struct wsp_gguf_context * ctx, const char * key, uint8_t  val);
+    WSP_GGML_API void wsp_gguf_set_val_i8  (struct wsp_gguf_context * ctx, const char * key, int8_t   val);
+    WSP_GGML_API void wsp_gguf_set_val_u16 (struct wsp_gguf_context * ctx, const char * key, uint16_t val);
+    WSP_GGML_API void wsp_gguf_set_val_i16 (struct wsp_gguf_context * ctx, const char * key, int16_t  val);
+    WSP_GGML_API void wsp_gguf_set_val_u32 (struct wsp_gguf_context * ctx, const char * key, uint32_t val);
+    WSP_GGML_API void wsp_gguf_set_val_i32 (struct wsp_gguf_context * ctx, const char * key, int32_t  val);
+    WSP_GGML_API void wsp_gguf_set_val_f32 (struct wsp_gguf_context * ctx, const char * key, float    val);
+    WSP_GGML_API void wsp_gguf_set_val_u64 (struct wsp_gguf_context * ctx, const char * key, uint64_t val);
+    WSP_GGML_API void wsp_gguf_set_val_i64 (struct wsp_gguf_context * ctx, const char * key, int64_t  val);
+    WSP_GGML_API void wsp_gguf_set_val_f64 (struct wsp_gguf_context * ctx, const char * key, double   val);
+    WSP_GGML_API void wsp_gguf_set_val_bool(struct wsp_gguf_context * ctx, const char * key, bool     val);
+    WSP_GGML_API void wsp_gguf_set_val_str (struct wsp_gguf_context * ctx, const char * key, const char * val);
+    WSP_GGML_API void wsp_gguf_set_arr_data(struct wsp_gguf_context * ctx, const char * key, enum wsp_gguf_type type, const void * data, int n);
+    WSP_GGML_API void wsp_gguf_set_arr_str (struct wsp_gguf_context * ctx, const char * key, const char ** data, int n);
 
     // set or add KV pairs from another context
-    WSP_GGML_API void gguf_set_kv(struct gguf_context * ctx, struct gguf_context * src);
+    WSP_GGML_API void wsp_gguf_set_kv(struct wsp_gguf_context * ctx, struct wsp_gguf_context * src);
 
     // manage tensor info
-    WSP_GGML_API void gguf_add_tensor(struct gguf_context * ctx, const struct wsp_ggml_tensor * tensor);
-    WSP_GGML_API void gguf_set_tensor_type(struct gguf_context * ctx, const char * name, enum wsp_ggml_type type);
-    WSP_GGML_API void gguf_set_tensor_data(struct gguf_context * ctx, const char * name, const void * data, size_t size);
+    WSP_GGML_API void wsp_gguf_add_tensor(struct wsp_gguf_context * ctx, const struct wsp_ggml_tensor * tensor);
+    WSP_GGML_API void wsp_gguf_set_tensor_type(struct wsp_gguf_context * ctx, const char * name, enum wsp_ggml_type type);
+    WSP_GGML_API void wsp_gguf_set_tensor_data(struct wsp_gguf_context * ctx, const char * name, const void * data, size_t size);
 
     // writing gguf files can be done in 2 ways:
     //
-    // - write the entire gguf_context to a binary file in a single pass:
+    // - write the entire wsp_gguf_context to a binary file in a single pass:
     //
-    //   gguf_write_to_file(ctx, fname);
+    //   wsp_gguf_write_to_file(ctx, fname);
     //
     // - first prepare a file with a placeholder for the meta data, write the tensor data, then write the meta data:
     //
     //   FILE * f = fopen(fname, "wb");
-    //   fseek(f, gguf_get_meta_size(ctx), SEEK_SET);
+    //   fseek(f, wsp_gguf_get_meta_size(ctx), SEEK_SET);
     //   fwrite(f, ...);
-    //   void * data = gguf_meta_get_meta_data(ctx);
+    //   void * data = malloc(wsp_gguf_get_meta_size(ctx)); wsp_gguf_get_meta_data(ctx, data);
     //   fseek(f, 0, SEEK_SET);
+    //   fwrite(data, 1, wsp_gguf_get_meta_size(ctx), f);
+    //   fwrite(f, data, wsp_gguf_get_meta_size(ctx));
     //   free(data);
     //   fclose(f);
     //
 
     // write the entire context to a binary file
-    WSP_GGML_API void gguf_write_to_file(const struct gguf_context * ctx, const char * fname, bool only_meta);
+    WSP_GGML_API void wsp_gguf_write_to_file(const struct wsp_gguf_context * ctx, const char * fname, bool only_meta);
 
     // get the size in bytes of the meta data (header, kv pairs, tensor info) including padding
-    WSP_GGML_API size_t gguf_get_meta_size(const struct gguf_context * ctx);
-    WSP_GGML_API void   gguf_get_meta_data(const struct gguf_context * ctx, void * data);
+    WSP_GGML_API size_t wsp_gguf_get_meta_size(const struct wsp_gguf_context * ctx);
+    WSP_GGML_API void   wsp_gguf_get_meta_data(const struct wsp_gguf_context * ctx, void * data);
 
     //
     // system info
diff --git a/scripts/bootstrap.sh b/scripts/bootstrap.sh
index ff9eed5..8549b68 100755
--- a/scripts/bootstrap.sh
+++ b/scripts/bootstrap.sh
@@ -9,7 +9,7 @@ cp ./whisper.cpp/ggml-alloc.h ./cpp/ggml-alloc.h
 cp ./whisper.cpp/ggml-alloc.c ./cpp/ggml-alloc.c
 cp ./whisper.cpp/ggml-metal.h ./cpp/ggml-metal.h
 cp ./whisper.cpp/ggml-metal.m ./cpp/ggml-metal.m
-cp ./whisper.cpp/ggml-metal.metal ./cpp/ggml-metal.metal
+cp ./whisper.cpp/ggml-metal.metal ./cpp/ggml-metal-whisper.metal
 cp ./whisper.cpp/whisper.h ./cpp/whisper.h
 cp ./whisper.cpp/whisper.cpp ./cpp/whisper.cpp
 
@@ -35,10 +35,14 @@ for file in "${files[@]}"; do
   if [ "$OS" = "Darwin" ]; then
     sed -i '' 's/GGML_/WSP_GGML_/g' $file
     sed -i '' 's/ggml_/wsp_ggml_/g' $file
+    sed -i '' 's/GGUF_/WSP_GGUF_/g' $file
+    sed -i '' 's/gguf_/wsp_gguf_/g' $file
     sed -i '' 's/GGMLMetalClass/WSPGGMLMetalClass/g' $file
   else
     sed -i 's/GGML_/WSP_GGML_/g' $file
     sed -i 's/ggml_/wsp_ggml_/g' $file
+    sed -i 's/GGUF_/WSP_GGUF_/g' $file
+    sed -i 's/gguf_/wsp_gguf_/g' $file
     sed -i 's/GGMLMetalClass/WSPGGMLMetalClass/g' $file
   fi
 done
diff --git a/scripts/ggml-metal.m.patch b/scripts/ggml-metal.m.patch
index 7cdf569..222da8a 100644
--- a/scripts/ggml-metal.m.patch
+++ b/scripts/ggml-metal.m.patch
@@ -1,5 +1,14 @@
---- ggml-metal.m.orig	2023-10-05 09:26:55
-+++ ggml-metal.m	2023-10-05 09:26:56
+--- ggml-metal.m.orig	2023-10-25 17:55:09
++++ ggml-metal.m	2023-10-25 17:55:42
+@@ -178,7 +178,7 @@
+ 
+         //NSString * path = [[NSBundle mainBundle] pathForResource:@"../../examples/metal/metal" ofType:@"metal"];
+         NSBundle * bundle = [NSBundle bundleForClass:[WSPGGMLMetalClass class]];
+-        NSString * path   = [bundle pathForResource:@"ggml-metal" ofType:@"metal"];
++        NSString * path   = [bundle pathForResource:@"ggml-metal-whisper" ofType:@"metal"];
+         metal_printf("%s: loading '%s'\n", __func__, [path UTF8String]);
+ 
+         NSString * src  = [NSString stringWithContentsOfFile:path encoding:NSUTF8StringEncoding error:&error];
 @@ -207,7 +207,7 @@
  #define WSP_GGML_METAL_ADD_KERNEL(name) \
          ctx->function_##name = [ctx->library newFunctionWithName:@"kernel_"#name]; \
@@ -18,7 +27,7 @@
  
      WSP_GGML_METAL_DEL_KERNEL(add);
      WSP_GGML_METAL_DEL_KERNEL(add_row);
-@@ -342,16 +340,6 @@
+@@ -342,17 +340,7 @@
      WSP_GGML_METAL_DEL_KERNEL(cpy_f16_f16);
  
  #undef WSP_GGML_METAL_DEL_KERNEL
@@ -26,12 +35,13 @@
 -    for (int i = 0; i < ctx->n_buffers; ++i) {
 -        [ctx->buffers[i].metal release];
 -    }
--
+ 
 -    [ctx->library release];
 -    [ctx->queue release];
 -    [ctx->device release];
 -
 -    dispatch_release(ctx->d_queue);
- 
+-
      free(ctx);
  }
+