diff --git a/cpp/ggml-metal.metal b/cpp/ggml-metal-whisper.metal similarity index 100% rename from cpp/ggml-metal.metal rename to cpp/ggml-metal-whisper.metal diff --git a/cpp/ggml-metal.m b/cpp/ggml-metal.m index cf823bf..cbcc6f8 100644 --- a/cpp/ggml-metal.m +++ b/cpp/ggml-metal.m @@ -115,9 +115,9 @@ static NSString * const msl_library_source = @"see metal.metal"; // Here to assist with NSBundle Path Hack -@interface GGMLMetalClass : NSObject +@interface WSPGGMLMetalClass : NSObject @end -@implementation GGMLMetalClass +@implementation WSPGGMLMetalClass @end struct wsp_ggml_metal_context * wsp_ggml_metal_init(int n_cb) { @@ -155,7 +155,7 @@ @implementation GGMLMetalClass { NSError * error = nil; - NSBundle * bundle = [NSBundle bundleForClass:[GGMLMetalClass class]]; + NSBundle * bundle = [NSBundle bundleForClass:[WSPGGMLMetalClass class]]; NSString * llamaBundlePath = [bundle pathForResource:@"llama_llama" ofType:@"bundle"]; NSBundle * llamaBundle = [NSBundle bundleWithPath:llamaBundlePath]; NSString * libPath = [llamaBundle pathForResource:@"default" ofType:@"metallib"]; @@ -177,8 +177,8 @@ @implementation GGMLMetalClass NSError * error = nil; //NSString * path = [[NSBundle mainBundle] pathForResource:@"../../examples/metal/metal" ofType:@"metal"]; - NSBundle * bundle = [NSBundle bundleForClass:[GGMLMetalClass class]]; - NSString * path = [bundle pathForResource:@"ggml-metal" ofType:@"metal"]; + NSBundle * bundle = [NSBundle bundleForClass:[WSPGGMLMetalClass class]]; + NSString * path = [bundle pathForResource:@"ggml-metal-whisper" ofType:@"metal"]; metal_printf("%s: loading '%s'\n", __func__, [path UTF8String]); NSString * src = [NSString stringWithContentsOfFile:path encoding:NSUTF8StringEncoding error:&error]; diff --git a/cpp/ggml.c b/cpp/ggml.c index 0f6b36e..ded2caf 100644 --- a/cpp/ggml.c +++ b/cpp/ggml.c @@ -19589,46 +19589,46 @@ size_t wsp_ggml_quantize_chunk(enum wsp_ggml_type type, const float * src, void //////////////////////////////////////////////////////////////////////////////// -struct gguf_str { +struct wsp_gguf_str { uint64_t n; // GGUFv2 char * data; }; -static const size_t GGUF_TYPE_SIZE[GGUF_TYPE_COUNT] = { - [GGUF_TYPE_UINT8] = sizeof(uint8_t), - [GGUF_TYPE_INT8] = sizeof(int8_t), - [GGUF_TYPE_UINT16] = sizeof(uint16_t), - [GGUF_TYPE_INT16] = sizeof(int16_t), - [GGUF_TYPE_UINT32] = sizeof(uint32_t), - [GGUF_TYPE_INT32] = sizeof(int32_t), - [GGUF_TYPE_FLOAT32] = sizeof(float), - [GGUF_TYPE_BOOL] = sizeof(bool), - [GGUF_TYPE_STRING] = sizeof(struct gguf_str), - [GGUF_TYPE_UINT64] = sizeof(uint64_t), - [GGUF_TYPE_INT64] = sizeof(int64_t), - [GGUF_TYPE_FLOAT64] = sizeof(double), - [GGUF_TYPE_ARRAY] = 0, // undefined +static const size_t WSP_GGUF_TYPE_SIZE[WSP_GGUF_TYPE_COUNT] = { + [WSP_GGUF_TYPE_UINT8] = sizeof(uint8_t), + [WSP_GGUF_TYPE_INT8] = sizeof(int8_t), + [WSP_GGUF_TYPE_UINT16] = sizeof(uint16_t), + [WSP_GGUF_TYPE_INT16] = sizeof(int16_t), + [WSP_GGUF_TYPE_UINT32] = sizeof(uint32_t), + [WSP_GGUF_TYPE_INT32] = sizeof(int32_t), + [WSP_GGUF_TYPE_FLOAT32] = sizeof(float), + [WSP_GGUF_TYPE_BOOL] = sizeof(bool), + [WSP_GGUF_TYPE_STRING] = sizeof(struct wsp_gguf_str), + [WSP_GGUF_TYPE_UINT64] = sizeof(uint64_t), + [WSP_GGUF_TYPE_INT64] = sizeof(int64_t), + [WSP_GGUF_TYPE_FLOAT64] = sizeof(double), + [WSP_GGUF_TYPE_ARRAY] = 0, // undefined }; -static_assert(GGUF_TYPE_COUNT == 13, "GGUF_TYPE_COUNT != 13"); - -static const char * GGUF_TYPE_NAME[GGUF_TYPE_COUNT] = { - [GGUF_TYPE_UINT8] = "u8", - [GGUF_TYPE_INT8] = "i8", - [GGUF_TYPE_UINT16] = "u16", - [GGUF_TYPE_INT16] = "i16", - [GGUF_TYPE_UINT32] = "u32", - [GGUF_TYPE_INT32] = "i32", - [GGUF_TYPE_FLOAT32] = "f32", - [GGUF_TYPE_BOOL] = "bool", - [GGUF_TYPE_STRING] = "str", - [GGUF_TYPE_ARRAY] = "arr", - [GGUF_TYPE_UINT64] = "u64", - [GGUF_TYPE_INT64] = "i64", - [GGUF_TYPE_FLOAT64] = "f64", +static_assert(WSP_GGUF_TYPE_COUNT == 13, "WSP_GGUF_TYPE_COUNT != 13"); + +static const char * WSP_GGUF_TYPE_NAME[WSP_GGUF_TYPE_COUNT] = { + [WSP_GGUF_TYPE_UINT8] = "u8", + [WSP_GGUF_TYPE_INT8] = "i8", + [WSP_GGUF_TYPE_UINT16] = "u16", + [WSP_GGUF_TYPE_INT16] = "i16", + [WSP_GGUF_TYPE_UINT32] = "u32", + [WSP_GGUF_TYPE_INT32] = "i32", + [WSP_GGUF_TYPE_FLOAT32] = "f32", + [WSP_GGUF_TYPE_BOOL] = "bool", + [WSP_GGUF_TYPE_STRING] = "str", + [WSP_GGUF_TYPE_ARRAY] = "arr", + [WSP_GGUF_TYPE_UINT64] = "u64", + [WSP_GGUF_TYPE_INT64] = "i64", + [WSP_GGUF_TYPE_FLOAT64] = "f64", }; -static_assert(GGUF_TYPE_COUNT == 13, "GGUF_TYPE_COUNT != 13"); +static_assert(WSP_GGUF_TYPE_COUNT == 13, "WSP_GGUF_TYPE_COUNT != 13"); -union gguf_value { +union wsp_gguf_value { uint8_t uint8; int8_t int8; uint16_t uint16; @@ -19641,32 +19641,32 @@ union gguf_value { double float64; bool bool_; - struct gguf_str str; + struct wsp_gguf_str str; struct { - enum gguf_type type; + enum wsp_gguf_type type; uint64_t n; // GGUFv2 void * data; } arr; }; -struct gguf_kv { - struct gguf_str key; +struct wsp_gguf_kv { + struct wsp_gguf_str key; - enum gguf_type type; - union gguf_value value; + enum wsp_gguf_type type; + union wsp_gguf_value value; }; -struct gguf_header { +struct wsp_gguf_header { uint32_t magic; uint32_t version; uint64_t n_tensors; // GGUFv2 uint64_t n_kv; // GGUFv2 }; -struct gguf_tensor_info { - struct gguf_str name; +struct wsp_gguf_tensor_info { + struct wsp_gguf_str name; uint32_t n_dims; uint64_t ne[WSP_GGML_MAX_DIMS]; @@ -19680,11 +19680,11 @@ struct gguf_tensor_info { size_t size; }; -struct gguf_context { - struct gguf_header header; +struct wsp_gguf_context { + struct wsp_gguf_header header; - struct gguf_kv * kv; - struct gguf_tensor_info * infos; + struct wsp_gguf_kv * kv; + struct wsp_gguf_tensor_info * infos; size_t alignment; size_t offset; // offset of `data` from beginning of file @@ -19694,50 +19694,50 @@ struct gguf_context { void * data; }; -static bool gguf_fread_el(FILE * file, void * dst, size_t size, size_t * offset) { +static bool wsp_gguf_fread_el(FILE * file, void * dst, size_t size, size_t * offset) { const size_t n = fread(dst, 1, size, file); *offset += n; return n == size; } // NOTE: temporary handling of GGUFv1 >> remove after Oct 2023 -static bool gguf_fread_str_cur(FILE * file, struct gguf_str * p, size_t * offset) { +static bool wsp_gguf_fread_str_cur(FILE * file, struct wsp_gguf_str * p, size_t * offset) { p->n = 0; p->data = NULL; bool ok = true; - ok = ok && gguf_fread_el(file, &p->n, sizeof(p->n), offset); p->data = calloc(p->n + 1, 1); - ok = ok && gguf_fread_el(file, p->data, p->n, offset); + ok = ok && wsp_gguf_fread_el(file, &p->n, sizeof(p->n), offset); p->data = calloc(p->n + 1, 1); + ok = ok && wsp_gguf_fread_el(file, p->data, p->n, offset); return ok; } -static bool gguf_fread_str_v1(FILE * file, struct gguf_str * p, size_t * offset) { +static bool wsp_gguf_fread_str_v1(FILE * file, struct wsp_gguf_str * p, size_t * offset) { p->n = 0; p->data = NULL; bool ok = true; uint32_t n = 0; - ok = ok && gguf_fread_el(file, &n, sizeof(n), offset); p->data = calloc(n + 1, 1); p->n = n; - ok = ok && gguf_fread_el(file, p->data, p->n, offset); + ok = ok && wsp_gguf_fread_el(file, &n, sizeof(n), offset); p->data = calloc(n + 1, 1); p->n = n; + ok = ok && wsp_gguf_fread_el(file, p->data, p->n, offset); return ok; } -struct gguf_context * gguf_init_empty(void) { - struct gguf_context * ctx = WSP_GGML_ALIGNED_MALLOC(sizeof(struct gguf_context)); +struct wsp_gguf_context * wsp_gguf_init_empty(void) { + struct wsp_gguf_context * ctx = WSP_GGML_ALIGNED_MALLOC(sizeof(struct wsp_gguf_context)); - ctx->header.magic = GGUF_MAGIC; - ctx->header.version = GGUF_VERSION; + ctx->header.magic = WSP_GGUF_MAGIC; + ctx->header.version = WSP_GGUF_VERSION; ctx->header.n_tensors = 0; ctx->header.n_kv = 0; ctx->kv = NULL; ctx->infos = NULL; - ctx->alignment = GGUF_DEFAULT_ALIGNMENT; + ctx->alignment = WSP_GGUF_DEFAULT_ALIGNMENT; ctx->offset = 0; ctx->size = 0; @@ -19746,7 +19746,7 @@ struct gguf_context * gguf_init_empty(void) { return ctx; } -struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_params params) { +struct wsp_gguf_context * wsp_gguf_init_from_file(const char * fname, struct wsp_gguf_init_params params) { FILE * file = fopen(fname, "rb"); if (!file) { return NULL; @@ -19759,9 +19759,9 @@ struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_p // check the magic before making allocations { - gguf_fread_el(file, &magic, sizeof(magic), &offset); + wsp_gguf_fread_el(file, &magic, sizeof(magic), &offset); - if (magic != GGUF_MAGIC) { + if (magic != WSP_GGUF_MAGIC) { fprintf(stderr, "%s: invalid magic number %08x\n", __func__, magic); fclose(file); return NULL; @@ -19770,7 +19770,7 @@ struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_p bool ok = true; - struct gguf_context * ctx = WSP_GGML_ALIGNED_MALLOC(sizeof(struct gguf_context)); + struct wsp_gguf_context * ctx = WSP_GGML_ALIGNED_MALLOC(sizeof(struct wsp_gguf_context)); // read the header { @@ -19780,105 +19780,105 @@ struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_p ctx->infos = NULL; ctx->data = NULL; - ok = ok && gguf_fread_el(file, &ctx->header.version, sizeof(ctx->header.version), &offset); + ok = ok && wsp_gguf_fread_el(file, &ctx->header.version, sizeof(ctx->header.version), &offset); if (ctx->header.version == 1) { // NOTE: temporary handling of GGUFv1 >> remove after Oct 2023 uint32_t n_tensors = 0; uint32_t n_kv = 0; - ok = ok && gguf_fread_el(file, &n_tensors, sizeof(n_tensors), &offset); - ok = ok && gguf_fread_el(file, &n_kv, sizeof(n_kv), &offset); + ok = ok && wsp_gguf_fread_el(file, &n_tensors, sizeof(n_tensors), &offset); + ok = ok && wsp_gguf_fread_el(file, &n_kv, sizeof(n_kv), &offset); ctx->header.n_tensors = n_tensors; ctx->header.n_kv = n_kv; } else { - ok = ok && gguf_fread_el(file, &ctx->header.n_tensors, sizeof(ctx->header.n_tensors), &offset); - ok = ok && gguf_fread_el(file, &ctx->header.n_kv, sizeof(ctx->header.n_kv), &offset); + ok = ok && wsp_gguf_fread_el(file, &ctx->header.n_tensors, sizeof(ctx->header.n_tensors), &offset); + ok = ok && wsp_gguf_fread_el(file, &ctx->header.n_kv, sizeof(ctx->header.n_kv), &offset); } if (!ok) { fprintf(stderr, "%s: failed to read header\n", __func__); fclose(file); - gguf_free(ctx); + wsp_gguf_free(ctx); return NULL; } } // NOTE: temporary handling of GGUFv1 >> remove after Oct 2023 - bool (* gguf_fread_str)(FILE *, struct gguf_str *, size_t *) = gguf_fread_str_cur; + bool (* wsp_gguf_fread_str)(FILE *, struct wsp_gguf_str *, size_t *) = wsp_gguf_fread_str_cur; if (ctx->header.version == 1) { - gguf_fread_str = gguf_fread_str_v1; + wsp_gguf_fread_str = wsp_gguf_fread_str_v1; } // read the kv pairs { - ctx->kv = malloc(ctx->header.n_kv * sizeof(struct gguf_kv)); + ctx->kv = malloc(ctx->header.n_kv * sizeof(struct wsp_gguf_kv)); for (uint32_t i = 0; i < ctx->header.n_kv; ++i) { - struct gguf_kv * kv = &ctx->kv[i]; + struct wsp_gguf_kv * kv = &ctx->kv[i]; //fprintf(stderr, "%s: reading kv %d\n", __func__, i); - ok = ok && gguf_fread_str(file, &kv->key, &offset); - ok = ok && gguf_fread_el (file, &kv->type, sizeof(kv->type), &offset); + ok = ok && wsp_gguf_fread_str(file, &kv->key, &offset); + ok = ok && wsp_gguf_fread_el (file, &kv->type, sizeof(kv->type), &offset); //fprintf(stderr, "%s: reading kv with key %s\n", __func__, kv->key.data); switch (kv->type) { - case GGUF_TYPE_UINT8: ok = ok && gguf_fread_el (file, &kv->value.uint8, sizeof(kv->value.uint8), &offset); break; - case GGUF_TYPE_INT8: ok = ok && gguf_fread_el (file, &kv->value.int8, sizeof(kv->value.int8), &offset); break; - case GGUF_TYPE_UINT16: ok = ok && gguf_fread_el (file, &kv->value.uint16, sizeof(kv->value.uint16), &offset); break; - case GGUF_TYPE_INT16: ok = ok && gguf_fread_el (file, &kv->value.int16, sizeof(kv->value.int16), &offset); break; - case GGUF_TYPE_UINT32: ok = ok && gguf_fread_el (file, &kv->value.uint32, sizeof(kv->value.uint32), &offset); break; - case GGUF_TYPE_INT32: ok = ok && gguf_fread_el (file, &kv->value.int32, sizeof(kv->value.int32), &offset); break; - case GGUF_TYPE_FLOAT32: ok = ok && gguf_fread_el (file, &kv->value.float32, sizeof(kv->value.float32), &offset); break; - case GGUF_TYPE_UINT64: ok = ok && gguf_fread_el (file, &kv->value.uint64, sizeof(kv->value.uint64), &offset); break; - case GGUF_TYPE_INT64: ok = ok && gguf_fread_el (file, &kv->value.int64, sizeof(kv->value.int64), &offset); break; - case GGUF_TYPE_FLOAT64: ok = ok && gguf_fread_el (file, &kv->value.float64, sizeof(kv->value.float64), &offset); break; - case GGUF_TYPE_BOOL: ok = ok && gguf_fread_el (file, &kv->value.bool_, sizeof(kv->value.bool_), &offset); break; - case GGUF_TYPE_STRING: ok = ok && gguf_fread_str(file, &kv->value.str, &offset); break; - case GGUF_TYPE_ARRAY: + case WSP_GGUF_TYPE_UINT8: ok = ok && wsp_gguf_fread_el (file, &kv->value.uint8, sizeof(kv->value.uint8), &offset); break; + case WSP_GGUF_TYPE_INT8: ok = ok && wsp_gguf_fread_el (file, &kv->value.int8, sizeof(kv->value.int8), &offset); break; + case WSP_GGUF_TYPE_UINT16: ok = ok && wsp_gguf_fread_el (file, &kv->value.uint16, sizeof(kv->value.uint16), &offset); break; + case WSP_GGUF_TYPE_INT16: ok = ok && wsp_gguf_fread_el (file, &kv->value.int16, sizeof(kv->value.int16), &offset); break; + case WSP_GGUF_TYPE_UINT32: ok = ok && wsp_gguf_fread_el (file, &kv->value.uint32, sizeof(kv->value.uint32), &offset); break; + case WSP_GGUF_TYPE_INT32: ok = ok && wsp_gguf_fread_el (file, &kv->value.int32, sizeof(kv->value.int32), &offset); break; + case WSP_GGUF_TYPE_FLOAT32: ok = ok && wsp_gguf_fread_el (file, &kv->value.float32, sizeof(kv->value.float32), &offset); break; + case WSP_GGUF_TYPE_UINT64: ok = ok && wsp_gguf_fread_el (file, &kv->value.uint64, sizeof(kv->value.uint64), &offset); break; + case WSP_GGUF_TYPE_INT64: ok = ok && wsp_gguf_fread_el (file, &kv->value.int64, sizeof(kv->value.int64), &offset); break; + case WSP_GGUF_TYPE_FLOAT64: ok = ok && wsp_gguf_fread_el (file, &kv->value.float64, sizeof(kv->value.float64), &offset); break; + case WSP_GGUF_TYPE_BOOL: ok = ok && wsp_gguf_fread_el (file, &kv->value.bool_, sizeof(kv->value.bool_), &offset); break; + case WSP_GGUF_TYPE_STRING: ok = ok && wsp_gguf_fread_str(file, &kv->value.str, &offset); break; + case WSP_GGUF_TYPE_ARRAY: { - ok = ok && gguf_fread_el(file, &kv->value.arr.type, sizeof(kv->value.arr.type), &offset); + ok = ok && wsp_gguf_fread_el(file, &kv->value.arr.type, sizeof(kv->value.arr.type), &offset); if (ctx->header.version == 1) { // NOTE: temporary handling of GGUFv1 >> remove after Oct 2023 uint32_t n = 0; - ok = ok && gguf_fread_el(file, &n, sizeof(n), &offset); + ok = ok && wsp_gguf_fread_el(file, &n, sizeof(n), &offset); kv->value.arr.n = n; } else { - ok = ok && gguf_fread_el(file, &kv->value.arr.n, sizeof(kv->value.arr.n), &offset); + ok = ok && wsp_gguf_fread_el(file, &kv->value.arr.n, sizeof(kv->value.arr.n), &offset); } switch (kv->value.arr.type) { - case GGUF_TYPE_UINT8: - case GGUF_TYPE_INT8: - case GGUF_TYPE_UINT16: - case GGUF_TYPE_INT16: - case GGUF_TYPE_UINT32: - case GGUF_TYPE_INT32: - case GGUF_TYPE_FLOAT32: - case GGUF_TYPE_UINT64: - case GGUF_TYPE_INT64: - case GGUF_TYPE_FLOAT64: - case GGUF_TYPE_BOOL: + case WSP_GGUF_TYPE_UINT8: + case WSP_GGUF_TYPE_INT8: + case WSP_GGUF_TYPE_UINT16: + case WSP_GGUF_TYPE_INT16: + case WSP_GGUF_TYPE_UINT32: + case WSP_GGUF_TYPE_INT32: + case WSP_GGUF_TYPE_FLOAT32: + case WSP_GGUF_TYPE_UINT64: + case WSP_GGUF_TYPE_INT64: + case WSP_GGUF_TYPE_FLOAT64: + case WSP_GGUF_TYPE_BOOL: { - kv->value.arr.data = malloc(kv->value.arr.n * GGUF_TYPE_SIZE[kv->value.arr.type]); - ok = ok && gguf_fread_el(file, kv->value.arr.data, kv->value.arr.n * GGUF_TYPE_SIZE[kv->value.arr.type], &offset); + kv->value.arr.data = malloc(kv->value.arr.n * WSP_GGUF_TYPE_SIZE[kv->value.arr.type]); + ok = ok && wsp_gguf_fread_el(file, kv->value.arr.data, kv->value.arr.n * WSP_GGUF_TYPE_SIZE[kv->value.arr.type], &offset); } break; - case GGUF_TYPE_STRING: + case WSP_GGUF_TYPE_STRING: { - kv->value.arr.data = malloc(kv->value.arr.n * sizeof(struct gguf_str)); + kv->value.arr.data = malloc(kv->value.arr.n * sizeof(struct wsp_gguf_str)); for (uint32_t j = 0; j < kv->value.arr.n; ++j) { - ok = ok && gguf_fread_str(file, &((struct gguf_str *) kv->value.arr.data)[j], &offset); + ok = ok && wsp_gguf_fread_str(file, &((struct wsp_gguf_str *) kv->value.arr.data)[j], &offset); } } break; - case GGUF_TYPE_ARRAY: - case GGUF_TYPE_COUNT: WSP_GGML_ASSERT(false && "invalid type"); break; + case WSP_GGUF_TYPE_ARRAY: + case WSP_GGUF_TYPE_COUNT: WSP_GGML_ASSERT(false && "invalid type"); break; }; } break; - case GGUF_TYPE_COUNT: WSP_GGML_ASSERT(false && "invalid type"); + case WSP_GGUF_TYPE_COUNT: WSP_GGML_ASSERT(false && "invalid type"); }; if (!ok) { @@ -19889,51 +19889,51 @@ struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_p if (!ok) { fprintf(stderr, "%s: failed to read key-value pairs\n", __func__); fclose(file); - gguf_free(ctx); + wsp_gguf_free(ctx); return NULL; } } // read the tensor infos { - ctx->infos = malloc(ctx->header.n_tensors * sizeof(struct gguf_tensor_info)); + ctx->infos = malloc(ctx->header.n_tensors * sizeof(struct wsp_gguf_tensor_info)); for (uint32_t i = 0; i < ctx->header.n_tensors; ++i) { - struct gguf_tensor_info * info = &ctx->infos[i]; + struct wsp_gguf_tensor_info * info = &ctx->infos[i]; for (int j = 0; j < WSP_GGML_MAX_DIMS; ++j) { info->ne[j] = 1; } - ok = ok && gguf_fread_str(file, &info->name, &offset); - ok = ok && gguf_fread_el (file, &info->n_dims, sizeof(info->n_dims), &offset); + ok = ok && wsp_gguf_fread_str(file, &info->name, &offset); + ok = ok && wsp_gguf_fread_el (file, &info->n_dims, sizeof(info->n_dims), &offset); for (uint32_t j = 0; j < info->n_dims; ++j) { if (ctx->header.version == 1) { // NOTE: temporary handling of GGUFv1 >> remove after Oct 2023 uint32_t t = 0; - ok = ok && gguf_fread_el(file, &t, sizeof(t), &offset); + ok = ok && wsp_gguf_fread_el(file, &t, sizeof(t), &offset); info->ne[j] = t; } else { - ok = ok && gguf_fread_el(file, &info->ne[j], sizeof(info->ne[j]), &offset); + ok = ok && wsp_gguf_fread_el(file, &info->ne[j], sizeof(info->ne[j]), &offset); } } - ok = ok && gguf_fread_el (file, &info->type, sizeof(info->type), &offset); - ok = ok && gguf_fread_el (file, &info->offset, sizeof(info->offset), &offset); + ok = ok && wsp_gguf_fread_el (file, &info->type, sizeof(info->type), &offset); + ok = ok && wsp_gguf_fread_el (file, &info->offset, sizeof(info->offset), &offset); if (!ok) { fprintf(stderr, "%s: failed to read tensor info\n", __func__); fclose(file); - gguf_free(ctx); + wsp_gguf_free(ctx); return NULL; } } } - ctx->alignment = GGUF_DEFAULT_ALIGNMENT; + ctx->alignment = WSP_GGUF_DEFAULT_ALIGNMENT; - int alignment_idx = gguf_find_key(ctx, "general.alignment"); + int alignment_idx = wsp_gguf_find_key(ctx, "general.alignment"); if (alignment_idx != -1) { - ctx->alignment = gguf_get_val_u32(ctx, alignment_idx); + ctx->alignment = wsp_gguf_get_val_u32(ctx, alignment_idx); } // we require the data section to be aligned, so take into account any padding @@ -19953,7 +19953,7 @@ struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_p { ctx->size = 0; for (uint32_t i = 0; i < ctx->header.n_tensors; ++i) { - struct gguf_tensor_info * info = &ctx->infos[i]; + struct wsp_gguf_tensor_info * info = &ctx->infos[i]; const int64_t ne = (int64_t) info->ne[0] * @@ -19965,7 +19965,7 @@ struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_p fprintf(stderr, "%s: tensor '%s' number of elements (%" PRId64 ") is not a multiple of block size (%d)\n", __func__, info->name.data, ne, wsp_ggml_blck_size(info->type)); fclose(file); - gguf_free(ctx); + wsp_gguf_free(ctx); return NULL; } @@ -19977,7 +19977,7 @@ struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_p // load the tensor data only if requested if (params.ctx != NULL) { - // if the provided gguf_context is no_alloc, then we create "empty" tensors and do not read the binary blob + // if the provided wsp_gguf_context is no_alloc, then we create "empty" tensors and do not read the binary blob // otherwise, we load the binary blob into the created wsp_ggml_context as well, and point the "data" members of // the wsp_ggml_tensor structs to the appropriate locations in the binary blob @@ -20005,13 +20005,13 @@ struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_p ok = ok && data != NULL; // read the binary blob with the tensor data - ok = ok && gguf_fread_el(file, data->data, ctx->size, &offset); + ok = ok && wsp_gguf_fread_el(file, data->data, ctx->size, &offset); if (!ok) { fprintf(stderr, "%s: failed to read tensor data\n", __func__); fclose(file); wsp_ggml_free(ctx_data); - gguf_free(ctx); + wsp_gguf_free(ctx); return NULL; } @@ -20050,7 +20050,7 @@ struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_p fprintf(stderr, "%s: failed to read the tensor data\n", __func__); fclose(file); wsp_ggml_free(ctx_data); - gguf_free(ctx); + wsp_gguf_free(ctx); return NULL; } @@ -20062,7 +20062,7 @@ struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_p return ctx; } -void gguf_free(struct gguf_context * ctx) { +void wsp_gguf_free(struct wsp_gguf_context * ctx) { if (ctx == NULL) { return; } @@ -20070,23 +20070,23 @@ void gguf_free(struct gguf_context * ctx) { if (ctx->kv) { // free string memory - not great.. for (uint32_t i = 0; i < ctx->header.n_kv; ++i) { - struct gguf_kv * kv = &ctx->kv[i]; + struct wsp_gguf_kv * kv = &ctx->kv[i]; if (kv->key.data) { free(kv->key.data); } - if (kv->type == GGUF_TYPE_STRING) { + if (kv->type == WSP_GGUF_TYPE_STRING) { if (kv->value.str.data) { free(kv->value.str.data); } } - if (kv->type == GGUF_TYPE_ARRAY) { + if (kv->type == WSP_GGUF_TYPE_ARRAY) { if (kv->value.arr.data) { - if (kv->value.arr.type == GGUF_TYPE_STRING) { + if (kv->value.arr.type == WSP_GGUF_TYPE_STRING) { for (uint32_t j = 0; j < kv->value.arr.n; ++j) { - struct gguf_str * str = &((struct gguf_str *) kv->value.arr.data)[j]; + struct wsp_gguf_str * str = &((struct wsp_gguf_str *) kv->value.arr.data)[j]; if (str->data) { free(str->data); } @@ -20102,7 +20102,7 @@ void gguf_free(struct gguf_context * ctx) { if (ctx->infos) { for (uint32_t i = 0; i < ctx->header.n_tensors; ++i) { - struct gguf_tensor_info * info = &ctx->infos[i]; + struct wsp_gguf_tensor_info * info = &ctx->infos[i]; if (info->name.data) { free(info->name.data); @@ -20115,38 +20115,38 @@ void gguf_free(struct gguf_context * ctx) { WSP_GGML_ALIGNED_FREE(ctx); } -const char * gguf_type_name(enum gguf_type type) { - return GGUF_TYPE_NAME[type]; +const char * wsp_gguf_type_name(enum wsp_gguf_type type) { + return WSP_GGUF_TYPE_NAME[type]; } -int gguf_get_version(const struct gguf_context * ctx) { +int wsp_gguf_get_version(const struct wsp_gguf_context * ctx) { return ctx->header.version; } -size_t gguf_get_alignment(const struct gguf_context * ctx) { +size_t wsp_gguf_get_alignment(const struct wsp_gguf_context * ctx) { return ctx->alignment; } -size_t gguf_get_data_offset(const struct gguf_context * ctx) { +size_t wsp_gguf_get_data_offset(const struct wsp_gguf_context * ctx) { return ctx->offset; } -void * gguf_get_data(const struct gguf_context * ctx) { +void * wsp_gguf_get_data(const struct wsp_gguf_context * ctx) { return ctx->data; } -int gguf_get_n_kv(const struct gguf_context * ctx) { +int wsp_gguf_get_n_kv(const struct wsp_gguf_context * ctx) { return ctx->header.n_kv; } -int gguf_find_key(const struct gguf_context * ctx, const char * key) { +int wsp_gguf_find_key(const struct wsp_gguf_context * ctx, const char * key) { // return -1 if key not found int keyfound = -1; - const int n_kv = gguf_get_n_kv(ctx); + const int n_kv = wsp_gguf_get_n_kv(ctx); for (int i = 0; i < n_kv; ++i) { - if (strcmp(key, gguf_get_key(ctx, i)) == 0) { + if (strcmp(key, wsp_gguf_get_key(ctx, i)) == 0) { keyfound = i; break; } @@ -20155,92 +20155,92 @@ int gguf_find_key(const struct gguf_context * ctx, const char * key) { return keyfound; } -const char * gguf_get_key(const struct gguf_context * ctx, int i) { +const char * wsp_gguf_get_key(const struct wsp_gguf_context * ctx, int i) { return ctx->kv[i].key.data; } -enum gguf_type gguf_get_kv_type(const struct gguf_context * ctx, int i) { +enum wsp_gguf_type wsp_gguf_get_kv_type(const struct wsp_gguf_context * ctx, int i) { return ctx->kv[i].type; } -enum gguf_type gguf_get_arr_type(const struct gguf_context * ctx, int i) { +enum wsp_gguf_type wsp_gguf_get_arr_type(const struct wsp_gguf_context * ctx, int i) { return ctx->kv[i].value.arr.type; } -const void * gguf_get_arr_data(const struct gguf_context * ctx, int i) { +const void * wsp_gguf_get_arr_data(const struct wsp_gguf_context * ctx, int i) { return ctx->kv[i].value.arr.data; } -const char * gguf_get_arr_str(const struct gguf_context * ctx, int key_id, int i) { - struct gguf_kv * kv = &ctx->kv[key_id]; - struct gguf_str * str = &((struct gguf_str *) kv->value.arr.data)[i]; +const char * wsp_gguf_get_arr_str(const struct wsp_gguf_context * ctx, int key_id, int i) { + struct wsp_gguf_kv * kv = &ctx->kv[key_id]; + struct wsp_gguf_str * str = &((struct wsp_gguf_str *) kv->value.arr.data)[i]; return str->data; } -int gguf_get_arr_n(const struct gguf_context * ctx, int i) { +int wsp_gguf_get_arr_n(const struct wsp_gguf_context * ctx, int i) { return ctx->kv[i].value.arr.n; } -uint8_t gguf_get_val_u8(const struct gguf_context * ctx, int i) { +uint8_t wsp_gguf_get_val_u8(const struct wsp_gguf_context * ctx, int i) { return ctx->kv[i].value.uint8; } -int8_t gguf_get_val_i8(const struct gguf_context * ctx, int i) { +int8_t wsp_gguf_get_val_i8(const struct wsp_gguf_context * ctx, int i) { return ctx->kv[i].value.int8; } -uint16_t gguf_get_val_u16(const struct gguf_context * ctx, int i) { +uint16_t wsp_gguf_get_val_u16(const struct wsp_gguf_context * ctx, int i) { return ctx->kv[i].value.uint16; } -int16_t gguf_get_val_i16(const struct gguf_context * ctx, int i) { +int16_t wsp_gguf_get_val_i16(const struct wsp_gguf_context * ctx, int i) { return ctx->kv[i].value.int16; } -uint32_t gguf_get_val_u32(const struct gguf_context * ctx, int i) { +uint32_t wsp_gguf_get_val_u32(const struct wsp_gguf_context * ctx, int i) { return ctx->kv[i].value.uint32; } -int32_t gguf_get_val_i32(const struct gguf_context * ctx, int i) { +int32_t wsp_gguf_get_val_i32(const struct wsp_gguf_context * ctx, int i) { return ctx->kv[i].value.int32; } -float gguf_get_val_f32(const struct gguf_context * ctx, int i) { +float wsp_gguf_get_val_f32(const struct wsp_gguf_context * ctx, int i) { return ctx->kv[i].value.float32; } -uint64_t gguf_get_val_u64(const struct gguf_context * ctx, int i) { +uint64_t wsp_gguf_get_val_u64(const struct wsp_gguf_context * ctx, int i) { return ctx->kv[i].value.uint64; } -int64_t gguf_get_val_i64(const struct gguf_context * ctx, int i) { +int64_t wsp_gguf_get_val_i64(const struct wsp_gguf_context * ctx, int i) { return ctx->kv[i].value.int64; } -double gguf_get_val_f64(const struct gguf_context * ctx, int i) { +double wsp_gguf_get_val_f64(const struct wsp_gguf_context * ctx, int i) { return ctx->kv[i].value.float64; } -bool gguf_get_val_bool(const struct gguf_context * ctx, int i) { +bool wsp_gguf_get_val_bool(const struct wsp_gguf_context * ctx, int i) { return ctx->kv[i].value.bool_; } -const char * gguf_get_val_str (const struct gguf_context * ctx, int i) { +const char * wsp_gguf_get_val_str (const struct wsp_gguf_context * ctx, int i) { return ctx->kv[i].value.str.data; } -int gguf_get_n_tensors(const struct gguf_context * ctx) { +int wsp_gguf_get_n_tensors(const struct wsp_gguf_context * ctx) { return ctx->header.n_tensors; } -int gguf_find_tensor(const struct gguf_context * ctx, const char * name) { +int wsp_gguf_find_tensor(const struct wsp_gguf_context * ctx, const char * name) { // return -1 if tensor not found int tensorfound = -1; - const int n_tensors = gguf_get_n_tensors(ctx); + const int n_tensors = wsp_gguf_get_n_tensors(ctx); for (int i = 0; i < n_tensors; ++i) { - if (strcmp(name, gguf_get_tensor_name(ctx, i)) == 0) { + if (strcmp(name, wsp_gguf_get_tensor_name(ctx, i)) == 0) { tensorfound = i; break; } @@ -20249,24 +20249,24 @@ int gguf_find_tensor(const struct gguf_context * ctx, const char * name) { return tensorfound; } -size_t gguf_get_tensor_offset(const struct gguf_context * ctx, int i) { +size_t wsp_gguf_get_tensor_offset(const struct wsp_gguf_context * ctx, int i) { return ctx->infos[i].offset; } -char * gguf_get_tensor_name(const struct gguf_context * ctx, int i) { +char * wsp_gguf_get_tensor_name(const struct wsp_gguf_context * ctx, int i) { return ctx->infos[i].name.data; } // returns the index -static int gguf_get_or_add_key(struct gguf_context * ctx, const char * key) { - const int idx = gguf_find_key(ctx, key); +static int wsp_gguf_get_or_add_key(struct wsp_gguf_context * ctx, const char * key) { + const int idx = wsp_gguf_find_key(ctx, key); if (idx >= 0) { return idx; } - const int n_kv = gguf_get_n_kv(ctx); + const int n_kv = wsp_gguf_get_n_kv(ctx); - ctx->kv = realloc(ctx->kv, (n_kv + 1) * sizeof(struct gguf_kv)); + ctx->kv = realloc(ctx->kv, (n_kv + 1) * sizeof(struct wsp_gguf_kv)); ctx->kv[n_kv].key.n = strlen(key); ctx->kv[n_kv].key.data = strdup(key); ctx->header.n_kv++; @@ -20274,156 +20274,156 @@ static int gguf_get_or_add_key(struct gguf_context * ctx, const char * key) { return n_kv; } -void gguf_set_val_u8(struct gguf_context * ctx, const char * key, uint8_t val) { - const int idx = gguf_get_or_add_key(ctx, key); +void wsp_gguf_set_val_u8(struct wsp_gguf_context * ctx, const char * key, uint8_t val) { + const int idx = wsp_gguf_get_or_add_key(ctx, key); - ctx->kv[idx].type = GGUF_TYPE_UINT8; + ctx->kv[idx].type = WSP_GGUF_TYPE_UINT8; ctx->kv[idx].value.uint8 = val; } -void gguf_set_val_i8(struct gguf_context * ctx, const char * key, int8_t val) { - const int idx = gguf_get_or_add_key(ctx, key); +void wsp_gguf_set_val_i8(struct wsp_gguf_context * ctx, const char * key, int8_t val) { + const int idx = wsp_gguf_get_or_add_key(ctx, key); - ctx->kv[idx].type = GGUF_TYPE_INT8; + ctx->kv[idx].type = WSP_GGUF_TYPE_INT8; ctx->kv[idx].value.int8 = val; } -void gguf_set_val_u16(struct gguf_context * ctx, const char * key, uint16_t val) { - const int idx = gguf_get_or_add_key(ctx, key); +void wsp_gguf_set_val_u16(struct wsp_gguf_context * ctx, const char * key, uint16_t val) { + const int idx = wsp_gguf_get_or_add_key(ctx, key); - ctx->kv[idx].type = GGUF_TYPE_UINT16; + ctx->kv[idx].type = WSP_GGUF_TYPE_UINT16; ctx->kv[idx].value.uint16 = val; } -void gguf_set_val_i16(struct gguf_context * ctx, const char * key, int16_t val) { - const int idx = gguf_get_or_add_key(ctx, key); +void wsp_gguf_set_val_i16(struct wsp_gguf_context * ctx, const char * key, int16_t val) { + const int idx = wsp_gguf_get_or_add_key(ctx, key); - ctx->kv[idx].type = GGUF_TYPE_INT16; + ctx->kv[idx].type = WSP_GGUF_TYPE_INT16; ctx->kv[idx].value.int16 = val; } -void gguf_set_val_u32(struct gguf_context * ctx, const char * key, uint32_t val) { - const int idx = gguf_get_or_add_key(ctx, key); +void wsp_gguf_set_val_u32(struct wsp_gguf_context * ctx, const char * key, uint32_t val) { + const int idx = wsp_gguf_get_or_add_key(ctx, key); - ctx->kv[idx].type = GGUF_TYPE_UINT32; + ctx->kv[idx].type = WSP_GGUF_TYPE_UINT32; ctx->kv[idx].value.uint32 = val; } -void gguf_set_val_i32(struct gguf_context * ctx, const char * key, int32_t val) { - const int idx = gguf_get_or_add_key(ctx, key); +void wsp_gguf_set_val_i32(struct wsp_gguf_context * ctx, const char * key, int32_t val) { + const int idx = wsp_gguf_get_or_add_key(ctx, key); - ctx->kv[idx].type = GGUF_TYPE_INT32; + ctx->kv[idx].type = WSP_GGUF_TYPE_INT32; ctx->kv[idx].value.int32 = val; } -void gguf_set_val_f32(struct gguf_context * ctx, const char * key, float val) { - const int idx = gguf_get_or_add_key(ctx, key); +void wsp_gguf_set_val_f32(struct wsp_gguf_context * ctx, const char * key, float val) { + const int idx = wsp_gguf_get_or_add_key(ctx, key); - ctx->kv[idx].type = GGUF_TYPE_FLOAT32; + ctx->kv[idx].type = WSP_GGUF_TYPE_FLOAT32; ctx->kv[idx].value.float32 = val; } -void gguf_set_val_u64(struct gguf_context * ctx, const char * key, uint64_t val) { - const int idx = gguf_get_or_add_key(ctx, key); +void wsp_gguf_set_val_u64(struct wsp_gguf_context * ctx, const char * key, uint64_t val) { + const int idx = wsp_gguf_get_or_add_key(ctx, key); - ctx->kv[idx].type = GGUF_TYPE_UINT64; + ctx->kv[idx].type = WSP_GGUF_TYPE_UINT64; ctx->kv[idx].value.uint64 = val; } -void gguf_set_val_i64(struct gguf_context * ctx, const char * key, int64_t val) { - const int idx = gguf_get_or_add_key(ctx, key); +void wsp_gguf_set_val_i64(struct wsp_gguf_context * ctx, const char * key, int64_t val) { + const int idx = wsp_gguf_get_or_add_key(ctx, key); - ctx->kv[idx].type = GGUF_TYPE_INT64; + ctx->kv[idx].type = WSP_GGUF_TYPE_INT64; ctx->kv[idx].value.int64 = val; } -void gguf_set_val_f64(struct gguf_context * ctx, const char * key, double val) { - const int idx = gguf_get_or_add_key(ctx, key); +void wsp_gguf_set_val_f64(struct wsp_gguf_context * ctx, const char * key, double val) { + const int idx = wsp_gguf_get_or_add_key(ctx, key); - ctx->kv[idx].type = GGUF_TYPE_FLOAT64; + ctx->kv[idx].type = WSP_GGUF_TYPE_FLOAT64; ctx->kv[idx].value.float64 = val; } -void gguf_set_val_bool(struct gguf_context * ctx, const char * key, bool val) { - const int idx = gguf_get_or_add_key(ctx, key); +void wsp_gguf_set_val_bool(struct wsp_gguf_context * ctx, const char * key, bool val) { + const int idx = wsp_gguf_get_or_add_key(ctx, key); - ctx->kv[idx].type = GGUF_TYPE_BOOL; + ctx->kv[idx].type = WSP_GGUF_TYPE_BOOL; ctx->kv[idx].value.bool_ = val; } -void gguf_set_val_str(struct gguf_context * ctx, const char * key, const char * val) { - const int idx = gguf_get_or_add_key(ctx, key); +void wsp_gguf_set_val_str(struct wsp_gguf_context * ctx, const char * key, const char * val) { + const int idx = wsp_gguf_get_or_add_key(ctx, key); - ctx->kv[idx].type = GGUF_TYPE_STRING; + ctx->kv[idx].type = WSP_GGUF_TYPE_STRING; ctx->kv[idx].value.str.n = strlen(val); ctx->kv[idx].value.str.data = strdup(val); } -void gguf_set_arr_data(struct gguf_context * ctx, const char * key, enum gguf_type type, const void * data, int n) { - const int idx = gguf_get_or_add_key(ctx, key); +void wsp_gguf_set_arr_data(struct wsp_gguf_context * ctx, const char * key, enum wsp_gguf_type type, const void * data, int n) { + const int idx = wsp_gguf_get_or_add_key(ctx, key); - ctx->kv[idx].type = GGUF_TYPE_ARRAY; + ctx->kv[idx].type = WSP_GGUF_TYPE_ARRAY; ctx->kv[idx].value.arr.type = type; ctx->kv[idx].value.arr.n = n; - ctx->kv[idx].value.arr.data = malloc(n*GGUF_TYPE_SIZE[type]); - memcpy(ctx->kv[idx].value.arr.data, data, n*GGUF_TYPE_SIZE[type]); + ctx->kv[idx].value.arr.data = malloc(n*WSP_GGUF_TYPE_SIZE[type]); + memcpy(ctx->kv[idx].value.arr.data, data, n*WSP_GGUF_TYPE_SIZE[type]); } -void gguf_set_arr_str(struct gguf_context * ctx, const char * key, const char ** data, int n) { - const int idx = gguf_get_or_add_key(ctx, key); +void wsp_gguf_set_arr_str(struct wsp_gguf_context * ctx, const char * key, const char ** data, int n) { + const int idx = wsp_gguf_get_or_add_key(ctx, key); - ctx->kv[idx].type = GGUF_TYPE_ARRAY; - ctx->kv[idx].value.arr.type = GGUF_TYPE_STRING; + ctx->kv[idx].type = WSP_GGUF_TYPE_ARRAY; + ctx->kv[idx].value.arr.type = WSP_GGUF_TYPE_STRING; ctx->kv[idx].value.arr.n = n; - ctx->kv[idx].value.arr.data = malloc(n*sizeof(struct gguf_str)); + ctx->kv[idx].value.arr.data = malloc(n*sizeof(struct wsp_gguf_str)); for (int i = 0; i < n; i++) { - struct gguf_str * str = &((struct gguf_str *)ctx->kv[idx].value.arr.data)[i]; + struct wsp_gguf_str * str = &((struct wsp_gguf_str *)ctx->kv[idx].value.arr.data)[i]; str->n = strlen(data[i]); str->data = strdup(data[i]); } } // set or add KV pairs from another context -void gguf_set_kv(struct gguf_context * ctx, struct gguf_context * src) { +void wsp_gguf_set_kv(struct wsp_gguf_context * ctx, struct wsp_gguf_context * src) { for (uint32_t i = 0; i < src->header.n_kv; i++) { switch (src->kv[i].type) { - case GGUF_TYPE_UINT8: gguf_set_val_u8 (ctx, src->kv[i].key.data, src->kv[i].value.uint8); break; - case GGUF_TYPE_INT8: gguf_set_val_i8 (ctx, src->kv[i].key.data, src->kv[i].value.int8); break; - case GGUF_TYPE_UINT16: gguf_set_val_u16 (ctx, src->kv[i].key.data, src->kv[i].value.uint16); break; - case GGUF_TYPE_INT16: gguf_set_val_i16 (ctx, src->kv[i].key.data, src->kv[i].value.int16); break; - case GGUF_TYPE_UINT32: gguf_set_val_u32 (ctx, src->kv[i].key.data, src->kv[i].value.uint32); break; - case GGUF_TYPE_INT32: gguf_set_val_i32 (ctx, src->kv[i].key.data, src->kv[i].value.int32); break; - case GGUF_TYPE_FLOAT32: gguf_set_val_f32 (ctx, src->kv[i].key.data, src->kv[i].value.float32); break; - case GGUF_TYPE_UINT64: gguf_set_val_u64 (ctx, src->kv[i].key.data, src->kv[i].value.uint64); break; - case GGUF_TYPE_INT64: gguf_set_val_i64 (ctx, src->kv[i].key.data, src->kv[i].value.int64); break; - case GGUF_TYPE_FLOAT64: gguf_set_val_f64 (ctx, src->kv[i].key.data, src->kv[i].value.float64); break; - case GGUF_TYPE_BOOL: gguf_set_val_bool(ctx, src->kv[i].key.data, src->kv[i].value.bool_); break; - case GGUF_TYPE_STRING: gguf_set_val_str (ctx, src->kv[i].key.data, src->kv[i].value.str.data); break; - case GGUF_TYPE_ARRAY: + case WSP_GGUF_TYPE_UINT8: wsp_gguf_set_val_u8 (ctx, src->kv[i].key.data, src->kv[i].value.uint8); break; + case WSP_GGUF_TYPE_INT8: wsp_gguf_set_val_i8 (ctx, src->kv[i].key.data, src->kv[i].value.int8); break; + case WSP_GGUF_TYPE_UINT16: wsp_gguf_set_val_u16 (ctx, src->kv[i].key.data, src->kv[i].value.uint16); break; + case WSP_GGUF_TYPE_INT16: wsp_gguf_set_val_i16 (ctx, src->kv[i].key.data, src->kv[i].value.int16); break; + case WSP_GGUF_TYPE_UINT32: wsp_gguf_set_val_u32 (ctx, src->kv[i].key.data, src->kv[i].value.uint32); break; + case WSP_GGUF_TYPE_INT32: wsp_gguf_set_val_i32 (ctx, src->kv[i].key.data, src->kv[i].value.int32); break; + case WSP_GGUF_TYPE_FLOAT32: wsp_gguf_set_val_f32 (ctx, src->kv[i].key.data, src->kv[i].value.float32); break; + case WSP_GGUF_TYPE_UINT64: wsp_gguf_set_val_u64 (ctx, src->kv[i].key.data, src->kv[i].value.uint64); break; + case WSP_GGUF_TYPE_INT64: wsp_gguf_set_val_i64 (ctx, src->kv[i].key.data, src->kv[i].value.int64); break; + case WSP_GGUF_TYPE_FLOAT64: wsp_gguf_set_val_f64 (ctx, src->kv[i].key.data, src->kv[i].value.float64); break; + case WSP_GGUF_TYPE_BOOL: wsp_gguf_set_val_bool(ctx, src->kv[i].key.data, src->kv[i].value.bool_); break; + case WSP_GGUF_TYPE_STRING: wsp_gguf_set_val_str (ctx, src->kv[i].key.data, src->kv[i].value.str.data); break; + case WSP_GGUF_TYPE_ARRAY: { - if (src->kv[i].value.arr.type == GGUF_TYPE_STRING) { + if (src->kv[i].value.arr.type == WSP_GGUF_TYPE_STRING) { const char ** data = malloc(src->kv[i].value.arr.n*sizeof(char *)); for (uint32_t j = 0; j < src->kv[i].value.arr.n; j++) { - data[j] = ((struct gguf_str *)src->kv[i].value.arr.data)[j].data; + data[j] = ((struct wsp_gguf_str *)src->kv[i].value.arr.data)[j].data; } - gguf_set_arr_str(ctx, src->kv[i].key.data, data, src->kv[i].value.arr.n); + wsp_gguf_set_arr_str(ctx, src->kv[i].key.data, data, src->kv[i].value.arr.n); free(data); - } else if (src->kv[i].value.arr.type == GGUF_TYPE_ARRAY) { + } else if (src->kv[i].value.arr.type == WSP_GGUF_TYPE_ARRAY) { WSP_GGML_ASSERT(false && "nested arrays not supported"); } else { - gguf_set_arr_data(ctx, src->kv[i].key.data, src->kv[i].value.arr.type, src->kv[i].value.arr.data, src->kv[i].value.arr.n); + wsp_gguf_set_arr_data(ctx, src->kv[i].key.data, src->kv[i].value.arr.type, src->kv[i].value.arr.data, src->kv[i].value.arr.n); } } break; - case GGUF_TYPE_COUNT: WSP_GGML_ASSERT(false && "invalid type"); break; + case WSP_GGUF_TYPE_COUNT: WSP_GGML_ASSERT(false && "invalid type"); break; } } } -void gguf_add_tensor( - struct gguf_context * ctx, +void wsp_gguf_add_tensor( + struct wsp_gguf_context * ctx, const struct wsp_ggml_tensor * tensor) { const int idx = ctx->header.n_tensors; - ctx->infos = realloc(ctx->infos, (idx + 1)*sizeof(struct gguf_tensor_info)); + ctx->infos = realloc(ctx->infos, (idx + 1)*sizeof(struct wsp_gguf_tensor_info)); ctx->infos[idx].name.n = strlen(tensor->name); ctx->infos[idx].name.data = strdup(tensor->name); @@ -20449,8 +20449,8 @@ void gguf_add_tensor( ctx->header.n_tensors++; } -void gguf_set_tensor_type(struct gguf_context * ctx, const char * name, enum wsp_ggml_type type) { - const int idx = gguf_find_tensor(ctx, name); +void wsp_gguf_set_tensor_type(struct wsp_gguf_context * ctx, const char * name, enum wsp_ggml_type type) { + const int idx = wsp_gguf_find_tensor(ctx, name); if (idx < 0) { WSP_GGML_ASSERT(false && "tensor not found"); } @@ -20458,8 +20458,8 @@ void gguf_set_tensor_type(struct gguf_context * ctx, const char * name, enum wsp ctx->infos[idx].type = type; } -void gguf_set_tensor_data(struct gguf_context * ctx, const char * name, const void * data, size_t size) { - const int idx = gguf_find_tensor(ctx, name); +void wsp_gguf_set_tensor_data(struct wsp_gguf_context * ctx, const char * name, const void * data, size_t size) { + const int idx = wsp_gguf_find_tensor(ctx, name); if (idx < 0) { WSP_GGML_ASSERT(false && "tensor not found"); } @@ -20473,23 +20473,23 @@ void gguf_set_tensor_data(struct gguf_context * ctx, const char * name, const vo } } -//static void gguf_fwrite_str(FILE * file, const struct gguf_str * val) { +//static void wsp_gguf_fwrite_str(FILE * file, const struct wsp_gguf_str * val) { // fwrite(&val->n, sizeof(val->n), 1, file); // fwrite(val->data, sizeof(char), val->n, file); //} // -//static void gguf_fwrite_el(FILE * file, const void * val, size_t size) { +//static void wsp_gguf_fwrite_el(FILE * file, const void * val, size_t size) { // fwrite(val, sizeof(char), size, file); //} -struct gguf_buf { +struct wsp_gguf_buf { void * data; size_t size; size_t offset; }; -static struct gguf_buf gguf_buf_init(size_t size) { - struct gguf_buf buf = { +static struct wsp_gguf_buf wsp_gguf_buf_init(size_t size) { + struct wsp_gguf_buf buf = { /*buf.data =*/ size == 0 ? NULL : malloc(size), /*buf.size =*/ size, /*buf.offset =*/ 0, @@ -20498,13 +20498,13 @@ static struct gguf_buf gguf_buf_init(size_t size) { return buf; } -static void gguf_buf_free(struct gguf_buf buf) { +static void wsp_gguf_buf_free(struct wsp_gguf_buf buf) { if (buf.data) { free(buf.data); } } -static void gguf_buf_grow(struct gguf_buf * buf, size_t size) { +static void wsp_gguf_buf_grow(struct wsp_gguf_buf * buf, size_t size) { if (buf->offset + size > buf->size) { buf->size = 1.5*(buf->offset + size); if (buf->data) { @@ -20513,8 +20513,8 @@ static void gguf_buf_grow(struct gguf_buf * buf, size_t size) { } } -static void gguf_bwrite_str(struct gguf_buf * buf, const struct gguf_str * val) { - gguf_buf_grow(buf, sizeof(val->n) + val->n); +static void wsp_gguf_bwrite_str(struct wsp_gguf_buf * buf, const struct wsp_gguf_str * val) { + wsp_gguf_buf_grow(buf, sizeof(val->n) + val->n); if (buf->data) { memcpy((char *) buf->data + buf->offset, &val->n, sizeof(val->n)); @@ -20527,8 +20527,8 @@ static void gguf_bwrite_str(struct gguf_buf * buf, const struct gguf_str * val) buf->offset += val->n; } -static void gguf_bwrite_el(struct gguf_buf * buf, const void * val, size_t el_size) { - gguf_buf_grow(buf, el_size); +static void wsp_gguf_bwrite_el(struct wsp_gguf_buf * buf, const void * val, size_t el_size) { + wsp_gguf_buf_grow(buf, el_size); if (buf->data) { memcpy((char *) buf->data + buf->offset, val, el_size); @@ -20536,78 +20536,78 @@ static void gguf_bwrite_el(struct gguf_buf * buf, const void * val, size_t el_si buf->offset += el_size; } -static void gguf_write_to_buf(const struct gguf_context * ctx, struct gguf_buf * buf, bool only_meta) { +static void wsp_gguf_write_to_buf(const struct wsp_gguf_context * ctx, struct wsp_gguf_buf * buf, bool only_meta) { // write header - gguf_bwrite_el(buf, &ctx->header.magic, sizeof(ctx->header.magic)); - gguf_bwrite_el(buf, &ctx->header.version, sizeof(ctx->header.version)); - gguf_bwrite_el(buf, &ctx->header.n_tensors, sizeof(ctx->header.n_tensors)); - gguf_bwrite_el(buf, &ctx->header.n_kv, sizeof(ctx->header.n_kv)); + wsp_gguf_bwrite_el(buf, &ctx->header.magic, sizeof(ctx->header.magic)); + wsp_gguf_bwrite_el(buf, &ctx->header.version, sizeof(ctx->header.version)); + wsp_gguf_bwrite_el(buf, &ctx->header.n_tensors, sizeof(ctx->header.n_tensors)); + wsp_gguf_bwrite_el(buf, &ctx->header.n_kv, sizeof(ctx->header.n_kv)); // write key-value pairs for (uint32_t i = 0; i < ctx->header.n_kv; ++i) { - struct gguf_kv * kv = &ctx->kv[i]; + struct wsp_gguf_kv * kv = &ctx->kv[i]; - gguf_bwrite_str(buf, &kv->key); - gguf_bwrite_el (buf, &kv->type, sizeof(kv->type)); + wsp_gguf_bwrite_str(buf, &kv->key); + wsp_gguf_bwrite_el (buf, &kv->type, sizeof(kv->type)); switch (kv->type) { - case GGUF_TYPE_UINT8: gguf_bwrite_el( buf, &kv->value.uint8, sizeof(kv->value.uint8) ); break; - case GGUF_TYPE_INT8: gguf_bwrite_el (buf, &kv->value.int8, sizeof(kv->value.int8) ); break; - case GGUF_TYPE_UINT16: gguf_bwrite_el (buf, &kv->value.uint16, sizeof(kv->value.uint16) ); break; - case GGUF_TYPE_INT16: gguf_bwrite_el (buf, &kv->value.int16, sizeof(kv->value.int16) ); break; - case GGUF_TYPE_UINT32: gguf_bwrite_el (buf, &kv->value.uint32, sizeof(kv->value.uint32) ); break; - case GGUF_TYPE_INT32: gguf_bwrite_el (buf, &kv->value.int32, sizeof(kv->value.int32) ); break; - case GGUF_TYPE_FLOAT32: gguf_bwrite_el (buf, &kv->value.float32, sizeof(kv->value.float32)); break; - case GGUF_TYPE_UINT64: gguf_bwrite_el (buf, &kv->value.uint64, sizeof(kv->value.uint64) ); break; - case GGUF_TYPE_INT64: gguf_bwrite_el (buf, &kv->value.int64, sizeof(kv->value.int64) ); break; - case GGUF_TYPE_FLOAT64: gguf_bwrite_el (buf, &kv->value.float64, sizeof(kv->value.float64)); break; - case GGUF_TYPE_BOOL: gguf_bwrite_el (buf, &kv->value.bool_, sizeof(kv->value.bool_) ); break; - case GGUF_TYPE_STRING: gguf_bwrite_str(buf, &kv->value.str ); break; - case GGUF_TYPE_ARRAY: + case WSP_GGUF_TYPE_UINT8: wsp_gguf_bwrite_el( buf, &kv->value.uint8, sizeof(kv->value.uint8) ); break; + case WSP_GGUF_TYPE_INT8: wsp_gguf_bwrite_el (buf, &kv->value.int8, sizeof(kv->value.int8) ); break; + case WSP_GGUF_TYPE_UINT16: wsp_gguf_bwrite_el (buf, &kv->value.uint16, sizeof(kv->value.uint16) ); break; + case WSP_GGUF_TYPE_INT16: wsp_gguf_bwrite_el (buf, &kv->value.int16, sizeof(kv->value.int16) ); break; + case WSP_GGUF_TYPE_UINT32: wsp_gguf_bwrite_el (buf, &kv->value.uint32, sizeof(kv->value.uint32) ); break; + case WSP_GGUF_TYPE_INT32: wsp_gguf_bwrite_el (buf, &kv->value.int32, sizeof(kv->value.int32) ); break; + case WSP_GGUF_TYPE_FLOAT32: wsp_gguf_bwrite_el (buf, &kv->value.float32, sizeof(kv->value.float32)); break; + case WSP_GGUF_TYPE_UINT64: wsp_gguf_bwrite_el (buf, &kv->value.uint64, sizeof(kv->value.uint64) ); break; + case WSP_GGUF_TYPE_INT64: wsp_gguf_bwrite_el (buf, &kv->value.int64, sizeof(kv->value.int64) ); break; + case WSP_GGUF_TYPE_FLOAT64: wsp_gguf_bwrite_el (buf, &kv->value.float64, sizeof(kv->value.float64)); break; + case WSP_GGUF_TYPE_BOOL: wsp_gguf_bwrite_el (buf, &kv->value.bool_, sizeof(kv->value.bool_) ); break; + case WSP_GGUF_TYPE_STRING: wsp_gguf_bwrite_str(buf, &kv->value.str ); break; + case WSP_GGUF_TYPE_ARRAY: { - gguf_bwrite_el(buf, &kv->value.arr.type, sizeof(kv->value.arr.type)); - gguf_bwrite_el(buf, &kv->value.arr.n, sizeof(kv->value.arr.n) ); + wsp_gguf_bwrite_el(buf, &kv->value.arr.type, sizeof(kv->value.arr.type)); + wsp_gguf_bwrite_el(buf, &kv->value.arr.n, sizeof(kv->value.arr.n) ); switch (kv->value.arr.type) { - case GGUF_TYPE_UINT8: - case GGUF_TYPE_INT8: - case GGUF_TYPE_UINT16: - case GGUF_TYPE_INT16: - case GGUF_TYPE_UINT32: - case GGUF_TYPE_INT32: - case GGUF_TYPE_FLOAT32: - case GGUF_TYPE_UINT64: - case GGUF_TYPE_INT64: - case GGUF_TYPE_FLOAT64: - case GGUF_TYPE_BOOL: + case WSP_GGUF_TYPE_UINT8: + case WSP_GGUF_TYPE_INT8: + case WSP_GGUF_TYPE_UINT16: + case WSP_GGUF_TYPE_INT16: + case WSP_GGUF_TYPE_UINT32: + case WSP_GGUF_TYPE_INT32: + case WSP_GGUF_TYPE_FLOAT32: + case WSP_GGUF_TYPE_UINT64: + case WSP_GGUF_TYPE_INT64: + case WSP_GGUF_TYPE_FLOAT64: + case WSP_GGUF_TYPE_BOOL: { - gguf_bwrite_el(buf, kv->value.arr.data, kv->value.arr.n * GGUF_TYPE_SIZE[kv->value.arr.type]); + wsp_gguf_bwrite_el(buf, kv->value.arr.data, kv->value.arr.n * WSP_GGUF_TYPE_SIZE[kv->value.arr.type]); } break; - case GGUF_TYPE_STRING: + case WSP_GGUF_TYPE_STRING: { for (uint32_t j = 0; j < kv->value.arr.n; ++j) { - gguf_bwrite_str(buf, &((struct gguf_str *) kv->value.arr.data)[j]); + wsp_gguf_bwrite_str(buf, &((struct wsp_gguf_str *) kv->value.arr.data)[j]); } } break; - case GGUF_TYPE_ARRAY: - case GGUF_TYPE_COUNT: WSP_GGML_ASSERT(false && "invalid type"); break; + case WSP_GGUF_TYPE_ARRAY: + case WSP_GGUF_TYPE_COUNT: WSP_GGML_ASSERT(false && "invalid type"); break; }; } break; - case GGUF_TYPE_COUNT: WSP_GGML_ASSERT(false && "invalid type"); + case WSP_GGUF_TYPE_COUNT: WSP_GGML_ASSERT(false && "invalid type"); }; } // write tensor infos for (uint32_t i = 0; i < ctx->header.n_tensors; ++i) { - struct gguf_tensor_info * info = &ctx->infos[i]; + struct wsp_gguf_tensor_info * info = &ctx->infos[i]; - gguf_bwrite_str(buf, &info->name); - gguf_bwrite_el (buf, &info->n_dims, sizeof(info->n_dims)); + wsp_gguf_bwrite_str(buf, &info->name); + wsp_gguf_bwrite_el (buf, &info->n_dims, sizeof(info->n_dims)); for (uint32_t j = 0; j < info->n_dims; ++j) { - gguf_bwrite_el(buf, &info->ne[j], sizeof(info->ne[j])); + wsp_gguf_bwrite_el(buf, &info->ne[j], sizeof(info->ne[j])); } - gguf_bwrite_el(buf, &info->type, sizeof(info->type)); - gguf_bwrite_el(buf, &info->offset, sizeof(info->offset)); + wsp_gguf_bwrite_el(buf, &info->type, sizeof(info->type)); + wsp_gguf_bwrite_el(buf, &info->offset, sizeof(info->offset)); } // we require the data section to be aligned, so take into account any padding @@ -20618,7 +20618,7 @@ static void gguf_write_to_buf(const struct gguf_context * ctx, struct gguf_buf * if (offset_pad != offset) { uint8_t pad = 0; for (size_t i = 0; i < offset_pad - offset; ++i) { - gguf_bwrite_el(buf, &pad, sizeof(pad)); + wsp_gguf_bwrite_el(buf, &pad, sizeof(pad)); } } } @@ -20631,17 +20631,17 @@ static void gguf_write_to_buf(const struct gguf_context * ctx, struct gguf_buf * // write tensor data for (uint32_t i = 0; i < ctx->header.n_tensors; ++i) { - struct gguf_tensor_info * info = &ctx->infos[i]; + struct wsp_gguf_tensor_info * info = &ctx->infos[i]; const size_t size = info->size; const size_t size_pad = WSP_GGML_PAD(size, ctx->alignment); - gguf_bwrite_el(buf, info->data, size); + wsp_gguf_bwrite_el(buf, info->data, size); if (size_pad != size) { uint8_t pad = 0; for (size_t j = 0; j < size_pad - size; ++j) { - gguf_bwrite_el(buf, &pad, sizeof(pad)); + wsp_gguf_bwrite_el(buf, &pad, sizeof(pad)); } } @@ -20651,40 +20651,40 @@ static void gguf_write_to_buf(const struct gguf_context * ctx, struct gguf_buf * } } -void gguf_write_to_file(const struct gguf_context * ctx, const char * fname, bool only_meta) { +void wsp_gguf_write_to_file(const struct wsp_gguf_context * ctx, const char * fname, bool only_meta) { FILE * file = fopen(fname, "wb"); if (!file) { WSP_GGML_ASSERT(false && "failed to open file for writing"); } - struct gguf_buf buf = gguf_buf_init(16*1024); + struct wsp_gguf_buf buf = wsp_gguf_buf_init(16*1024); - gguf_write_to_buf(ctx, &buf, only_meta); + wsp_gguf_write_to_buf(ctx, &buf, only_meta); fwrite(buf.data, 1, buf.offset, file); - gguf_buf_free(buf); + wsp_gguf_buf_free(buf); fclose(file); } -size_t gguf_get_meta_size(const struct gguf_context * ctx) { +size_t wsp_gguf_get_meta_size(const struct wsp_gguf_context * ctx) { // no allocs - only compute size - struct gguf_buf buf = gguf_buf_init(0); + struct wsp_gguf_buf buf = wsp_gguf_buf_init(0); - gguf_write_to_buf(ctx, &buf, true); + wsp_gguf_write_to_buf(ctx, &buf, true); return buf.offset; } -void gguf_get_meta_data(const struct gguf_context * ctx, void * data) { - struct gguf_buf buf = gguf_buf_init(16*1024); +void wsp_gguf_get_meta_data(const struct wsp_gguf_context * ctx, void * data) { + struct wsp_gguf_buf buf = wsp_gguf_buf_init(16*1024); - gguf_write_to_buf(ctx, &buf, true); + wsp_gguf_write_to_buf(ctx, &buf, true); memcpy(data, buf.data, buf.offset); - gguf_buf_free(buf); + wsp_gguf_buf_free(buf); } //////////////////////////////////////////////////////////////////////////////// diff --git a/cpp/ggml.h b/cpp/ggml.h index a165aae..f1bbd88 100644 --- a/cpp/ggml.h +++ b/cpp/ggml.h @@ -231,10 +231,10 @@ #define WSP_GGML_EXIT_SUCCESS 0 #define WSP_GGML_EXIT_ABORTED 1 -#define GGUF_MAGIC 0x46554747 // "GGUF" -#define GGUF_VERSION 2 +#define WSP_GGUF_MAGIC 0x46554747 // "GGUF" +#define WSP_GGUF_VERSION 2 -#define GGUF_DEFAULT_ALIGNMENT 32 +#define WSP_GGUF_DEFAULT_ALIGNMENT 32 #define WSP_GGML_UNUSED(x) (void)(x) @@ -1841,122 +1841,122 @@ extern "C" { // gguf // - enum gguf_type { - GGUF_TYPE_UINT8 = 0, - GGUF_TYPE_INT8 = 1, - GGUF_TYPE_UINT16 = 2, - GGUF_TYPE_INT16 = 3, - GGUF_TYPE_UINT32 = 4, - GGUF_TYPE_INT32 = 5, - GGUF_TYPE_FLOAT32 = 6, - GGUF_TYPE_BOOL = 7, - GGUF_TYPE_STRING = 8, - GGUF_TYPE_ARRAY = 9, - GGUF_TYPE_UINT64 = 10, - GGUF_TYPE_INT64 = 11, - GGUF_TYPE_FLOAT64 = 12, - GGUF_TYPE_COUNT, // marks the end of the enum + enum wsp_gguf_type { + WSP_GGUF_TYPE_UINT8 = 0, + WSP_GGUF_TYPE_INT8 = 1, + WSP_GGUF_TYPE_UINT16 = 2, + WSP_GGUF_TYPE_INT16 = 3, + WSP_GGUF_TYPE_UINT32 = 4, + WSP_GGUF_TYPE_INT32 = 5, + WSP_GGUF_TYPE_FLOAT32 = 6, + WSP_GGUF_TYPE_BOOL = 7, + WSP_GGUF_TYPE_STRING = 8, + WSP_GGUF_TYPE_ARRAY = 9, + WSP_GGUF_TYPE_UINT64 = 10, + WSP_GGUF_TYPE_INT64 = 11, + WSP_GGUF_TYPE_FLOAT64 = 12, + WSP_GGUF_TYPE_COUNT, // marks the end of the enum }; - struct gguf_context; + struct wsp_gguf_context; - struct gguf_init_params { + struct wsp_gguf_init_params { bool no_alloc; // if not NULL, create a wsp_ggml_context and allocate the tensor data in it struct wsp_ggml_context ** ctx; }; - WSP_GGML_API struct gguf_context * gguf_init_empty(void); - WSP_GGML_API struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_params params); - //WSP_GGML_API struct gguf_context * gguf_init_from_buffer(..); + WSP_GGML_API struct wsp_gguf_context * wsp_gguf_init_empty(void); + WSP_GGML_API struct wsp_gguf_context * wsp_gguf_init_from_file(const char * fname, struct wsp_gguf_init_params params); + //WSP_GGML_API struct wsp_gguf_context * wsp_gguf_init_from_buffer(..); - WSP_GGML_API void gguf_free(struct gguf_context * ctx); + WSP_GGML_API void wsp_gguf_free(struct wsp_gguf_context * ctx); - WSP_GGML_API const char * gguf_type_name(enum gguf_type type); + WSP_GGML_API const char * wsp_gguf_type_name(enum wsp_gguf_type type); - WSP_GGML_API int gguf_get_version (const struct gguf_context * ctx); - WSP_GGML_API size_t gguf_get_alignment (const struct gguf_context * ctx); - WSP_GGML_API size_t gguf_get_data_offset(const struct gguf_context * ctx); - WSP_GGML_API void * gguf_get_data (const struct gguf_context * ctx); + WSP_GGML_API int wsp_gguf_get_version (const struct wsp_gguf_context * ctx); + WSP_GGML_API size_t wsp_gguf_get_alignment (const struct wsp_gguf_context * ctx); + WSP_GGML_API size_t wsp_gguf_get_data_offset(const struct wsp_gguf_context * ctx); + WSP_GGML_API void * wsp_gguf_get_data (const struct wsp_gguf_context * ctx); - WSP_GGML_API int gguf_get_n_kv(const struct gguf_context * ctx); - WSP_GGML_API int gguf_find_key(const struct gguf_context * ctx, const char * key); - WSP_GGML_API const char * gguf_get_key (const struct gguf_context * ctx, int i); + WSP_GGML_API int wsp_gguf_get_n_kv(const struct wsp_gguf_context * ctx); + WSP_GGML_API int wsp_gguf_find_key(const struct wsp_gguf_context * ctx, const char * key); + WSP_GGML_API const char * wsp_gguf_get_key (const struct wsp_gguf_context * ctx, int i); - WSP_GGML_API enum gguf_type gguf_get_kv_type (const struct gguf_context * ctx, int i); - WSP_GGML_API enum gguf_type gguf_get_arr_type(const struct gguf_context * ctx, int i); + WSP_GGML_API enum wsp_gguf_type wsp_gguf_get_kv_type (const struct wsp_gguf_context * ctx, int i); + WSP_GGML_API enum wsp_gguf_type wsp_gguf_get_arr_type(const struct wsp_gguf_context * ctx, int i); // results are undefined if the wrong type is used for the key - WSP_GGML_API uint8_t gguf_get_val_u8 (const struct gguf_context * ctx, int i); - WSP_GGML_API int8_t gguf_get_val_i8 (const struct gguf_context * ctx, int i); - WSP_GGML_API uint16_t gguf_get_val_u16 (const struct gguf_context * ctx, int i); - WSP_GGML_API int16_t gguf_get_val_i16 (const struct gguf_context * ctx, int i); - WSP_GGML_API uint32_t gguf_get_val_u32 (const struct gguf_context * ctx, int i); - WSP_GGML_API int32_t gguf_get_val_i32 (const struct gguf_context * ctx, int i); - WSP_GGML_API float gguf_get_val_f32 (const struct gguf_context * ctx, int i); - WSP_GGML_API uint64_t gguf_get_val_u64 (const struct gguf_context * ctx, int i); - WSP_GGML_API int64_t gguf_get_val_i64 (const struct gguf_context * ctx, int i); - WSP_GGML_API double gguf_get_val_f64 (const struct gguf_context * ctx, int i); - WSP_GGML_API bool gguf_get_val_bool(const struct gguf_context * ctx, int i); - WSP_GGML_API const char * gguf_get_val_str (const struct gguf_context * ctx, int i); - WSP_GGML_API int gguf_get_arr_n (const struct gguf_context * ctx, int i); - WSP_GGML_API const void * gguf_get_arr_data(const struct gguf_context * ctx, int i); - WSP_GGML_API const char * gguf_get_arr_str (const struct gguf_context * ctx, int key_id, int i); - - WSP_GGML_API int gguf_get_n_tensors (const struct gguf_context * ctx); - WSP_GGML_API int gguf_find_tensor (const struct gguf_context * ctx, const char * name); - WSP_GGML_API size_t gguf_get_tensor_offset(const struct gguf_context * ctx, int i); - WSP_GGML_API char * gguf_get_tensor_name (const struct gguf_context * ctx, int i); + WSP_GGML_API uint8_t wsp_gguf_get_val_u8 (const struct wsp_gguf_context * ctx, int i); + WSP_GGML_API int8_t wsp_gguf_get_val_i8 (const struct wsp_gguf_context * ctx, int i); + WSP_GGML_API uint16_t wsp_gguf_get_val_u16 (const struct wsp_gguf_context * ctx, int i); + WSP_GGML_API int16_t wsp_gguf_get_val_i16 (const struct wsp_gguf_context * ctx, int i); + WSP_GGML_API uint32_t wsp_gguf_get_val_u32 (const struct wsp_gguf_context * ctx, int i); + WSP_GGML_API int32_t wsp_gguf_get_val_i32 (const struct wsp_gguf_context * ctx, int i); + WSP_GGML_API float wsp_gguf_get_val_f32 (const struct wsp_gguf_context * ctx, int i); + WSP_GGML_API uint64_t wsp_gguf_get_val_u64 (const struct wsp_gguf_context * ctx, int i); + WSP_GGML_API int64_t wsp_gguf_get_val_i64 (const struct wsp_gguf_context * ctx, int i); + WSP_GGML_API double wsp_gguf_get_val_f64 (const struct wsp_gguf_context * ctx, int i); + WSP_GGML_API bool wsp_gguf_get_val_bool(const struct wsp_gguf_context * ctx, int i); + WSP_GGML_API const char * wsp_gguf_get_val_str (const struct wsp_gguf_context * ctx, int i); + WSP_GGML_API int wsp_gguf_get_arr_n (const struct wsp_gguf_context * ctx, int i); + WSP_GGML_API const void * wsp_gguf_get_arr_data(const struct wsp_gguf_context * ctx, int i); + WSP_GGML_API const char * wsp_gguf_get_arr_str (const struct wsp_gguf_context * ctx, int key_id, int i); + + WSP_GGML_API int wsp_gguf_get_n_tensors (const struct wsp_gguf_context * ctx); + WSP_GGML_API int wsp_gguf_find_tensor (const struct wsp_gguf_context * ctx, const char * name); + WSP_GGML_API size_t wsp_gguf_get_tensor_offset(const struct wsp_gguf_context * ctx, int i); + WSP_GGML_API char * wsp_gguf_get_tensor_name (const struct wsp_gguf_context * ctx, int i); // overrides existing values or adds a new one - WSP_GGML_API void gguf_set_val_u8 (struct gguf_context * ctx, const char * key, uint8_t val); - WSP_GGML_API void gguf_set_val_i8 (struct gguf_context * ctx, const char * key, int8_t val); - WSP_GGML_API void gguf_set_val_u16 (struct gguf_context * ctx, const char * key, uint16_t val); - WSP_GGML_API void gguf_set_val_i16 (struct gguf_context * ctx, const char * key, int16_t val); - WSP_GGML_API void gguf_set_val_u32 (struct gguf_context * ctx, const char * key, uint32_t val); - WSP_GGML_API void gguf_set_val_i32 (struct gguf_context * ctx, const char * key, int32_t val); - WSP_GGML_API void gguf_set_val_f32 (struct gguf_context * ctx, const char * key, float val); - WSP_GGML_API void gguf_set_val_u64 (struct gguf_context * ctx, const char * key, uint64_t val); - WSP_GGML_API void gguf_set_val_i64 (struct gguf_context * ctx, const char * key, int64_t val); - WSP_GGML_API void gguf_set_val_f64 (struct gguf_context * ctx, const char * key, double val); - WSP_GGML_API void gguf_set_val_bool(struct gguf_context * ctx, const char * key, bool val); - WSP_GGML_API void gguf_set_val_str (struct gguf_context * ctx, const char * key, const char * val); - WSP_GGML_API void gguf_set_arr_data(struct gguf_context * ctx, const char * key, enum gguf_type type, const void * data, int n); - WSP_GGML_API void gguf_set_arr_str (struct gguf_context * ctx, const char * key, const char ** data, int n); + WSP_GGML_API void wsp_gguf_set_val_u8 (struct wsp_gguf_context * ctx, const char * key, uint8_t val); + WSP_GGML_API void wsp_gguf_set_val_i8 (struct wsp_gguf_context * ctx, const char * key, int8_t val); + WSP_GGML_API void wsp_gguf_set_val_u16 (struct wsp_gguf_context * ctx, const char * key, uint16_t val); + WSP_GGML_API void wsp_gguf_set_val_i16 (struct wsp_gguf_context * ctx, const char * key, int16_t val); + WSP_GGML_API void wsp_gguf_set_val_u32 (struct wsp_gguf_context * ctx, const char * key, uint32_t val); + WSP_GGML_API void wsp_gguf_set_val_i32 (struct wsp_gguf_context * ctx, const char * key, int32_t val); + WSP_GGML_API void wsp_gguf_set_val_f32 (struct wsp_gguf_context * ctx, const char * key, float val); + WSP_GGML_API void wsp_gguf_set_val_u64 (struct wsp_gguf_context * ctx, const char * key, uint64_t val); + WSP_GGML_API void wsp_gguf_set_val_i64 (struct wsp_gguf_context * ctx, const char * key, int64_t val); + WSP_GGML_API void wsp_gguf_set_val_f64 (struct wsp_gguf_context * ctx, const char * key, double val); + WSP_GGML_API void wsp_gguf_set_val_bool(struct wsp_gguf_context * ctx, const char * key, bool val); + WSP_GGML_API void wsp_gguf_set_val_str (struct wsp_gguf_context * ctx, const char * key, const char * val); + WSP_GGML_API void wsp_gguf_set_arr_data(struct wsp_gguf_context * ctx, const char * key, enum wsp_gguf_type type, const void * data, int n); + WSP_GGML_API void wsp_gguf_set_arr_str (struct wsp_gguf_context * ctx, const char * key, const char ** data, int n); // set or add KV pairs from another context - WSP_GGML_API void gguf_set_kv(struct gguf_context * ctx, struct gguf_context * src); + WSP_GGML_API void wsp_gguf_set_kv(struct wsp_gguf_context * ctx, struct wsp_gguf_context * src); // manage tensor info - WSP_GGML_API void gguf_add_tensor(struct gguf_context * ctx, const struct wsp_ggml_tensor * tensor); - WSP_GGML_API void gguf_set_tensor_type(struct gguf_context * ctx, const char * name, enum wsp_ggml_type type); - WSP_GGML_API void gguf_set_tensor_data(struct gguf_context * ctx, const char * name, const void * data, size_t size); + WSP_GGML_API void wsp_gguf_add_tensor(struct wsp_gguf_context * ctx, const struct wsp_ggml_tensor * tensor); + WSP_GGML_API void wsp_gguf_set_tensor_type(struct wsp_gguf_context * ctx, const char * name, enum wsp_ggml_type type); + WSP_GGML_API void wsp_gguf_set_tensor_data(struct wsp_gguf_context * ctx, const char * name, const void * data, size_t size); // writing gguf files can be done in 2 ways: // - // - write the entire gguf_context to a binary file in a single pass: + // - write the entire wsp_gguf_context to a binary file in a single pass: // - // gguf_write_to_file(ctx, fname); + // wsp_gguf_write_to_file(ctx, fname); // // - first prepare a file with a placeholder for the meta data, write the tensor data, then write the meta data: // // FILE * f = fopen(fname, "wb"); - // fseek(f, gguf_get_meta_size(ctx), SEEK_SET); + // fseek(f, wsp_gguf_get_meta_size(ctx), SEEK_SET); // fwrite(f, ...); - // void * data = gguf_meta_get_meta_data(ctx); + // void * data = wsp_gguf_meta_get_meta_data(ctx); // fseek(f, 0, SEEK_SET); - // fwrite(f, data, gguf_get_meta_size(ctx)); + // fwrite(f, data, wsp_gguf_get_meta_size(ctx)); // free(data); // fclose(f); // // write the entire context to a binary file - WSP_GGML_API void gguf_write_to_file(const struct gguf_context * ctx, const char * fname, bool only_meta); + WSP_GGML_API void wsp_gguf_write_to_file(const struct wsp_gguf_context * ctx, const char * fname, bool only_meta); // get the size in bytes of the meta data (header, kv pairs, tensor info) including padding - WSP_GGML_API size_t gguf_get_meta_size(const struct gguf_context * ctx); - WSP_GGML_API void gguf_get_meta_data(const struct gguf_context * ctx, void * data); + WSP_GGML_API size_t wsp_gguf_get_meta_size(const struct wsp_gguf_context * ctx); + WSP_GGML_API void wsp_gguf_get_meta_data(const struct wsp_gguf_context * ctx, void * data); // // system info diff --git a/scripts/bootstrap.sh b/scripts/bootstrap.sh index ff9eed5..8549b68 100755 --- a/scripts/bootstrap.sh +++ b/scripts/bootstrap.sh @@ -9,7 +9,7 @@ cp ./whisper.cpp/ggml-alloc.h ./cpp/ggml-alloc.h cp ./whisper.cpp/ggml-alloc.c ./cpp/ggml-alloc.c cp ./whisper.cpp/ggml-metal.h ./cpp/ggml-metal.h cp ./whisper.cpp/ggml-metal.m ./cpp/ggml-metal.m -cp ./whisper.cpp/ggml-metal.metal ./cpp/ggml-metal.metal +cp ./whisper.cpp/ggml-metal.metal ./cpp/ggml-metal-whisper.metal cp ./whisper.cpp/whisper.h ./cpp/whisper.h cp ./whisper.cpp/whisper.cpp ./cpp/whisper.cpp @@ -35,10 +35,14 @@ for file in "${files[@]}"; do if [ "$OS" = "Darwin" ]; then sed -i '' 's/GGML_/WSP_GGML_/g' $file sed -i '' 's/ggml_/wsp_ggml_/g' $file + sed -i '' 's/GGUF_/WSP_GGUF_/g' $file + sed -i '' 's/gguf_/wsp_gguf_/g' $file sed -i '' 's/GGMLMetalClass/WSPGGMLMetalClass/g' $file else sed -i 's/GGML_/WSP_GGML_/g' $file sed -i 's/ggml_/wsp_ggml_/g' $file + sed -i 's/GGUF_/WSP_GGUF_/g' $file + sed -i 's/gguf_/wsp_gguf_/g' $file sed -i 's/GGMLMetalClass/WSPGGMLMetalClass/g' $file fi done diff --git a/scripts/ggml-metal.m.patch b/scripts/ggml-metal.m.patch index 7cdf569..222da8a 100644 --- a/scripts/ggml-metal.m.patch +++ b/scripts/ggml-metal.m.patch @@ -1,5 +1,14 @@ ---- ggml-metal.m.orig 2023-10-05 09:26:55 -+++ ggml-metal.m 2023-10-05 09:26:56 +--- ggml-metal.m.orig 2023-10-25 17:55:09 ++++ ggml-metal.m 2023-10-25 17:55:42 +@@ -178,7 +178,7 @@ + + //NSString * path = [[NSBundle mainBundle] pathForResource:@"../../examples/metal/metal" ofType:@"metal"]; + NSBundle * bundle = [NSBundle bundleForClass:[WSPGGMLMetalClass class]]; +- NSString * path = [bundle pathForResource:@"ggml-metal" ofType:@"metal"]; ++ NSString * path = [bundle pathForResource:@"ggml-metal-whisper" ofType:@"metal"]; + metal_printf("%s: loading '%s'\n", __func__, [path UTF8String]); + + NSString * src = [NSString stringWithContentsOfFile:path encoding:NSUTF8StringEncoding error:&error]; @@ -207,7 +207,7 @@ #define WSP_GGML_METAL_ADD_KERNEL(name) \ ctx->function_##name = [ctx->library newFunctionWithName:@"kernel_"#name]; \ @@ -18,7 +27,7 @@ WSP_GGML_METAL_DEL_KERNEL(add); WSP_GGML_METAL_DEL_KERNEL(add_row); -@@ -342,16 +340,6 @@ +@@ -342,17 +340,7 @@ WSP_GGML_METAL_DEL_KERNEL(cpy_f16_f16); #undef WSP_GGML_METAL_DEL_KERNEL @@ -26,12 +35,13 @@ - for (int i = 0; i < ctx->n_buffers; ++i) { - [ctx->buffers[i].metal release]; - } -- + - [ctx->library release]; - [ctx->queue release]; - [ctx->device release]; - - dispatch_release(ctx->d_queue); - +- free(ctx); } +