From ae1df68cb11a11b60f96361fe9d6666660fb31d6 Mon Sep 17 00:00:00 2001 From: Jhen Date: Tue, 7 Nov 2023 18:03:25 +0800 Subject: [PATCH] feat: sync whisper.cpp --- cpp/coreml/whisper-encoder.mm | 4 ++-- cpp/ggml-metal.m | 6 +++--- cpp/whisper.cpp | 8 ++++---- cpp/whisper.h | 1 - scripts/ggml-metal.m.patch | 22 ++-------------------- whisper.cpp | 2 +- 6 files changed, 12 insertions(+), 31 deletions(-) diff --git a/cpp/coreml/whisper-encoder.mm b/cpp/coreml/whisper-encoder.mm index 9a4e135..499edae 100644 --- a/cpp/coreml/whisper-encoder.mm +++ b/cpp/coreml/whisper-encoder.mm @@ -24,9 +24,9 @@ // select which device to run the Core ML model on MLModelConfiguration *config = [[MLModelConfiguration alloc] init]; - //config.computeUnits = MLComputeUnitsCPUAndGPU; + config.computeUnits = MLComputeUnitsCPUAndGPU; //config.computeUnits = MLComputeUnitsCPUAndNeuralEngine; - config.computeUnits = MLComputeUnitsAll; + //config.computeUnits = MLComputeUnitsAll; const void * data = CFBridgingRetain([[whisper_encoder_impl alloc] initWithContentsOfURL:url_model configuration:config error:nil]); diff --git a/cpp/ggml-metal.m b/cpp/ggml-metal.m index 27e42d8..3973987 100644 --- a/cpp/ggml-metal.m +++ b/cpp/ggml-metal.m @@ -1018,7 +1018,7 @@ void wsp_ggml_metal_graph_compute( [encoder setBytes:&ne00 length:sizeof(ne00) atIndex:2]; [encoder setBytes:&ne01 length:sizeof(ne01) atIndex:3]; [encoder setBytes:&ne02 length:sizeof(ne02) atIndex:4]; - [encoder setThreadgroupMemoryLength:MAX(16, nth/32*sizeof(float)) atIndex:0]; + [encoder setThreadgroupMemoryLength:WSP_GGML_PAD(nth/32*sizeof(float), 16) atIndex:0]; [encoder dispatchThreadgroups:MTLSizeMake(ne01*ne02*ne03, 1, 1) threadsPerThreadgroup:MTLSizeMake(nth, 1, 1)]; } break; @@ -1330,7 +1330,7 @@ void wsp_ggml_metal_graph_compute( [encoder setBytes:&ne00 length:sizeof( int64_t) atIndex:2]; [encoder setBytes:&nb01 length:sizeof(uint64_t) atIndex:3]; [encoder setBytes:&eps length:sizeof( float) atIndex:4]; - [encoder setThreadgroupMemoryLength:nth/32*sizeof(float) atIndex:0]; + [encoder setThreadgroupMemoryLength:WSP_GGML_PAD(nth/32*sizeof(float), 16) atIndex:0]; const int64_t nrows = wsp_ggml_nrows(src0); @@ -1349,7 +1349,7 @@ void wsp_ggml_metal_graph_compute( [encoder setBytes:&ne00 length:sizeof( int64_t) atIndex:2]; [encoder setBytes:&nb01 length:sizeof(uint64_t) atIndex:3]; [encoder setBytes:&eps length:sizeof( float) atIndex:4]; - [encoder setThreadgroupMemoryLength:MAX(16, nth*sizeof(float)) atIndex:0]; + [encoder setThreadgroupMemoryLength:WSP_GGML_PAD(nth*sizeof(float), 16) atIndex:0]; const int64_t nrows = wsp_ggml_nrows(src0); diff --git a/cpp/whisper.cpp b/cpp/whisper.cpp index 49af5e7..7a424e4 100644 --- a/cpp/whisper.cpp +++ b/cpp/whisper.cpp @@ -2855,9 +2855,7 @@ struct whisper_state * whisper_init_state(whisper_context * ctx) { log("%s: kv cross size = %7.2f MB\n", __func__, memory_size / 1024.0 / 1024.0); } - #ifdef WHISPER_USE_COREML - if (ctx->params.use_coreml) { const auto path_coreml = whisper_get_coreml_path_encoder(ctx->path_model); log("%s: loading Core ML model from '%s'\n", __func__, path_coreml.c_str()); @@ -2873,7 +2871,6 @@ struct whisper_state * whisper_init_state(whisper_context * ctx) { } else { log("%s: Core ML model loaded\n", __func__); } - } #endif state->logits.reserve(ctx->vocab.n_vocab * ctx->model.hparams.n_text_ctx); @@ -3051,7 +3048,6 @@ int whisper_ctx_init_openvino_encoder( struct whisper_context_params whisper_context_default_params() { struct whisper_context_params result = { /*.use_gpu =*/ true, - /*.use_coreml =*/ false, }; return result; } @@ -3692,6 +3688,7 @@ void whisper_print_timings(struct whisper_context * ctx) { } void whisper_reset_timings(struct whisper_context * ctx) { + ctx->t_start_us = wsp_ggml_time_us(); if (ctx->state != nullptr) { ctx->state->t_sample_us = 0; ctx->state->t_encode_us = 0; @@ -3830,6 +3827,9 @@ struct whisper_full_params whisper_full_default_params(enum whisper_sampling_str /*.encoder_begin_callback =*/ nullptr, /*.encoder_begin_callback_user_data =*/ nullptr, + /*.abort_callback =*/ nullptr, + /*.abort_callback_user_data =*/ nullptr, + /*.logits_filter_callback =*/ nullptr, /*.logits_filter_callback_user_data =*/ nullptr, }; diff --git a/cpp/whisper.h b/cpp/whisper.h index cf61955..3c3890b 100644 --- a/cpp/whisper.h +++ b/cpp/whisper.h @@ -81,7 +81,6 @@ extern "C" { struct whisper_context_params { bool use_gpu; - bool use_coreml; }; typedef struct whisper_token_data { diff --git a/scripts/ggml-metal.m.patch b/scripts/ggml-metal.m.patch index 8ee760b..0fe27b0 100644 --- a/scripts/ggml-metal.m.patch +++ b/scripts/ggml-metal.m.patch @@ -1,5 +1,5 @@ ---- ggml-metal.m.orig 2023-11-07 09:45:34 -+++ ggml-metal.m 2023-11-07 09:42:49 +--- ggml-metal.m.orig 2023-11-07 18:03:28 ++++ ggml-metal.m 2023-11-07 18:03:29 @@ -215,7 +215,7 @@ if (ggmlMetalPathResources) { sourcePath = [ggmlMetalPathResources stringByAppendingPathComponent:@"ggml-metal.metal"]; @@ -35,21 +35,3 @@ free(ctx); } -@@ -1030,7 +1018,7 @@ - [encoder setBytes:&ne00 length:sizeof(ne00) atIndex:2]; - [encoder setBytes:&ne01 length:sizeof(ne01) atIndex:3]; - [encoder setBytes:&ne02 length:sizeof(ne02) atIndex:4]; -- [encoder setThreadgroupMemoryLength:nth/32*sizeof(float) atIndex:0]; -+ [encoder setThreadgroupMemoryLength:MAX(16, nth/32*sizeof(float)) atIndex:0]; - - [encoder dispatchThreadgroups:MTLSizeMake(ne01*ne02*ne03, 1, 1) threadsPerThreadgroup:MTLSizeMake(nth, 1, 1)]; - } break; -@@ -1361,7 +1349,7 @@ - [encoder setBytes:&ne00 length:sizeof( int64_t) atIndex:2]; - [encoder setBytes:&nb01 length:sizeof(uint64_t) atIndex:3]; - [encoder setBytes:&eps length:sizeof( float) atIndex:4]; -- [encoder setThreadgroupMemoryLength:nth*sizeof(float) atIndex:0]; -+ [encoder setThreadgroupMemoryLength:MAX(16, nth*sizeof(float)) atIndex:0]; - - const int64_t nrows = wsp_ggml_nrows(src0); - diff --git a/whisper.cpp b/whisper.cpp index 0463028..11b5030 160000 --- a/whisper.cpp +++ b/whisper.cpp @@ -1 +1 @@ -Subproject commit 0463028bc2a5774fe7361c8ac37bef440725bcd7 +Subproject commit 11b503055e1810afd45127b626d823fa7d15d531