
Commit

feat: sync whisper.cpp
jhen0409 committed Nov 7, 2023
1 parent 0e81091 commit ae1df68
Showing 6 changed files with 12 additions and 31 deletions.
4 changes: 2 additions & 2 deletions cpp/coreml/whisper-encoder.mm
@@ -24,9 +24,9 @@

// select which device to run the Core ML model on
MLModelConfiguration *config = [[MLModelConfiguration alloc] init];
- //config.computeUnits = MLComputeUnitsCPUAndGPU;
+ config.computeUnits = MLComputeUnitsCPUAndGPU;
//config.computeUnits = MLComputeUnitsCPUAndNeuralEngine;
- config.computeUnits = MLComputeUnitsAll;
+ //config.computeUnits = MLComputeUnitsAll;

const void * data = CFBridgingRetain([[whisper_encoder_impl alloc] initWithContentsOfURL:url_model configuration:config error:nil]);

6 changes: 3 additions & 3 deletions cpp/ggml-metal.m
@@ -1018,7 +1018,7 @@ void wsp_ggml_metal_graph_compute(
[encoder setBytes:&ne00 length:sizeof(ne00) atIndex:2];
[encoder setBytes:&ne01 length:sizeof(ne01) atIndex:3];
[encoder setBytes:&ne02 length:sizeof(ne02) atIndex:4];
- [encoder setThreadgroupMemoryLength:MAX(16, nth/32*sizeof(float)) atIndex:0];
+ [encoder setThreadgroupMemoryLength:WSP_GGML_PAD(nth/32*sizeof(float), 16) atIndex:0];

[encoder dispatchThreadgroups:MTLSizeMake(ne01*ne02*ne03, 1, 1) threadsPerThreadgroup:MTLSizeMake(nth, 1, 1)];
} break;
@@ -1330,7 +1330,7 @@ void wsp_ggml_metal_graph_compute(
[encoder setBytes:&ne00 length:sizeof( int64_t) atIndex:2];
[encoder setBytes:&nb01 length:sizeof(uint64_t) atIndex:3];
[encoder setBytes:&eps length:sizeof( float) atIndex:4];
- [encoder setThreadgroupMemoryLength:nth/32*sizeof(float) atIndex:0];
+ [encoder setThreadgroupMemoryLength:WSP_GGML_PAD(nth/32*sizeof(float), 16) atIndex:0];

const int64_t nrows = wsp_ggml_nrows(src0);

@@ -1349,7 +1349,7 @@ void wsp_ggml_metal_graph_compute(
[encoder setBytes:&ne00 length:sizeof( int64_t) atIndex:2];
[encoder setBytes:&nb01 length:sizeof(uint64_t) atIndex:3];
[encoder setBytes:&eps length:sizeof( float) atIndex:4];
- [encoder setThreadgroupMemoryLength:MAX(16, nth*sizeof(float)) atIndex:0];
+ [encoder setThreadgroupMemoryLength:WSP_GGML_PAD(nth*sizeof(float), 16) atIndex:0];

const int64_t nrows = wsp_ggml_nrows(src0);

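Note on the change above: Metal requires the length passed to setThreadgroupMemoryLength: to be a multiple of 16 bytes, so clamping with MAX(16, n) only enforces a minimum, while padding rounds n up to the next 16-byte boundary. A minimal C sketch of the difference, assuming the usual ggml round-up definition of the pad macro (the macro body below is not taken from this diff):

#include <stdio.h>

/* Assumed round-up definition, mirroring ggml's GGML_PAD; not copied from this repository. */
#define WSP_GGML_PAD(x, n) (((x) + (n) - 1) / (n) * (n))
#define MAX(a, b) ((a) > (b) ? (a) : (b))

int main(void) {
    const size_t nth = 160;                       /* hypothetical threads per threadgroup */
    const size_t len = nth / 32 * sizeof(float);  /* 20 bytes of reduction scratch */
    printf("MAX(16, len)          = %zu\n", (size_t) MAX((size_t) 16, len));  /* 20: at least 16, but not 16-aligned */
    printf("WSP_GGML_PAD(len, 16) = %zu\n", (size_t) WSP_GGML_PAD(len, 16));  /* 32: rounded up to a 16-byte multiple */
    return 0;
}
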
8 changes: 4 additions & 4 deletions cpp/whisper.cpp
@@ -2855,9 +2855,7 @@ struct whisper_state * whisper_init_state(whisper_context * ctx) {
log("%s: kv cross size = %7.2f MB\n", __func__, memory_size / 1024.0 / 1024.0);
}


#ifdef WHISPER_USE_COREML
- if (ctx->params.use_coreml) {
const auto path_coreml = whisper_get_coreml_path_encoder(ctx->path_model);

log("%s: loading Core ML model from '%s'\n", __func__, path_coreml.c_str());
@@ -2873,7 +2871,6 @@ struct whisper_state * whisper_init_state(whisper_context * ctx) {
} else {
log("%s: Core ML model loaded\n", __func__);
}
- }
#endif

state->logits.reserve(ctx->vocab.n_vocab * ctx->model.hparams.n_text_ctx);
@@ -3051,7 +3048,6 @@ int whisper_ctx_init_openvino_encoder(
struct whisper_context_params whisper_context_default_params() {
struct whisper_context_params result = {
/*.use_gpu =*/ true,
- /*.use_coreml =*/ false,
};
return result;
}
@@ -3692,6 +3688,7 @@ void whisper_print_timings(struct whisper_context * ctx) {
}

void whisper_reset_timings(struct whisper_context * ctx) {
+ ctx->t_start_us = wsp_ggml_time_us();
if (ctx->state != nullptr) {
ctx->state->t_sample_us = 0;
ctx->state->t_encode_us = 0;
@@ -3830,6 +3827,9 @@ struct whisper_full_params whisper_full_default_params(enum whisper_sampling_str
/*.encoder_begin_callback =*/ nullptr,
/*.encoder_begin_callback_user_data =*/ nullptr,

+ /*.abort_callback =*/ nullptr,
+ /*.abort_callback_user_data =*/ nullptr,

/*.logits_filter_callback =*/ nullptr,
/*.logits_filter_callback_user_data =*/ nullptr,
};
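Note on the last hunk above: the synced whisper_full_params gains abort_callback and abort_callback_user_data, which let a caller stop a running whisper_full() from outside. A hedged C sketch of one way to wire them up (the helper names and the cancellation flag are illustrative, not part of this commit; the callback is assumed to take a void * user_data and return true to request the abort):

#include <stdbool.h>
#include "whisper.h"

/* Assumed callback shape: bool (*)(void * user_data); returning true requests the abort. */
static bool abort_cb(void * user_data) {
    const bool * cancelled = (const bool *) user_data;  /* flag owned by the caller */
    return *cancelled;
}

int run_with_cancellation(struct whisper_context * ctx,
                          const float * pcm, int n_samples,
                          bool * cancelled_flag) {
    struct whisper_full_params params = whisper_full_default_params(WHISPER_SAMPLING_GREEDY);
    params.abort_callback           = abort_cb;
    params.abort_callback_user_data = cancelled_flag;
    return whisper_full(ctx, params, pcm, n_samples);   /* stops early once *cancelled_flag is true */
}
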
1 change: 0 additions & 1 deletion cpp/whisper.h
@@ -81,7 +81,6 @@ extern "C" {

struct whisper_context_params {
bool use_gpu;
- bool use_coreml;
};

typedef struct whisper_token_data {
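With use_coreml gone from both whisper.h and whisper.cpp, Core ML is again a compile-time option (WHISPER_USE_COREML) and whisper_context_params is back to the upstream shape, carrying only use_gpu. A small usage sketch under that assumption; the loader below pairs whisper_context_default_params() with the whisper_init_from_file_with_params() entry point from the same header and is illustrative, not code from this repository:

#include "whisper.h"

/* Hypothetical helper: open a model with default context params and the GPU enabled. */
struct whisper_context * load_model(const char * model_path) {
    struct whisper_context_params cparams = whisper_context_default_params();
    cparams.use_gpu = true;  /* e.g. Metal, when the library was built with GPU support */
    return whisper_init_from_file_with_params(model_path, cparams);
}
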
22 changes: 2 additions & 20 deletions scripts/ggml-metal.m.patch
@@ -1,5 +1,5 @@
--- ggml-metal.m.orig 2023-11-07 09:45:34
+++ ggml-metal.m 2023-11-07 09:42:49
--- ggml-metal.m.orig 2023-11-07 18:03:28
+++ ggml-metal.m 2023-11-07 18:03:29
@@ -215,7 +215,7 @@
if (ggmlMetalPathResources) {
sourcePath = [ggmlMetalPathResources stringByAppendingPathComponent:@"ggml-metal.metal"];
@@ -35,21 +35,3 @@

free(ctx);
}
@@ -1030,7 +1018,7 @@
[encoder setBytes:&ne00 length:sizeof(ne00) atIndex:2];
[encoder setBytes:&ne01 length:sizeof(ne01) atIndex:3];
[encoder setBytes:&ne02 length:sizeof(ne02) atIndex:4];
- [encoder setThreadgroupMemoryLength:nth/32*sizeof(float) atIndex:0];
+ [encoder setThreadgroupMemoryLength:MAX(16, nth/32*sizeof(float)) atIndex:0];

[encoder dispatchThreadgroups:MTLSizeMake(ne01*ne02*ne03, 1, 1) threadsPerThreadgroup:MTLSizeMake(nth, 1, 1)];
} break;
@@ -1361,7 +1349,7 @@
[encoder setBytes:&ne00 length:sizeof( int64_t) atIndex:2];
[encoder setBytes:&nb01 length:sizeof(uint64_t) atIndex:3];
[encoder setBytes:&eps length:sizeof( float) atIndex:4];
- [encoder setThreadgroupMemoryLength:nth*sizeof(float) atIndex:0];
+ [encoder setThreadgroupMemoryLength:MAX(16, nth*sizeof(float)) atIndex:0];

const int64_t nrows = wsp_ggml_nrows(src0);

2 changes: 1 addition & 1 deletion whisper.cpp
