From ae1df68cb11a11b60f96361fe9d6666660fb31d6 Mon Sep 17 00:00:00 2001
From: Jhen <developer@jhen.me>
Date: Tue, 7 Nov 2023 18:03:25 +0800
Subject: [PATCH] feat: sync whisper.cpp

---
 cpp/coreml/whisper-encoder.mm |  4 ++--
 cpp/ggml-metal.m              |  6 +++---
 cpp/whisper.cpp               |  8 ++++----
 cpp/whisper.h                 |  1 -
 scripts/ggml-metal.m.patch    | 22 ++--------------------
 whisper.cpp                   |  2 +-
 6 files changed, 12 insertions(+), 31 deletions(-)

diff --git a/cpp/coreml/whisper-encoder.mm b/cpp/coreml/whisper-encoder.mm
index 9a4e135..499edae 100644
--- a/cpp/coreml/whisper-encoder.mm
+++ b/cpp/coreml/whisper-encoder.mm
@@ -24,9 +24,9 @@
 
     // select which device to run the Core ML model on
     MLModelConfiguration *config = [[MLModelConfiguration alloc] init];
-    //config.computeUnits = MLComputeUnitsCPUAndGPU;
+    config.computeUnits = MLComputeUnitsCPUAndGPU;
     //config.computeUnits = MLComputeUnitsCPUAndNeuralEngine;
-    config.computeUnits = MLComputeUnitsAll;
+    //config.computeUnits = MLComputeUnitsAll;
 
     const void * data = CFBridgingRetain([[whisper_encoder_impl alloc] initWithContentsOfURL:url_model configuration:config error:nil]);
 
diff --git a/cpp/ggml-metal.m b/cpp/ggml-metal.m
index 27e42d8..3973987 100644
--- a/cpp/ggml-metal.m
+++ b/cpp/ggml-metal.m
@@ -1018,7 +1018,7 @@ void wsp_ggml_metal_graph_compute(
                             [encoder setBytes:&ne00 length:sizeof(ne00) atIndex:2];
                             [encoder setBytes:&ne01 length:sizeof(ne01) atIndex:3];
                             [encoder setBytes:&ne02 length:sizeof(ne02) atIndex:4];
-                            [encoder setThreadgroupMemoryLength:MAX(16, nth/32*sizeof(float)) atIndex:0];
+                            [encoder setThreadgroupMemoryLength:WSP_GGML_PAD(nth/32*sizeof(float), 16) atIndex:0];
 
                             [encoder dispatchThreadgroups:MTLSizeMake(ne01*ne02*ne03, 1, 1) threadsPerThreadgroup:MTLSizeMake(nth, 1, 1)];
                         } break;
@@ -1330,7 +1330,7 @@ void wsp_ggml_metal_graph_compute(
                             [encoder setBytes:&ne00 length:sizeof( int64_t) atIndex:2];
                             [encoder setBytes:&nb01 length:sizeof(uint64_t) atIndex:3];
                             [encoder setBytes:&eps  length:sizeof(   float) atIndex:4];
-                            [encoder setThreadgroupMemoryLength:nth/32*sizeof(float) atIndex:0];
+                            [encoder setThreadgroupMemoryLength:WSP_GGML_PAD(nth/32*sizeof(float), 16) atIndex:0];
 
                             const int64_t nrows = wsp_ggml_nrows(src0);
 
@@ -1349,7 +1349,7 @@ void wsp_ggml_metal_graph_compute(
                             [encoder setBytes:&ne00    length:sizeof( int64_t) atIndex:2];
                             [encoder setBytes:&nb01    length:sizeof(uint64_t) atIndex:3];
                             [encoder setBytes:&eps     length:sizeof(   float) atIndex:4];
-                            [encoder setThreadgroupMemoryLength:MAX(16, nth*sizeof(float)) atIndex:0];
+                            [encoder setThreadgroupMemoryLength:WSP_GGML_PAD(nth*sizeof(float), 16) atIndex:0];
 
                             const int64_t nrows = wsp_ggml_nrows(src0);
 
diff --git a/cpp/whisper.cpp b/cpp/whisper.cpp
index 49af5e7..7a424e4 100644
--- a/cpp/whisper.cpp
+++ b/cpp/whisper.cpp
@@ -2855,9 +2855,7 @@ struct whisper_state * whisper_init_state(whisper_context * ctx) {
         log("%s: kv cross size = %7.2f MB\n", __func__, memory_size / 1024.0 / 1024.0);
     }
 
-    
 #ifdef WHISPER_USE_COREML
-    if (ctx->params.use_coreml) {
     const auto path_coreml = whisper_get_coreml_path_encoder(ctx->path_model);
 
     log("%s: loading Core ML model from '%s'\n", __func__, path_coreml.c_str());
@@ -2873,7 +2871,6 @@ struct whisper_state * whisper_init_state(whisper_context * ctx) {
     } else {
         log("%s: Core ML model loaded\n", __func__);
     }
-    }
 #endif
 
     state->logits.reserve(ctx->vocab.n_vocab * ctx->model.hparams.n_text_ctx);
@@ -3051,7 +3048,6 @@ int whisper_ctx_init_openvino_encoder(
 struct whisper_context_params whisper_context_default_params() {
     struct whisper_context_params result = {
         /*.use_gpu    =*/ true,
-        /*.use_coreml =*/ false,
     };
     return result;
 }
@@ -3692,6 +3688,7 @@ void whisper_print_timings(struct whisper_context * ctx) {
 }
 
 void whisper_reset_timings(struct whisper_context * ctx) {
+    ctx->t_start_us = wsp_ggml_time_us();
     if (ctx->state != nullptr) {
         ctx->state->t_sample_us = 0;
         ctx->state->t_encode_us = 0;
@@ -3830,6 +3827,9 @@ struct whisper_full_params whisper_full_default_params(enum whisper_sampling_str
         /*.encoder_begin_callback           =*/ nullptr,
         /*.encoder_begin_callback_user_data =*/ nullptr,
 
+        /*.abort_callback                   =*/ nullptr,
+        /*.abort_callback_user_data         =*/ nullptr,
+
         /*.logits_filter_callback           =*/ nullptr,
         /*.logits_filter_callback_user_data =*/ nullptr,
     };
diff --git a/cpp/whisper.h b/cpp/whisper.h
index cf61955..3c3890b 100644
--- a/cpp/whisper.h
+++ b/cpp/whisper.h
@@ -81,7 +81,6 @@ extern "C" {
 
     struct whisper_context_params {
         bool  use_gpu;
-        bool  use_coreml;
     };
 
     typedef struct whisper_token_data {
diff --git a/scripts/ggml-metal.m.patch b/scripts/ggml-metal.m.patch
index 8ee760b..0fe27b0 100644
--- a/scripts/ggml-metal.m.patch
+++ b/scripts/ggml-metal.m.patch
@@ -1,5 +1,5 @@
---- ggml-metal.m.orig	2023-11-07 09:45:34
-+++ ggml-metal.m	2023-11-07 09:42:49
+--- ggml-metal.m.orig	2023-11-07 18:03:28
++++ ggml-metal.m	2023-11-07 18:03:29
 @@ -215,7 +215,7 @@
              if (ggmlMetalPathResources) {
                  sourcePath = [ggmlMetalPathResources stringByAppendingPathComponent:@"ggml-metal.metal"];
@@ -35,21 +35,3 @@
  
      free(ctx);
  }
-@@ -1030,7 +1018,7 @@
-                             [encoder setBytes:&ne00 length:sizeof(ne00) atIndex:2];
-                             [encoder setBytes:&ne01 length:sizeof(ne01) atIndex:3];
-                             [encoder setBytes:&ne02 length:sizeof(ne02) atIndex:4];
--                            [encoder setThreadgroupMemoryLength:nth/32*sizeof(float) atIndex:0];
-+                            [encoder setThreadgroupMemoryLength:MAX(16, nth/32*sizeof(float)) atIndex:0];
- 
-                             [encoder dispatchThreadgroups:MTLSizeMake(ne01*ne02*ne03, 1, 1) threadsPerThreadgroup:MTLSizeMake(nth, 1, 1)];
-                         } break;
-@@ -1361,7 +1349,7 @@
-                             [encoder setBytes:&ne00    length:sizeof( int64_t) atIndex:2];
-                             [encoder setBytes:&nb01    length:sizeof(uint64_t) atIndex:3];
-                             [encoder setBytes:&eps     length:sizeof(   float) atIndex:4];
--                            [encoder setThreadgroupMemoryLength:nth*sizeof(float) atIndex:0];
-+                            [encoder setThreadgroupMemoryLength:MAX(16, nth*sizeof(float)) atIndex:0];
- 
-                             const int64_t nrows = wsp_ggml_nrows(src0);
- 
diff --git a/whisper.cpp b/whisper.cpp
index 0463028..11b5030 160000
--- a/whisper.cpp
+++ b/whisper.cpp
@@ -1 +1 @@
-Subproject commit 0463028bc2a5774fe7361c8ac37bef440725bcd7
+Subproject commit 11b503055e1810afd45127b626d823fa7d15d531