Commit

fix: timings

jhen0409 committed Nov 7, 2024
1 parent b4d7d94 commit b2d5f52
Showing 7 changed files with 106 additions and 54 deletions.
9 changes: 5 additions & 4 deletions cpp/rn-whisper.cpp
@@ -63,12 +63,13 @@ std::string bench(struct whisper_context * ctx, int n_threads) {

const int64_t t_end_us = wsp_ggml_time_us();
whisper_timings timings = whisper_get_timings(ctx);

return std::string("[") +
std::to_string(n_threads) + "," +
std::to_string(timings.n_encode) + "," +
std::to_string(timings.n_decode) + "," +
std::to_string(timings.n_batchd) + "," +
std::to_string(timings.n_prompt) + "]";
std::to_string(timings.encode_ms) + "," +
std::to_string(timings.decode_ms) + "," +
std::to_string(timings.batchd_ms) + "," +
std::to_string(timings.prompt_ms) + "]";
}

void high_pass_filter(std::vector<float> & data, float cutoff, float sample_rate) {
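For reference, a minimal sketch of how the bracketed string returned by `bench()` above might be consumed on the caller's side. The field order (`n_threads`, then the four per-run millisecond values) is taken from the diff; the `BenchResult` struct and the `parse_bench_result()` helper are illustrative assumptions, not part of the repository.

```cpp
#include <cstdio>
#include <string>

// Hypothetical container for the values serialized by bench():
// "[n_threads,encode_ms,decode_ms,batchd_ms,prompt_ms]"
struct BenchResult {
    int   n_threads;
    float encode_ms;
    float decode_ms;
    float batchd_ms;
    float prompt_ms;
};

// Parses the fixed, comma-separated layout produced above; returns false on mismatch.
static bool parse_bench_result(const std::string & s, BenchResult & out) {
    return std::sscanf(s.c_str(), "[%d,%f,%f,%f,%f]",
                       &out.n_threads, &out.encode_ms, &out.decode_ms,
                       &out.batchd_ms, &out.prompt_ms) == 5;
}
```

Since `std::to_string(float)` emits a plain fixed-point representation, the values round-trip through a parse like this with ordinary `float` precision.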
48 changes: 29 additions & 19 deletions cpp/whisper.cpp
@@ -4191,33 +4191,37 @@ whisper_token whisper_token_transcribe(struct whisper_context * ctx) {
}

struct whisper_timings whisper_get_timings(struct whisper_context * ctx) {
const int64_t t_end_us = wsp_ggml_time_us();
if (ctx->state == nullptr) {
return {
.load_us = ctx->t_load_us,
.t_start_us = ctx->t_start_us,
.fail_p = 0,
.fail_h = 0,
.t_mel_us = 0,
.n_sample = 0,
.n_encode = 0,
.n_decode = 0,
.n_batchd = 0,
.n_prompt = 0,
.mel_ms = 0,
.sample_ms = 0,
.encode_ms = 0,
.decode_ms = 0,
.batchd_ms = 0,
.prompt_ms = 0,
};
}

const int32_t n_sample = std::max(1, ctx->state->n_sample);
const int32_t n_encode = std::max(1, ctx->state->n_encode);
const int32_t n_decode = std::max(1, ctx->state->n_decode);
const int32_t n_batchd = std::max(1, ctx->state->n_batchd);
const int32_t n_prompt = std::max(1, ctx->state->n_prompt);
return {
.load_us = ctx->t_load_us,
.t_start_us = ctx->t_start_us,
.fail_p = ctx->state->n_fail_p,
.fail_h = ctx->state->n_fail_h,
.t_mel_us = ctx->state->t_mel_us,
.n_sample = std::max(1, ctx->state->n_sample),
.n_encode = std::max(1, ctx->state->n_encode),
.n_decode = std::max(1, ctx->state->n_decode),
.n_batchd = std::max(1, ctx->state->n_batchd),
.n_prompt = std::max(1, ctx->state->n_prompt),
.mel_ms = ctx->state->t_mel_us / 1000.0f,
.sample_ms = 1e-3f * ctx->state->t_sample_us / n_sample,
.encode_ms = 1e-3f * ctx->state->t_encode_us / n_encode,
.decode_ms = 1e-3f * ctx->state->t_decode_us / n_decode,
.batchd_ms = 1e-3f * ctx->state->t_batchd_us / n_batchd,
.prompt_ms = 1e-3f * ctx->state->t_prompt_us / n_prompt,
};
}

@@ -4228,13 +4232,19 @@ void whisper_print_timings(struct whisper_context * ctx) {
WHISPER_LOG_INFO("\n");
WHISPER_LOG_INFO("%s: load time = %8.2f ms\n", __func__, timings.load_us / 1000.0f);
if (ctx->state != nullptr) {
const int32_t n_sample = std::max(1, ctx->state->n_sample);
const int32_t n_encode = std::max(1, ctx->state->n_encode);
const int32_t n_decode = std::max(1, ctx->state->n_decode);
const int32_t n_batchd = std::max(1, ctx->state->n_batchd);
const int32_t n_prompt = std::max(1, ctx->state->n_prompt);

WHISPER_LOG_INFO("%s: fallbacks = %3d p / %3d h\n", __func__, timings.fail_p, timings.fail_h);
WHISPER_LOG_INFO("%s: mel time = %8.2f ms\n", __func__, timings.t_mel_us / 1000.0f);
WHISPER_LOG_INFO("%s: sample time = %8.2f ms / %5d runs (%8.2f ms per run)\n", __func__, 1e-3f * timings.n_sample, timings.n_sample, 1e-3f * timings.n_sample / timings.n_sample);
WHISPER_LOG_INFO("%s: encode time = %8.2f ms / %5d runs (%8.2f ms per run)\n", __func__, 1e-3f * timings.n_encode, timings.n_encode, 1e-3f * timings.n_encode / timings.n_encode);
WHISPER_LOG_INFO("%s: decode time = %8.2f ms / %5d runs (%8.2f ms per run)\n", __func__, 1e-3f * timings.n_decode, timings.n_decode, 1e-3f * timings.n_decode / timings.n_decode);
WHISPER_LOG_INFO("%s: batchd time = %8.2f ms / %5d runs (%8.2f ms per run)\n", __func__, 1e-3f * timings.n_batchd, timings.n_batchd, 1e-3f * timings.n_batchd / timings.n_batchd);
WHISPER_LOG_INFO("%s: prompt time = %8.2f ms / %5d runs (%8.2f ms per run)\n", __func__, 1e-3f * timings.n_prompt, timings.n_prompt, 1e-3f * timings.n_prompt / timings.n_prompt);
WHISPER_LOG_INFO("%s: mel time = %8.2f ms\n", __func__, timings.mel_ms);
WHISPER_LOG_INFO("%s: sample time = %8.2f ms / %5d runs (%8.2f ms per run)\n", __func__, 1e-3f * ctx->state->t_sample_us, n_sample, timings.sample_ms);
WHISPER_LOG_INFO("%s: encode time = %8.2f ms / %5d runs (%8.2f ms per run)\n", __func__, 1e-3f * ctx->state->t_encode_us, n_encode, timings.encode_ms);
WHISPER_LOG_INFO("%s: decode time = %8.2f ms / %5d runs (%8.2f ms per run)\n", __func__, 1e-3f * ctx->state->t_decode_us, n_decode, timings.decode_ms);
WHISPER_LOG_INFO("%s: batchd time = %8.2f ms / %5d runs (%8.2f ms per run)\n", __func__, 1e-3f * ctx->state->t_batchd_us, n_batchd, timings.batchd_ms);
WHISPER_LOG_INFO("%s: prompt time = %8.2f ms / %5d runs (%8.2f ms per run)\n", __func__, 1e-3f * ctx->state->t_prompt_us, n_prompt, timings.prompt_ms);
}
WHISPER_LOG_INFO("%s: total time = %8.2f ms\n", __func__, (t_end_us - timings.t_start_us)/1000.0f);
}
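The header that declares `whisper_timings` is not part of this diff, so for orientation here is the layout implied by the designated initializers in `whisper_get_timings()` above. This is a reconstruction: the integer widths and the use of `float` are assumptions based on the fields each value is copied from, not the actual declaration.

```cpp
// Sketch reconstructed from the initializer lists in whisper_get_timings();
// the real declaration lives in the (unchanged) header. Types are assumed.
struct whisper_timings {
    int64_t load_us;     // ctx->t_load_us, model load time in microseconds
    int64_t t_start_us;  // ctx->t_start_us, used for the "total time" line
    int32_t fail_p;      // decoder fallbacks (p), from ctx->state->n_fail_p
    int32_t fail_h;      // decoder fallbacks (h), from ctx->state->n_fail_h
    float   mel_ms;      // total mel time, t_mel_us / 1000.0f
    float   sample_ms;   // average ms per sampling run
    float   encode_ms;   // average ms per encoder run
    float   decode_ms;   // average ms per decoder run
    float   batchd_ms;   // average ms per batch decode run
    float   prompt_ms;   // average ms per prompt run
};
```

The earlier fields held raw run counts, so expressions like `1e-3f * timings.n_encode / timings.n_encode` in the old log lines could not report real per-run times; the `*_ms` fields carry the averages directly.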
25 changes: 25 additions & 0 deletions example/ios/Podfile.lock
@@ -1230,6 +1230,27 @@ PODS:
- React-logger (= 0.74.6)
- React-perflogger (= 0.74.6)
- React-utils (= 0.74.6)
- RNCClipboard (1.14.3):
- DoubleConversion
- glog
- hermes-engine
- RCT-Folly (= 2024.01.01.00)
- RCTRequired
- RCTTypeSafety
- React-Codegen
- React-Core
- React-debug
- React-Fabric
- React-featureflags
- React-graphics
- React-ImageManager
- React-NativeModulesApple
- React-RCTFabric
- React-rendererdebug
- React-utils
- ReactCommon/turbomodule/bridging
- ReactCommon/turbomodule/core
- Yoga
- RNFS (2.20.0):
- React-Core
- RNScreens (3.32.0):
@@ -1354,6 +1375,7 @@ DEPENDENCIES:
- React-runtimescheduler (from `../node_modules/react-native/ReactCommon/react/renderer/runtimescheduler`)
- React-utils (from `../node_modules/react-native/ReactCommon/react/utils`)
- ReactCommon/turbomodule/core (from `../node_modules/react-native/ReactCommon`)
- "RNCClipboard (from `../node_modules/@react-native-clipboard/clipboard`)"
- RNFS (from `../node_modules/react-native-fs`)
- RNScreens (from `../node_modules/react-native-screens`)
- RNSound (from `../node_modules/react-native-sound`)
@@ -1476,6 +1498,8 @@ EXTERNAL SOURCES:
:path: "../node_modules/react-native/ReactCommon/react/utils"
ReactCommon:
:path: "../node_modules/react-native/ReactCommon"
RNCClipboard:
:path: "../node_modules/@react-native-clipboard/clipboard"
RNFS:
:path: "../node_modules/react-native-fs"
RNScreens:
@@ -1544,6 +1568,7 @@ SPEC CHECKSUMS:
React-runtimescheduler: b63ebebd3e000e0ba4ac19ca69bdac071559ad57
React-utils: 2955bdc1b2ed495f14dc7d3bfbbb7e3624cfc0fc
ReactCommon: 5c504a77030c7ab89eee75b1725b80d8cee7f5d7
RNCClipboard: 99d86f515e6262a8a1d0915f1f6e6b410698aa3a
RNFS: 4ac0f0ea233904cb798630b3c077808c06931688
RNScreens: d3d50aa84db4541eee00fbb1f32151030f56c510
RNSound: 6c156f925295bdc83e8e422e7d8b38d33bc71852
1 change: 1 addition & 0 deletions example/package.json
@@ -9,6 +9,7 @@
"pods": "pod-install --quiet"
},
"dependencies": {
"@react-native-clipboard/clipboard": "^1.14.3",
"@react-native/babel-preset": "0.74.88",
"@react-native/metro-config": "^0.74.79",
"@react-navigation/native": "^6.1.18",
12 changes: 9 additions & 3 deletions example/src/Bench.tsx
@@ -8,8 +8,7 @@ import {
Pressable,
} from 'react-native'
import RNFS from 'react-native-fs'
import { initWhisper, libVersion, AudioSessionIos } from '../../src' // whisper.rn
import type { WhisperContext } from '../../src'
import { initWhisper } from '../../src' // whisper.rn
import contextOpts from './context-opts'

Check failure on line 12 in example/src/Bench.tsx (GitHub Actions / test-android, test-ios): 'contextOpts' is defined but never used
const baseURL = 'https://huggingface.co/ggerganov/whisper.cpp/resolve/main/'
@@ -326,7 +325,14 @@ export default function Bench() {
const { nThreads, nEncode, nDecode, nBatchd, nPrompt } = result
const fa = Platform.OS === 'ios' ? '1' : '0'
log(
`| <todo> | ${Platform.OS} | METAL | ${modelName} | ${nThreads} | ${fa} | ${nEncode} | ${nDecode} | ${nBatchd} | ${nPrompt} | <todo> |`,
// TODO: config
`| <todo> | ${
Platform.OS
} | NEON BLAS METAL | ${modelName} | ${nThreads} | ${fa} | ${nEncode.toFixed(
2,
)} | ${nDecode.toFixed(2)} | ${nBatchd.toFixed(
2,
)} | ${nPrompt.toFixed(2)} | <todo> |`,
)
} finally {
await ctx.release()
5 changes: 5 additions & 0 deletions example/yarn.lock
@@ -1459,6 +1459,11 @@
"@nodelib/fs.scandir" "2.1.5"
fastq "^1.6.0"

"@react-native-clipboard/clipboard@^1.14.3":
version "1.14.3"
resolved "https://registry.yarnpkg.com/@react-native-clipboard/clipboard/-/clipboard-1.14.3.tgz#3596ade1d85b0accdd2585674fcea3ea185f0e20"
integrity sha512-EVWxJfCSyBN2SH5b3JrA/w1qlYu3vihQOfdD7fs/BYp63xL6qy93CvbFDHzF8ooFpGM6f67hkAN+gxl1RfOKuw==

"@react-native-community/[email protected]":
version "13.6.9"
resolved "https://registry.yarnpkg.com/@react-native-community/cli-clean/-/cli-clean-13.6.9.tgz#b6754f39c2b877c9d730feb848945150e1d52209"
60 changes: 32 additions & 28 deletions scripts/whisper.cpp.patch
@@ -1,5 +1,5 @@
--- whisper.cpp.orig 2024-11-07 13:09:13
+++ whisper.cpp 2024-11-07 13:03:02
--- whisper.cpp.orig 2024-11-07 14:03:54
+++ whisper.cpp 2024-11-07 14:01:48
@@ -3388,8 +3388,10 @@
const size_t memory_size = aheads_masks_nbytes(state->aheads_masks);
WHISPER_LOG_INFO("%s: alignment heads masks size = %ld B\n", __func__, memory_size);
@@ -27,39 +27,43 @@
/*.flash_attn =*/ false,
/*.gpu_device =*/ 0,

@@ -4185,29 +4189,54 @@
@@ -4185,29 +4189,64 @@
whisper_token whisper_token_transcribe(struct whisper_context * ctx) {
return ctx->vocab.token_transcribe;
}
+
+struct whisper_timings whisper_get_timings(struct whisper_context * ctx) {
+ const int64_t t_end_us = wsp_ggml_time_us();
+ if (ctx->state == nullptr) {
+ return {
+ .load_us = ctx->t_load_us,
+ .t_start_us = ctx->t_start_us,
+ .fail_p = 0,
+ .fail_h = 0,
+ .t_mel_us = 0,
+ .n_sample = 0,
+ .n_encode = 0,
+ .n_decode = 0,
+ .n_batchd = 0,
+ .n_prompt = 0,
+ .mel_ms = 0,
+ .sample_ms = 0,
+ .encode_ms = 0,
+ .decode_ms = 0,
+ .batchd_ms = 0,
+ .prompt_ms = 0,
+ };
+ }

+ const int32_t n_sample = std::max(1, ctx->state->n_sample);
+ const int32_t n_encode = std::max(1, ctx->state->n_encode);
+ const int32_t n_decode = std::max(1, ctx->state->n_decode);
+ const int32_t n_batchd = std::max(1, ctx->state->n_batchd);
+ const int32_t n_prompt = std::max(1, ctx->state->n_prompt);
+ return {
+ .load_us = ctx->t_load_us,
+ .t_start_us = ctx->t_start_us,
+ .fail_p = ctx->state->n_fail_p,
+ .fail_h = ctx->state->n_fail_h,
+ .t_mel_us = ctx->state->t_mel_us,
+ .n_sample = std::max(1, ctx->state->n_sample),
+ .n_encode = std::max(1, ctx->state->n_encode),
+ .n_decode = std::max(1, ctx->state->n_decode),
+ .n_batchd = std::max(1, ctx->state->n_batchd),
+ .n_prompt = std::max(1, ctx->state->n_prompt),
+ .mel_ms = ctx->state->t_mel_us / 1000.0f,
+ .sample_ms = 1e-3f * ctx->state->t_sample_us / n_sample,
+ .encode_ms = 1e-3f * ctx->state->t_encode_us / n_encode,
+ .decode_ms = 1e-3f * ctx->state->t_decode_us / n_decode,
+ .batchd_ms = 1e-3f * ctx->state->t_batchd_us / n_batchd,
+ .prompt_ms = 1e-3f * ctx->state->t_prompt_us / n_prompt,
+ };
+}
+
@@ -72,12 +76,12 @@
+ WHISPER_LOG_INFO("%s: load time = %8.2f ms\n", __func__, timings.load_us / 1000.0f);
if (ctx->state != nullptr) {
-
- const int32_t n_sample = std::max(1, ctx->state->n_sample);
- const int32_t n_encode = std::max(1, ctx->state->n_encode);
- const int32_t n_decode = std::max(1, ctx->state->n_decode);
- const int32_t n_batchd = std::max(1, ctx->state->n_batchd);
- const int32_t n_prompt = std::max(1, ctx->state->n_prompt);
-
const int32_t n_sample = std::max(1, ctx->state->n_sample);
const int32_t n_encode = std::max(1, ctx->state->n_encode);
const int32_t n_decode = std::max(1, ctx->state->n_decode);
const int32_t n_batchd = std::max(1, ctx->state->n_batchd);
const int32_t n_prompt = std::max(1, ctx->state->n_prompt);

- WHISPER_LOG_INFO("%s: fallbacks = %3d p / %3d h\n", __func__, ctx->state->n_fail_p, ctx->state->n_fail_h);
- WHISPER_LOG_INFO("%s: mel time = %8.2f ms\n", __func__, ctx->state->t_mel_us / 1000.0f);
- WHISPER_LOG_INFO("%s: sample time = %8.2f ms / %5d runs (%8.2f ms per run)\n", __func__, 1e-3f * ctx->state->t_sample_us, n_sample, 1e-3f * ctx->state->t_sample_us / n_sample);
@@ -86,12 +90,12 @@
- WHISPER_LOG_INFO("%s: batchd time = %8.2f ms / %5d runs (%8.2f ms per run)\n", __func__, 1e-3f * ctx->state->t_batchd_us, n_batchd, 1e-3f * ctx->state->t_batchd_us / n_batchd);
- WHISPER_LOG_INFO("%s: prompt time = %8.2f ms / %5d runs (%8.2f ms per run)\n", __func__, 1e-3f * ctx->state->t_prompt_us, n_prompt, 1e-3f * ctx->state->t_prompt_us / n_prompt);
+ WHISPER_LOG_INFO("%s: fallbacks = %3d p / %3d h\n", __func__, timings.fail_p, timings.fail_h);
+ WHISPER_LOG_INFO("%s: mel time = %8.2f ms\n", __func__, timings.t_mel_us / 1000.0f);
+ WHISPER_LOG_INFO("%s: sample time = %8.2f ms / %5d runs (%8.2f ms per run)\n", __func__, 1e-3f * timings.n_sample, timings.n_sample, 1e-3f * timings.n_sample / timings.n_sample);
+ WHISPER_LOG_INFO("%s: encode time = %8.2f ms / %5d runs (%8.2f ms per run)\n", __func__, 1e-3f * timings.n_encode, timings.n_encode, 1e-3f * timings.n_encode / timings.n_encode);
+ WHISPER_LOG_INFO("%s: decode time = %8.2f ms / %5d runs (%8.2f ms per run)\n", __func__, 1e-3f * timings.n_decode, timings.n_decode, 1e-3f * timings.n_decode / timings.n_decode);
+ WHISPER_LOG_INFO("%s: batchd time = %8.2f ms / %5d runs (%8.2f ms per run)\n", __func__, 1e-3f * timings.n_batchd, timings.n_batchd, 1e-3f * timings.n_batchd / timings.n_batchd);
+ WHISPER_LOG_INFO("%s: prompt time = %8.2f ms / %5d runs (%8.2f ms per run)\n", __func__, 1e-3f * timings.n_prompt, timings.n_prompt, 1e-3f * timings.n_prompt / timings.n_prompt);
+ WHISPER_LOG_INFO("%s: mel time = %8.2f ms\n", __func__, timings.mel_ms);
+ WHISPER_LOG_INFO("%s: sample time = %8.2f ms / %5d runs (%8.2f ms per run)\n", __func__, 1e-3f * ctx->state->t_sample_us, n_sample, timings.sample_ms);
+ WHISPER_LOG_INFO("%s: encode time = %8.2f ms / %5d runs (%8.2f ms per run)\n", __func__, 1e-3f * ctx->state->t_encode_us, n_encode, timings.encode_ms);
+ WHISPER_LOG_INFO("%s: decode time = %8.2f ms / %5d runs (%8.2f ms per run)\n", __func__, 1e-3f * ctx->state->t_decode_us, n_decode, timings.decode_ms);
+ WHISPER_LOG_INFO("%s: batchd time = %8.2f ms / %5d runs (%8.2f ms per run)\n", __func__, 1e-3f * ctx->state->t_batchd_us, n_batchd, timings.batchd_ms);
+ WHISPER_LOG_INFO("%s: prompt time = %8.2f ms / %5d runs (%8.2f ms per run)\n", __func__, 1e-3f * ctx->state->t_prompt_us, n_prompt, timings.prompt_ms);
}
- WHISPER_LOG_INFO("%s: total time = %8.2f ms\n", __func__, (t_end_us - ctx->t_start_us)/1000.0f);
+ WHISPER_LOG_INFO("%s: total time = %8.2f ms\n", __func__, (t_end_us - timings.t_start_us)/1000.0f);
