Commit

fix: timings

jhen0409 committed Nov 7, 2024
1 parent b4d7d94 commit b2d5f52
Showing 7 changed files with 106 additions and 54 deletions.
9 changes: 5 additions & 4 deletions cpp/rn-whisper.cpp
@@ -63,12 +63,13 @@ std::string bench(struct whisper_context * ctx, int n_threads) {

const int64_t t_end_us = wsp_ggml_time_us();
whisper_timings timings = whisper_get_timings(ctx);

return std::string("[") +
std::to_string(n_threads) + "," +
std::to_string(timings.n_encode) + "," +
std::to_string(timings.n_decode) + "," +
std::to_string(timings.n_batchd) + "," +
std::to_string(timings.n_prompt) + "]";
std::to_string(timings.encode_ms) + "," +
std::to_string(timings.decode_ms) + "," +
std::to_string(timings.batchd_ms) + "," +
std::to_string(timings.prompt_ms) + "]";
}

void high_pass_filter(std::vector<float> & data, float cutoff, float sample_rate) {
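For reference, a minimal sketch of how the bracketed string returned by `bench()` above might be consumed on the caller's side. The field order (`n_threads`, then the four per-run millisecond values) is taken from the diff; the `BenchResult` struct and the `parse_bench_result()` helper are illustrative assumptions, not part of the repository.

```cpp
#include <cstdio>
#include <string>

// Hypothetical container for the values serialized by bench():
// "[n_threads,encode_ms,decode_ms,batchd_ms,prompt_ms]"
struct BenchResult {
    int   n_threads;
    float encode_ms;
    float decode_ms;
    float batchd_ms;
    float prompt_ms;
};

// Parses the fixed, comma-separated layout produced above; returns false on mismatch.
static bool parse_bench_result(const std::string & s, BenchResult & out) {
    return std::sscanf(s.c_str(), "[%d,%f,%f,%f,%f]",
                       &out.n_threads, &out.encode_ms, &out.decode_ms,
                       &out.batchd_ms, &out.prompt_ms) == 5;
}
```

Since `std::to_string(float)` emits a plain fixed-point representation, the values round-trip through a parse like this with ordinary `float` precision.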
48 changes: 29 additions & 19 deletions cpp/whisper.cpp
@@ -4191,33 +4191,37 @@ whisper_token whisper_token_transcribe(struct whisper_context * ctx) {
}

struct whisper_timings whisper_get_timings(struct whisper_context * ctx) {
const int64_t t_end_us = wsp_ggml_time_us();
if (ctx->state == nullptr) {
return {
.load_us = ctx->t_load_us,
.t_start_us = ctx->t_start_us,
.fail_p = 0,
.fail_h = 0,
.t_mel_us = 0,
.n_sample = 0,
.n_encode = 0,
.n_decode = 0,
.n_batchd = 0,
.n_prompt = 0,
.mel_ms = 0,
.sample_ms = 0,
.encode_ms = 0,
.decode_ms = 0,
.batchd_ms = 0,
.prompt_ms = 0,
};
}

const int32_t n_sample = std::max(1, ctx->state->n_sample);
const int32_t n_encode = std::max(1, ctx->state->n_encode);
const int32_t n_decode = std::max(1, ctx->state->n_decode);
const int32_t n_batchd = std::max(1, ctx->state->n_batchd);
const int32_t n_prompt = std::max(1, ctx->state->n_prompt);
return {
.load_us = ctx->t_load_us,
.t_start_us = ctx->t_start_us,
.fail_p = ctx->state->n_fail_p,
.fail_h = ctx->state->n_fail_h,
.t_mel_us = ctx->state->t_mel_us,
.n_sample = std::max(1, ctx->state->n_sample),
.n_encode = std::max(1, ctx->state->n_encode),
.n_decode = std::max(1, ctx->state->n_decode),
.n_batchd = std::max(1, ctx->state->n_batchd),
.n_prompt = std::max(1, ctx->state->n_prompt),
.mel_ms = ctx->state->t_mel_us / 1000.0f,
.sample_ms = 1e-3f * ctx->state->t_sample_us / n_sample,
.encode_ms = 1e-3f * ctx->state->t_encode_us / n_encode,
.decode_ms = 1e-3f * ctx->state->t_decode_us / n_decode,
.batchd_ms = 1e-3f * ctx->state->t_batchd_us / n_batchd,
.prompt_ms = 1e-3f * ctx->state->t_prompt_us / n_prompt,
};
}

@@ -4228,13 +4232,19 @@ void whisper_print_timings(struct whisper_context * ctx) {
WHISPER_LOG_INFO("\n");
WHISPER_LOG_INFO("%s: load time = %8.2f ms\n", __func__, timings.load_us / 1000.0f);
if (ctx->state != nullptr) {
const int32_t n_sample = std::max(1, ctx->state->n_sample);
const int32_t n_encode = std::max(1, ctx->state->n_encode);
const int32_t n_decode = std::max(1, ctx->state->n_decode);
const int32_t n_batchd = std::max(1, ctx->state->n_batchd);
const int32_t n_prompt = std::max(1, ctx->state->n_prompt);

WHISPER_LOG_INFO("%s: fallbacks = %3d p / %3d h\n", __func__, timings.fail_p, timings.fail_h);
WHISPER_LOG_INFO("%s: mel time = %8.2f ms\n", __func__, timings.t_mel_us / 1000.0f);
WHISPER_LOG_INFO("%s: sample time = %8.2f ms / %5d runs (%8.2f ms per run)\n", __func__, 1e-3f * timings.n_sample, timings.n_sample, 1e-3f * timings.n_sample / timings.n_sample);
WHISPER_LOG_INFO("%s: encode time = %8.2f ms / %5d runs (%8.2f ms per run)\n", __func__, 1e-3f * timings.n_encode, timings.n_encode, 1e-3f * timings.n_encode / timings.n_encode);
WHISPER_LOG_INFO("%s: decode time = %8.2f ms / %5d runs (%8.2f ms per run)\n", __func__, 1e-3f * timings.n_decode, timings.n_decode, 1e-3f * timings.n_decode / timings.n_decode);
WHISPER_LOG_INFO("%s: batchd time = %8.2f ms / %5d runs (%8.2f ms per run)\n", __func__, 1e-3f * timings.n_batchd, timings.n_batchd, 1e-3f * timings.n_batchd / timings.n_batchd);
WHISPER_LOG_INFO("%s: prompt time = %8.2f ms / %5d runs (%8.2f ms per run)\n", __func__, 1e-3f * timings.n_prompt, timings.n_prompt, 1e-3f * timings.n_prompt / timings.n_prompt);
WHISPER_LOG_INFO("%s: mel time = %8.2f ms\n", __func__, timings.mel_ms);
WHISPER_LOG_INFO("%s: sample time = %8.2f ms / %5d runs (%8.2f ms per run)\n", __func__, 1e-3f * ctx->state->t_sample_us, n_sample, timings.sample_ms);
WHISPER_LOG_INFO("%s: encode time = %8.2f ms / %5d runs (%8.2f ms per run)\n", __func__, 1e-3f * ctx->state->t_encode_us, n_encode, timings.encode_ms);
WHISPER_LOG_INFO("%s: decode time = %8.2f ms / %5d runs (%8.2f ms per run)\n", __func__, 1e-3f * ctx->state->t_decode_us, n_decode, timings.decode_ms);
WHISPER_LOG_INFO("%s: batchd time = %8.2f ms / %5d runs (%8.2f ms per run)\n", __func__, 1e-3f * ctx->state->t_batchd_us, n_batchd, timings.batchd_ms);
WHISPER_LOG_INFO("%s: prompt time = %8.2f ms / %5d runs (%8.2f ms per run)\n", __func__, 1e-3f * ctx->state->t_prompt_us, n_prompt, timings.prompt_ms);
}
WHISPER_LOG_INFO("%s: total time = %8.2f ms\n", __func__, (t_end_us - timings.t_start_us)/1000.0f);
}
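The header that declares `whisper_timings` is not part of this diff, so for orientation here is the layout implied by the designated initializers in `whisper_get_timings()` above. This is a reconstruction: the integer widths and the use of `float` are assumptions based on the fields each value is copied from, not the actual declaration.

```cpp
// Sketch reconstructed from the initializer lists in whisper_get_timings();
// the real declaration lives in the (unchanged) header. Types are assumed.
struct whisper_timings {
    int64_t load_us;     // ctx->t_load_us, model load time in microseconds
    int64_t t_start_us;  // ctx->t_start_us, used for the "total time" line
    int32_t fail_p;      // decoder fallbacks (p), from ctx->state->n_fail_p
    int32_t fail_h;      // decoder fallbacks (h), from ctx->state->n_fail_h
    float   mel_ms;      // total mel time, t_mel_us / 1000.0f
    float   sample_ms;   // average ms per sampling run
    float   encode_ms;   // average ms per encoder run
    float   decode_ms;   // average ms per decoder run
    float   batchd_ms;   // average ms per batch decode run
    float   prompt_ms;   // average ms per prompt run
};
```

The earlier fields held raw run counts, so expressions like `1e-3f * timings.n_encode / timings.n_encode` in the old log lines could not report real per-run times; the `*_ms` fields carry the averages directly.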
25 changes: 25 additions & 0 deletions example/ios/Podfile.lock
@@ -1230,6 +1230,27 @@ PODS:
- React-logger (= 0.74.6)
- React-perflogger (= 0.74.6)
- React-utils (= 0.74.6)
- RNCClipboard (1.14.3):
- DoubleConversion
- glog
- hermes-engine
- RCT-Folly (= 2024.01.01.00)
- RCTRequired
- RCTTypeSafety
- React-Codegen
- React-Core
- React-debug
- React-Fabric
- React-featureflags
- React-graphics
- React-ImageManager
- React-NativeModulesApple
- React-RCTFabric
- React-rendererdebug
- React-utils
- ReactCommon/turbomodule/bridging
- ReactCommon/turbomodule/core
- Yoga
- RNFS (2.20.0):
- React-Core
- RNScreens (3.32.0):
@@ -1354,6 +1375,7 @@ DEPENDENCIES:
- React-runtimescheduler (from `../node_modules/react-native/ReactCommon/react/renderer/runtimescheduler`)
- React-utils (from `../node_modules/react-native/ReactCommon/react/utils`)
- ReactCommon/turbomodule/core (from `../node_modules/react-native/ReactCommon`)
- "RNCClipboard (from `../node_modules/@react-native-clipboard/clipboard`)"
- RNFS (from `../node_modules/react-native-fs`)
- RNScreens (from `../node_modules/react-native-screens`)
- RNSound (from `../node_modules/react-native-sound`)
@@ -1476,6 +1498,8 @@ EXTERNAL SOURCES:
:path: "../node_modules/react-native/ReactCommon/react/utils"
ReactCommon:
:path: "../node_modules/react-native/ReactCommon"
RNCClipboard:
:path: "../node_modules/@react-native-clipboard/clipboard"
RNFS:
:path: "../node_modules/react-native-fs"
RNScreens:
@@ -1544,6 +1568,7 @@ SPEC CHECKSUMS:
React-runtimescheduler: b63ebebd3e000e0ba4ac19ca69bdac071559ad57
React-utils: 2955bdc1b2ed495f14dc7d3bfbbb7e3624cfc0fc
ReactCommon: 5c504a77030c7ab89eee75b1725b80d8cee7f5d7
RNCClipboard: 99d86f515e6262a8a1d0915f1f6e6b410698aa3a
RNFS: 4ac0f0ea233904cb798630b3c077808c06931688
RNScreens: d3d50aa84db4541eee00fbb1f32151030f56c510
RNSound: 6c156f925295bdc83e8e422e7d8b38d33bc71852
1 change: 1 addition & 0 deletions example/package.json
@@ -9,6 +9,7 @@
"pods": "pod-install --quiet"
},
"dependencies": {
"@react-native-clipboard/clipboard": "^1.14.3",
"@react-native/babel-preset": "0.74.88",
"@react-native/metro-config": "^0.74.79",
"@react-navigation/native": "^6.1.18",
12 changes: 9 additions & 3 deletions example/src/Bench.tsx
@@ -8,8 +8,7 @@ import {
Pressable,
} from 'react-native'
import RNFS from 'react-native-fs'
import { initWhisper, libVersion, AudioSessionIos } from '../../src' // whisper.rn
import type { WhisperContext } from '../../src'
import { initWhisper } from '../../src' // whisper.rn
import contextOpts from './context-opts'

Check failure on line 12 in example/src/Bench.tsx (GitHub Actions / test-android, test-ios): 'contextOpts' is defined but never used
const baseURL = 'https://huggingface.co/ggerganov/whisper.cpp/resolve/main/'
@@ -326,7 +325,14 @@ export default function Bench() {
const { nThreads, nEncode, nDecode, nBatchd, nPrompt } = result
const fa = Platform.OS === 'ios' ? '1' : '0'
log(
`| <todo> | ${Platform.OS} | METAL | ${modelName} | ${nThreads} | ${fa} | ${nEncode} | ${nDecode} | ${nBatchd} | ${nPrompt} | <todo> |`,
// TODO: config
`| <todo> | ${
Platform.OS
} | NEON BLAS METAL | ${modelName} | ${nThreads} | ${fa} | ${nEncode.toFixed(
2,
)} | ${nDecode.toFixed(2)} | ${nBatchd.toFixed(
2,
)} | ${nPrompt.toFixed(2)} | <todo> |`,
)
} finally {
await ctx.release()
5 changes: 5 additions & 0 deletions example/yarn.lock
@@ -1459,6 +1459,11 @@
"@nodelib/fs.scandir" "2.1.5"
fastq "^1.6.0"

"@react-native-clipboard/clipboard@^1.14.3":
version "1.14.3"
resolved "https://registry.yarnpkg.com/@react-native-clipboard/clipboard/-/clipboard-1.14.3.tgz#3596ade1d85b0accdd2585674fcea3ea185f0e20"
integrity sha512-EVWxJfCSyBN2SH5b3JrA/w1qlYu3vihQOfdD7fs/BYp63xL6qy93CvbFDHzF8ooFpGM6f67hkAN+gxl1RfOKuw==

"@react-native-community/[email protected]":
version "13.6.9"
resolved "https://registry.yarnpkg.com/@react-native-community/cli-clean/-/cli-clean-13.6.9.tgz#b6754f39c2b877c9d730feb848945150e1d52209"
60 changes: 32 additions & 28 deletions scripts/whisper.cpp.patch
@@ -1,5 +1,5 @@
--- whisper.cpp.orig 2024-11-07 13:09:13
+++ whisper.cpp 2024-11-07 13:03:02
--- whisper.cpp.orig 2024-11-07 14:03:54
+++ whisper.cpp 2024-11-07 14:01:48
@@ -3388,8 +3388,10 @@
const size_t memory_size = aheads_masks_nbytes(state->aheads_masks);
WHISPER_LOG_INFO("%s: alignment heads masks size = %ld B\n", __func__, memory_size);
@@ -27,39 +27,43 @@
/*.flash_attn =*/ false,
/*.gpu_device =*/ 0,

@@ -4185,29 +4189,54 @@
@@ -4185,29 +4189,64 @@
whisper_token whisper_token_transcribe(struct whisper_context * ctx) {
return ctx->vocab.token_transcribe;
}
+
+struct whisper_timings whisper_get_timings(struct whisper_context * ctx) {
+ const int64_t t_end_us = wsp_ggml_time_us();
+ if (ctx->state == nullptr) {
+ return {
+ .load_us = ctx->t_load_us,
+ .t_start_us = ctx->t_start_us,
+ .fail_p = 0,
+ .fail_h = 0,
+ .t_mel_us = 0,
+ .n_sample = 0,
+ .n_encode = 0,
+ .n_decode = 0,
+ .n_batchd = 0,
+ .n_prompt = 0,
+ .mel_ms = 0,
+ .sample_ms = 0,
+ .encode_ms = 0,
+ .decode_ms = 0,
+ .batchd_ms = 0,
+ .prompt_ms = 0,
+ };
+ }

+ const int32_t n_sample = std::max(1, ctx->state->n_sample);
+ const int32_t n_encode = std::max(1, ctx->state->n_encode);
+ const int32_t n_decode = std::max(1, ctx->state->n_decode);
+ const int32_t n_batchd = std::max(1, ctx->state->n_batchd);
+ const int32_t n_prompt = std::max(1, ctx->state->n_prompt);
+ return {
+ .load_us = ctx->t_load_us,
+ .t_start_us = ctx->t_start_us,
+ .fail_p = ctx->state->n_fail_p,
+ .fail_h = ctx->state->n_fail_h,
+ .t_mel_us = ctx->state->t_mel_us,
+ .n_sample = std::max(1, ctx->state->n_sample),
+ .n_encode = std::max(1, ctx->state->n_encode),
+ .n_decode = std::max(1, ctx->state->n_decode),
+ .n_batchd = std::max(1, ctx->state->n_batchd),
+ .n_prompt = std::max(1, ctx->state->n_prompt),
+ .mel_ms = ctx->state->t_mel_us / 1000.0f,
+ .sample_ms = 1e-3f * ctx->state->t_sample_us / n_sample,
+ .encode_ms = 1e-3f * ctx->state->t_encode_us / n_encode,
+ .decode_ms = 1e-3f * ctx->state->t_decode_us / n_decode,
+ .batchd_ms = 1e-3f * ctx->state->t_batchd_us / n_batchd,
+ .prompt_ms = 1e-3f * ctx->state->t_prompt_us / n_prompt,
+ };
+}
+
@@ -72,12 +76,12 @@
+ WHISPER_LOG_INFO("%s: load time = %8.2f ms\n", __func__, timings.load_us / 1000.0f);
if (ctx->state != nullptr) {
-
- const int32_t n_sample = std::max(1, ctx->state->n_sample);
- const int32_t n_encode = std::max(1, ctx->state->n_encode);
- const int32_t n_decode = std::max(1, ctx->state->n_decode);
- const int32_t n_batchd = std::max(1, ctx->state->n_batchd);
- const int32_t n_prompt = std::max(1, ctx->state->n_prompt);
-
const int32_t n_sample = std::max(1, ctx->state->n_sample);
const int32_t n_encode = std::max(1, ctx->state->n_encode);
const int32_t n_decode = std::max(1, ctx->state->n_decode);
const int32_t n_batchd = std::max(1, ctx->state->n_batchd);
const int32_t n_prompt = std::max(1, ctx->state->n_prompt);

- WHISPER_LOG_INFO("%s: fallbacks = %3d p / %3d h\n", __func__, ctx->state->n_fail_p, ctx->state->n_fail_h);
- WHISPER_LOG_INFO("%s: mel time = %8.2f ms\n", __func__, ctx->state->t_mel_us / 1000.0f);
- WHISPER_LOG_INFO("%s: sample time = %8.2f ms / %5d runs (%8.2f ms per run)\n", __func__, 1e-3f * ctx->state->t_sample_us, n_sample, 1e-3f * ctx->state->t_sample_us / n_sample);
@@ -86,12 +90,12 @@
- WHISPER_LOG_INFO("%s: batchd time = %8.2f ms / %5d runs (%8.2f ms per run)\n", __func__, 1e-3f * ctx->state->t_batchd_us, n_batchd, 1e-3f * ctx->state->t_batchd_us / n_batchd);
- WHISPER_LOG_INFO("%s: prompt time = %8.2f ms / %5d runs (%8.2f ms per run)\n", __func__, 1e-3f * ctx->state->t_prompt_us, n_prompt, 1e-3f * ctx->state->t_prompt_us / n_prompt);
+ WHISPER_LOG_INFO("%s: fallbacks = %3d p / %3d h\n", __func__, timings.fail_p, timings.fail_h);
+ WHISPER_LOG_INFO("%s: mel time = %8.2f ms\n", __func__, timings.t_mel_us / 1000.0f);
+ WHISPER_LOG_INFO("%s: sample time = %8.2f ms / %5d runs (%8.2f ms per run)\n", __func__, 1e-3f * timings.n_sample, timings.n_sample, 1e-3f * timings.n_sample / timings.n_sample);
+ WHISPER_LOG_INFO("%s: encode time = %8.2f ms / %5d runs (%8.2f ms per run)\n", __func__, 1e-3f * timings.n_encode, timings.n_encode, 1e-3f * timings.n_encode / timings.n_encode);
+ WHISPER_LOG_INFO("%s: decode time = %8.2f ms / %5d runs (%8.2f ms per run)\n", __func__, 1e-3f * timings.n_decode, timings.n_decode, 1e-3f * timings.n_decode / timings.n_decode);
+ WHISPER_LOG_INFO("%s: batchd time = %8.2f ms / %5d runs (%8.2f ms per run)\n", __func__, 1e-3f * timings.n_batchd, timings.n_batchd, 1e-3f * timings.n_batchd / timings.n_batchd);
+ WHISPER_LOG_INFO("%s: prompt time = %8.2f ms / %5d runs (%8.2f ms per run)\n", __func__, 1e-3f * timings.n_prompt, timings.n_prompt, 1e-3f * timings.n_prompt / timings.n_prompt);
+ WHISPER_LOG_INFO("%s: mel time = %8.2f ms\n", __func__, timings.mel_ms);
+ WHISPER_LOG_INFO("%s: sample time = %8.2f ms / %5d runs (%8.2f ms per run)\n", __func__, 1e-3f * ctx->state->t_sample_us, n_sample, timings.sample_ms);
+ WHISPER_LOG_INFO("%s: encode time = %8.2f ms / %5d runs (%8.2f ms per run)\n", __func__, 1e-3f * ctx->state->t_encode_us, n_encode, timings.encode_ms);
+ WHISPER_LOG_INFO("%s: decode time = %8.2f ms / %5d runs (%8.2f ms per run)\n", __func__, 1e-3f * ctx->state->t_decode_us, n_decode, timings.decode_ms);
+ WHISPER_LOG_INFO("%s: batchd time = %8.2f ms / %5d runs (%8.2f ms per run)\n", __func__, 1e-3f * ctx->state->t_batchd_us, n_batchd, timings.batchd_ms);
+ WHISPER_LOG_INFO("%s: prompt time = %8.2f ms / %5d runs (%8.2f ms per run)\n", __func__, 1e-3f * ctx->state->t_prompt_us, n_prompt, timings.prompt_ms);
}
- WHISPER_LOG_INFO("%s: total time = %8.2f ms\n", __func__, (t_end_us - ctx->t_start_us)/1000.0f);
+ WHISPER_LOG_INFO("%s: total time = %8.2f ms\n", __func__, (t_end_us - timings.t_start_us)/1000.0f);
