diff --git a/android/src/main/java/com/rnwhisper/WhisperContext.java b/android/src/main/java/com/rnwhisper/WhisperContext.java index 75c59fa..9703e5c 100644 --- a/android/src/main/java/com/rnwhisper/WhisperContext.java +++ b/android/src/main/java/com/rnwhisper/WhisperContext.java @@ -82,9 +82,9 @@ private void rewind() { private boolean vad(ReadableMap options, short[] shortBuffer, int nSamples, int n) { boolean isSpeech = true; if (!isTranscribing && options.hasKey("useVad") && options.getBoolean("useVad")) { - int vadSec = options.hasKey("vadMs") ? options.getInt("vadMs") / 1000 : 2; - if (vadSec < 2) vadSec = 2; - int sampleSize = vadSec * SAMPLE_RATE; + int vadMs = options.hasKey("vadMs") ? options.getInt("vadMs") : 2000; + if (vadMs < 2000) vadMs = 2000; + int sampleSize = (int) (SAMPLE_RATE * vadMs / 1000); if (nSamples + n > sampleSize) { int start = nSamples + n - sampleSize; float[] audioData = new float[sampleSize]; diff --git a/example/ios/Podfile.lock b/example/ios/Podfile.lock index 839871b..600a807 100644 --- a/example/ios/Podfile.lock +++ b/example/ios/Podfile.lock @@ -764,7 +764,7 @@ PODS: - SSZipArchive (~> 2.2) - SocketRocket (0.6.0) - SSZipArchive (2.4.3) - - whisper-rn (0.4.0-rc.1): + - whisper-rn (0.4.0-rc.2): - RCT-Folly - RCTRequired - RCTTypeSafety @@ -1006,7 +1006,7 @@ SPEC CHECKSUMS: RNZipArchive: ef9451b849c45a29509bf44e65b788829ab07801 SocketRocket: fccef3f9c5cedea1353a9ef6ada904fde10d6608 SSZipArchive: fe6a26b2a54d5a0890f2567b5cc6de5caa600aef - whisper-rn: 6dc1b9a55eb0d9e9c0cb00999660c04c32d6732f + whisper-rn: 02c39fead176096e6d8420c4f7f4326f122b36e3 Yoga: f7decafdc5e8c125e6fa0da38a687e35238420fa YogaKit: f782866e155069a2cca2517aafea43200b01fd5a diff --git a/ios/RNWhisperContext.h b/ios/RNWhisperContext.h index 351a35c..4bee2fa 100644 --- a/ios/RNWhisperContext.h +++ b/ios/RNWhisperContext.h @@ -30,7 +30,7 @@ typedef struct { NSString* audioOutputPath; bool useVad; - int vadSec; + int vadMs; float vadThold; float vadFreqThold; diff 
--git a/ios/RNWhisperContext.mm b/ios/RNWhisperContext.mm index 11b9e4f..2c90755 100644 --- a/ios/RNWhisperContext.mm +++ b/ios/RNWhisperContext.mm @@ -56,8 +56,8 @@ - (void)prepareRealtime:(NSDictionary *)options { self->recordState.audioOutputPath = options[@"audioOutputPath"]; self->recordState.useVad = options[@"useVad"] != nil ? [options[@"useVad"] boolValue] : false; - self->recordState.vadSec = options[@"vadMs"] != nil ? [options[@"vadMs"] intValue] / 1000 : 2; - if (self->recordState.vadSec < 2) self->recordState.vadSec = 2; + self->recordState.vadMs = options[@"vadMs"] != nil ? [options[@"vadMs"] intValue] : 2000; + if (self->recordState.vadMs < 2000) self->recordState.vadMs = 2000; self->recordState.vadThold = options[@"vadThold"] != nil ? [options[@"vadThold"] floatValue] : 0.6f; self->recordState.vadFreqThold = options[@"vadFreqThold"] != nil ? [options[@"vadFreqThold"] floatValue] : 100.0f; @@ -100,7 +100,7 @@ bool vad(RNWhisperContextRecordState *state, int16_t* audioBufferI16, int nSampl { bool isSpeech = true; if (!state->isTranscribing && state->useVad) { - int sampleSize = state->vadSec * WHISPER_SAMPLE_RATE; + int sampleSize = (int) (WHISPER_SAMPLE_RATE * state->vadMs / 1000); if (nSamples + n > sampleSize) { int start = nSamples + n - sampleSize; std::vector<float> audioBufferF32Vec(sampleSize);