diff --git a/android/src/main/java/com/rnwhisper/WhisperContext.java b/android/src/main/java/com/rnwhisper/WhisperContext.java index c6f30d5..0ec7549 100644 --- a/android/src/main/java/com/rnwhisper/WhisperContext.java +++ b/android/src/main/java/com/rnwhisper/WhisperContext.java @@ -83,6 +83,7 @@ private boolean vad(ReadableMap options, short[] shortBuffer, int nSamples, int boolean isSpeech = true; if (!isTranscribing && options.hasKey("useVad") && options.getBoolean("useVad")) { int vadSec = options.hasKey("vadMs") ? options.getInt("vadMs") / 1000 : 2; + if (vadSec < 2) vadSec = 2; int sampleSize = vadSec * SAMPLE_RATE; if (nSamples + n > sampleSize) { int start = nSamples + n - sampleSize; diff --git a/docs/API/README.md b/docs/API/README.md index 846844c..ed45d74 100644 --- a/docs/API/README.md +++ b/docs/API/README.md @@ -58,7 +58,7 @@ whisper.rn #### Defined in -[index.ts:76](https://github.com/mybigday/whisper.rn/blob/66e9a0c/src/index.ts#L76) +[index.ts:76](https://github.com/mybigday/whisper.rn/blob/f3ce9a6/src/index.ts#L76) ___ @@ -79,7 +79,7 @@ ___ #### Defined in -[index.ts:428](https://github.com/mybigday/whisper.rn/blob/66e9a0c/src/index.ts#L428) +[index.ts:428](https://github.com/mybigday/whisper.rn/blob/f3ce9a6/src/index.ts#L428) ___ @@ -89,7 +89,7 @@ ___ #### Defined in -[index.ts:59](https://github.com/mybigday/whisper.rn/blob/66e9a0c/src/index.ts#L59) +[index.ts:59](https://github.com/mybigday/whisper.rn/blob/f3ce9a6/src/index.ts#L59) ___ @@ -107,7 +107,7 @@ ___ #### Defined in -[index.ts:52](https://github.com/mybigday/whisper.rn/blob/66e9a0c/src/index.ts#L52) +[index.ts:52](https://github.com/mybigday/whisper.rn/blob/f3ce9a6/src/index.ts#L52) ___ @@ -126,7 +126,7 @@ ___ #### Defined in -[index.ts:45](https://github.com/mybigday/whisper.rn/blob/66e9a0c/src/index.ts#L45) +[index.ts:45](https://github.com/mybigday/whisper.rn/blob/f3ce9a6/src/index.ts#L45) ___ @@ -156,7 +156,7 @@ ___ #### Defined in 
-[NativeRNWhisper.ts:5](https://github.com/mybigday/whisper.rn/blob/66e9a0c/src/NativeRNWhisper.ts#L5) +[NativeRNWhisper.ts:5](https://github.com/mybigday/whisper.rn/blob/f3ce9a6/src/NativeRNWhisper.ts#L5) ___ @@ -174,7 +174,7 @@ ___ #### Defined in -[index.ts:70](https://github.com/mybigday/whisper.rn/blob/66e9a0c/src/index.ts#L70) +[index.ts:70](https://github.com/mybigday/whisper.rn/blob/f3ce9a6/src/index.ts#L70) ___ @@ -199,7 +199,7 @@ ___ #### Defined in -[index.ts:133](https://github.com/mybigday/whisper.rn/blob/66e9a0c/src/index.ts#L133) +[index.ts:133](https://github.com/mybigday/whisper.rn/blob/f3ce9a6/src/index.ts#L133) ___ @@ -217,7 +217,7 @@ ___ #### Defined in -[index.ts:166](https://github.com/mybigday/whisper.rn/blob/66e9a0c/src/index.ts#L166) +[index.ts:166](https://github.com/mybigday/whisper.rn/blob/f3ce9a6/src/index.ts#L166) ___ @@ -241,7 +241,7 @@ ___ #### Defined in -[index.ts:153](https://github.com/mybigday/whisper.rn/blob/66e9a0c/src/index.ts#L153) +[index.ts:153](https://github.com/mybigday/whisper.rn/blob/f3ce9a6/src/index.ts#L153) ___ @@ -251,7 +251,7 @@ ___ #### Defined in -[index.ts:84](https://github.com/mybigday/whisper.rn/blob/66e9a0c/src/index.ts#L84) +[index.ts:84](https://github.com/mybigday/whisper.rn/blob/f3ce9a6/src/index.ts#L84) ___ @@ -269,7 +269,7 @@ ___ #### Defined in -[NativeRNWhisper.ts:37](https://github.com/mybigday/whisper.rn/blob/66e9a0c/src/NativeRNWhisper.ts#L37) +[NativeRNWhisper.ts:37](https://github.com/mybigday/whisper.rn/blob/f3ce9a6/src/NativeRNWhisper.ts#L37) ## Variables @@ -294,7 +294,7 @@ AudioSession Utility, iOS only. 
#### Defined in -[AudioSessionIos.ts:50](https://github.com/mybigday/whisper.rn/blob/66e9a0c/src/AudioSessionIos.ts#L50) +[AudioSessionIos.ts:50](https://github.com/mybigday/whisper.rn/blob/f3ce9a6/src/AudioSessionIos.ts#L50) ___ @@ -306,7 +306,7 @@ Is allow fallback to CPU if load CoreML model failed #### Defined in -[index.ts:526](https://github.com/mybigday/whisper.rn/blob/66e9a0c/src/index.ts#L526) +[index.ts:526](https://github.com/mybigday/whisper.rn/blob/f3ce9a6/src/index.ts#L526) ___ @@ -318,7 +318,7 @@ Is use CoreML models on iOS #### Defined in -[index.ts:523](https://github.com/mybigday/whisper.rn/blob/66e9a0c/src/index.ts#L523) +[index.ts:523](https://github.com/mybigday/whisper.rn/blob/f3ce9a6/src/index.ts#L523) ___ @@ -330,7 +330,7 @@ Current version of whisper.cpp #### Defined in -[index.ts:518](https://github.com/mybigday/whisper.rn/blob/66e9a0c/src/index.ts#L518) +[index.ts:518](https://github.com/mybigday/whisper.rn/blob/f3ce9a6/src/index.ts#L518) ## Functions @@ -350,7 +350,7 @@ Current version of whisper.cpp #### Defined in -[index.ts:452](https://github.com/mybigday/whisper.rn/blob/66e9a0c/src/index.ts#L452) +[index.ts:452](https://github.com/mybigday/whisper.rn/blob/f3ce9a6/src/index.ts#L452) ___ @@ -364,4 +364,4 @@ ___ #### Defined in -[index.ts:513](https://github.com/mybigday/whisper.rn/blob/66e9a0c/src/index.ts#L513) +[index.ts:513](https://github.com/mybigday/whisper.rn/blob/f3ce9a6/src/index.ts#L513) diff --git a/docs/API/classes/WhisperContext.md b/docs/API/classes/WhisperContext.md index 72206e4..315003c 100644 --- a/docs/API/classes/WhisperContext.md +++ b/docs/API/classes/WhisperContext.md @@ -32,7 +32,7 @@ #### Defined in -[index.ts:186](https://github.com/mybigday/whisper.rn/blob/66e9a0c/src/index.ts#L186) +[index.ts:186](https://github.com/mybigday/whisper.rn/blob/f3ce9a6/src/index.ts#L186) ## Properties @@ -42,7 +42,7 @@ #### Defined in -[index.ts:184](https://github.com/mybigday/whisper.rn/blob/66e9a0c/src/index.ts#L184) 
+[index.ts:184](https://github.com/mybigday/whisper.rn/blob/f3ce9a6/src/index.ts#L184) ## Methods @@ -56,7 +56,7 @@ #### Defined in -[index.ts:423](https://github.com/mybigday/whisper.rn/blob/66e9a0c/src/index.ts#L423) +[index.ts:423](https://github.com/mybigday/whisper.rn/blob/f3ce9a6/src/index.ts#L423) ___ @@ -84,7 +84,7 @@ Transcribe audio file #### Defined in -[index.ts:191](https://github.com/mybigday/whisper.rn/blob/66e9a0c/src/index.ts#L191) +[index.ts:191](https://github.com/mybigday/whisper.rn/blob/f3ce9a6/src/index.ts#L191) ___ @@ -106,4 +106,4 @@ Transcribe the microphone audio stream, the microphone user permission is requir #### Defined in -[index.ts:287](https://github.com/mybigday/whisper.rn/blob/66e9a0c/src/index.ts#L287) +[index.ts:287](https://github.com/mybigday/whisper.rn/blob/f3ce9a6/src/index.ts#L287) diff --git a/docs/API/enums/AudioSessionCategoryIos.md b/docs/API/enums/AudioSessionCategoryIos.md index b92e21a..fb620f0 100644 --- a/docs/API/enums/AudioSessionCategoryIos.md +++ b/docs/API/enums/AudioSessionCategoryIos.md @@ -25,7 +25,7 @@ https://developer.apple.com/documentation/avfaudio/avaudiosessioncategory?langua #### Defined in -[AudioSessionIos.ts:8](https://github.com/mybigday/whisper.rn/blob/66e9a0c/src/AudioSessionIos.ts#L8) +[AudioSessionIos.ts:8](https://github.com/mybigday/whisper.rn/blob/f3ce9a6/src/AudioSessionIos.ts#L8) ___ @@ -35,7 +35,7 @@ ___ #### Defined in -[AudioSessionIos.ts:13](https://github.com/mybigday/whisper.rn/blob/66e9a0c/src/AudioSessionIos.ts#L13) +[AudioSessionIos.ts:13](https://github.com/mybigday/whisper.rn/blob/f3ce9a6/src/AudioSessionIos.ts#L13) ___ @@ -45,7 +45,7 @@ ___ #### Defined in -[AudioSessionIos.ts:12](https://github.com/mybigday/whisper.rn/blob/66e9a0c/src/AudioSessionIos.ts#L12) +[AudioSessionIos.ts:12](https://github.com/mybigday/whisper.rn/blob/f3ce9a6/src/AudioSessionIos.ts#L12) ___ @@ -55,7 +55,7 @@ ___ #### Defined in 
-[AudioSessionIos.ts:10](https://github.com/mybigday/whisper.rn/blob/66e9a0c/src/AudioSessionIos.ts#L10) +[AudioSessionIos.ts:10](https://github.com/mybigday/whisper.rn/blob/f3ce9a6/src/AudioSessionIos.ts#L10) ___ @@ -65,7 +65,7 @@ ___ #### Defined in -[AudioSessionIos.ts:11](https://github.com/mybigday/whisper.rn/blob/66e9a0c/src/AudioSessionIos.ts#L11) +[AudioSessionIos.ts:11](https://github.com/mybigday/whisper.rn/blob/f3ce9a6/src/AudioSessionIos.ts#L11) ___ @@ -75,4 +75,4 @@ ___ #### Defined in -[AudioSessionIos.ts:9](https://github.com/mybigday/whisper.rn/blob/66e9a0c/src/AudioSessionIos.ts#L9) +[AudioSessionIos.ts:9](https://github.com/mybigday/whisper.rn/blob/f3ce9a6/src/AudioSessionIos.ts#L9) diff --git a/docs/API/enums/AudioSessionCategoryOptionIos.md b/docs/API/enums/AudioSessionCategoryOptionIos.md index 4406b79..a46772d 100644 --- a/docs/API/enums/AudioSessionCategoryOptionIos.md +++ b/docs/API/enums/AudioSessionCategoryOptionIos.md @@ -26,7 +26,7 @@ https://developer.apple.com/documentation/avfaudio/avaudiosessioncategoryoptions #### Defined in -[AudioSessionIos.ts:25](https://github.com/mybigday/whisper.rn/blob/66e9a0c/src/AudioSessionIos.ts#L25) +[AudioSessionIos.ts:25](https://github.com/mybigday/whisper.rn/blob/f3ce9a6/src/AudioSessionIos.ts#L25) ___ @@ -36,7 +36,7 @@ ___ #### Defined in -[AudioSessionIos.ts:23](https://github.com/mybigday/whisper.rn/blob/66e9a0c/src/AudioSessionIos.ts#L23) +[AudioSessionIos.ts:23](https://github.com/mybigday/whisper.rn/blob/f3ce9a6/src/AudioSessionIos.ts#L23) ___ @@ -46,7 +46,7 @@ ___ #### Defined in -[AudioSessionIos.ts:24](https://github.com/mybigday/whisper.rn/blob/66e9a0c/src/AudioSessionIos.ts#L24) +[AudioSessionIos.ts:24](https://github.com/mybigday/whisper.rn/blob/f3ce9a6/src/AudioSessionIos.ts#L24) ___ @@ -56,7 +56,7 @@ ___ #### Defined in -[AudioSessionIos.ts:26](https://github.com/mybigday/whisper.rn/blob/66e9a0c/src/AudioSessionIos.ts#L26) 
+[AudioSessionIos.ts:26](https://github.com/mybigday/whisper.rn/blob/f3ce9a6/src/AudioSessionIos.ts#L26) ___ @@ -66,7 +66,7 @@ ___ #### Defined in -[AudioSessionIos.ts:21](https://github.com/mybigday/whisper.rn/blob/66e9a0c/src/AudioSessionIos.ts#L21) +[AudioSessionIos.ts:21](https://github.com/mybigday/whisper.rn/blob/f3ce9a6/src/AudioSessionIos.ts#L21) ___ @@ -76,7 +76,7 @@ ___ #### Defined in -[AudioSessionIos.ts:22](https://github.com/mybigday/whisper.rn/blob/66e9a0c/src/AudioSessionIos.ts#L22) +[AudioSessionIos.ts:22](https://github.com/mybigday/whisper.rn/blob/f3ce9a6/src/AudioSessionIos.ts#L22) ___ @@ -86,4 +86,4 @@ ___ #### Defined in -[AudioSessionIos.ts:20](https://github.com/mybigday/whisper.rn/blob/66e9a0c/src/AudioSessionIos.ts#L20) +[AudioSessionIos.ts:20](https://github.com/mybigday/whisper.rn/blob/f3ce9a6/src/AudioSessionIos.ts#L20) diff --git a/docs/API/enums/AudioSessionModeIos.md b/docs/API/enums/AudioSessionModeIos.md index 0127545..74ebb30 100644 --- a/docs/API/enums/AudioSessionModeIos.md +++ b/docs/API/enums/AudioSessionModeIos.md @@ -27,7 +27,7 @@ https://developer.apple.com/documentation/avfaudio/avaudiosessionmode?language=o #### Defined in -[AudioSessionIos.ts:33](https://github.com/mybigday/whisper.rn/blob/66e9a0c/src/AudioSessionIos.ts#L33) +[AudioSessionIos.ts:33](https://github.com/mybigday/whisper.rn/blob/f3ce9a6/src/AudioSessionIos.ts#L33) ___ @@ -37,7 +37,7 @@ ___ #### Defined in -[AudioSessionIos.ts:36](https://github.com/mybigday/whisper.rn/blob/66e9a0c/src/AudioSessionIos.ts#L36) +[AudioSessionIos.ts:36](https://github.com/mybigday/whisper.rn/blob/f3ce9a6/src/AudioSessionIos.ts#L36) ___ @@ -47,7 +47,7 @@ ___ #### Defined in -[AudioSessionIos.ts:38](https://github.com/mybigday/whisper.rn/blob/66e9a0c/src/AudioSessionIos.ts#L38) +[AudioSessionIos.ts:38](https://github.com/mybigday/whisper.rn/blob/f3ce9a6/src/AudioSessionIos.ts#L38) ___ @@ -57,7 +57,7 @@ ___ #### Defined in 
-[AudioSessionIos.ts:39](https://github.com/mybigday/whisper.rn/blob/66e9a0c/src/AudioSessionIos.ts#L39) +[AudioSessionIos.ts:39](https://github.com/mybigday/whisper.rn/blob/f3ce9a6/src/AudioSessionIos.ts#L39) ___ @@ -67,7 +67,7 @@ ___ #### Defined in -[AudioSessionIos.ts:40](https://github.com/mybigday/whisper.rn/blob/66e9a0c/src/AudioSessionIos.ts#L40) +[AudioSessionIos.ts:40](https://github.com/mybigday/whisper.rn/blob/f3ce9a6/src/AudioSessionIos.ts#L40) ___ @@ -77,7 +77,7 @@ ___ #### Defined in -[AudioSessionIos.ts:35](https://github.com/mybigday/whisper.rn/blob/66e9a0c/src/AudioSessionIos.ts#L35) +[AudioSessionIos.ts:35](https://github.com/mybigday/whisper.rn/blob/f3ce9a6/src/AudioSessionIos.ts#L35) ___ @@ -87,7 +87,7 @@ ___ #### Defined in -[AudioSessionIos.ts:37](https://github.com/mybigday/whisper.rn/blob/66e9a0c/src/AudioSessionIos.ts#L37) +[AudioSessionIos.ts:37](https://github.com/mybigday/whisper.rn/blob/f3ce9a6/src/AudioSessionIos.ts#L37) ___ @@ -97,4 +97,4 @@ ___ #### Defined in -[AudioSessionIos.ts:34](https://github.com/mybigday/whisper.rn/blob/66e9a0c/src/AudioSessionIos.ts#L34) +[AudioSessionIos.ts:34](https://github.com/mybigday/whisper.rn/blob/f3ce9a6/src/AudioSessionIos.ts#L34) diff --git a/ios/RNWhisperContext.mm b/ios/RNWhisperContext.mm index f015209..e195790 100644 --- a/ios/RNWhisperContext.mm +++ b/ios/RNWhisperContext.mm @@ -57,6 +57,8 @@ - (void)prepareRealtime:(NSDictionary *)options { self->recordState.useVad = options[@"useVad"] != nil ? [options[@"useVad"] boolValue] : false; self->recordState.vadSec = options[@"vadMs"] != nil ? [options[@"vadMs"] intValue] / 1000 : 2; + if (self->recordState.vadSec < 2) self->recordState.vadSec = 2; + self->recordState.vadThold = options[@"vadThold"] != nil ? [options[@"vadThold"] floatValue] : 0.6f; self->recordState.vadFreqThold = options[@"vadFreqThold"] != nil ? 
[options[@"vadFreqThold"] floatValue] : 100.0f; diff --git a/src/index.ts b/src/index.ts index c9fe796..968626e 100644 --- a/src/index.ts +++ b/src/index.ts @@ -106,7 +106,7 @@ export type TranscribeRealtimeOptions = TranscribeOptions & { */ useVad?: boolean /** - * The length of the collected audio is used for VAD. (ms) (Default: 2000) + * Length in ms of the collected audio used for VAD; truncated to whole seconds, and values below 2000 are raised to 2000. (Default: 2000) */ vadMs?: number /**