diff --git a/android/src/main/java/com/rnwhisper/WhisperContext.java b/android/src/main/java/com/rnwhisper/WhisperContext.java index bd658ddd..02294740 100644 --- a/android/src/main/java/com/rnwhisper/WhisperContext.java +++ b/android/src/main/java/com/rnwhisper/WhisperContext.java @@ -53,6 +53,7 @@ public class WhisperContext { private boolean isCapturing = false; private boolean isStoppedByAction = false; private boolean isTranscribing = false; + private boolean isTdrzEnable = false; private Thread rootFullHandler = null; private Thread fullHandler = null; @@ -73,6 +74,7 @@ private void rewind() { isCapturing = false; isStoppedByAction = false; isTranscribing = false; + isTdrzEnable = false; rootFullHandler = null; fullHandler = null; } @@ -113,6 +115,8 @@ public int startRealtimeTranscribe(int jobId, ReadableMap options) { double realtimeAudioMinSec = options.hasKey("realtimeAudioMinSec") ? options.getDouble("realtimeAudioMinSec") : 0; final double audioMinSec = realtimeAudioMinSec > 0.5 && realtimeAudioMinSec <= audioSliceSec ? realtimeAudioMinSec : 1; + this.isTdrzEnable = options.hasKey("tdrzEnable") && options.getBoolean("tdrzEnable"); + createRealtimeTranscribeJob(jobId, context, options); sliceNSamples = new ArrayList(); @@ -333,8 +337,9 @@ public WritableMap transcribeInputStream(int jobId, InputStream inputStream, Rea throw new Exception("Context is already in capturing or transcribing"); } rewind(); - this.jobId = jobId; + this.isTdrzEnable = options.hasKey("tdrzEnable") && options.getBoolean("tdrzEnable"); + isTranscribing = true; float[] audioData = AudioUtils.decodeWaveFile(inputStream); @@ -368,8 +373,15 @@ private WritableMap getTextSegments(int start, int count) { WritableMap data = Arguments.createMap(); WritableArray segments = Arguments.createArray(); + for (int i = 0; i < count; i++) { String text = getTextSegment(context, i); + + // If tdrzEnable is enabled and speaker turn is detected + if (this.isTdrzEnable && getTextSegmentSpeakerTurnNext(context, i)) { + text += " [SPEAKER_TURN]"; + } + builder.append(text); WritableMap segment = Arguments.createMap(); @@ -499,6 +511,7 @@ protected static native int fullWithNewJob( protected static native String getTextSegment(long context, int index); protected static native int getTextSegmentT0(long context, int index); protected static native int getTextSegmentT1(long context, int index); + protected static native boolean getTextSegmentSpeakerTurnNext(long context, int index); protected static native void createRealtimeTranscribeJob( int job_id, diff --git a/android/src/main/jni.cpp b/android/src/main/jni.cpp index 7a1f3bf6..09cd3f9d 100644 --- a/android/src/main/jni.cpp +++ b/android/src/main/jni.cpp @@ -208,6 +208,7 @@ struct whisper_full_params createFullParams(JNIEnv *env, jobject options) { params.translate = readablemap::getBool(env, options, "translate", false); params.speed_up = readablemap::getBool(env, options, "speedUp", false); params.token_timestamps = readablemap::getBool(env, options, "tokenTimestamps", false); + params.tdrz_enable = readablemap::getBool(env, options, "tdrzEnable", false); params.offset_ms = 0; params.no_context = true; params.single_segment = false; @@ -493,4 +494,13 @@ Java_com_rnwhisper_WhisperContext_freeContext( whisper_free(context); } +JNIEXPORT jboolean JNICALL +Java_com_rnwhisper_WhisperContext_getTextSegmentSpeakerTurnNext( + JNIEnv *env, jobject thiz, jlong context_ptr, jint index) { + UNUSED(env); + UNUSED(thiz); + struct whisper_context *context = reinterpret_cast(context_ptr); + return whisper_full_get_segment_speaker_turn_next(context, index); +} + } // extern "C" diff --git a/docs/API/README.md b/docs/API/README.md index c8ecbb5b..c5b4fb51 100644 --- a/docs/API/README.md +++ b/docs/API/README.md @@ -58,7 +58,7 @@ whisper.rn #### Defined in -[index.ts:76](https://github.com/mybigday/whisper.rn/blob/85066fc/src/index.ts#L76) +[index.ts:76](https://github.com/mybigday/whisper.rn/blob/8f61e46/src/index.ts#L76) ___ @@ -80,7 +80,7 @@ ___ #### Defined in -[index.ts:441](https://github.com/mybigday/whisper.rn/blob/85066fc/src/index.ts#L441) +[index.ts:441](https://github.com/mybigday/whisper.rn/blob/8f61e46/src/index.ts#L441) ___ @@ -90,7 +90,7 @@ ___ #### Defined in -[index.ts:59](https://github.com/mybigday/whisper.rn/blob/85066fc/src/index.ts#L59) +[index.ts:59](https://github.com/mybigday/whisper.rn/blob/8f61e46/src/index.ts#L59) ___ @@ -108,7 +108,7 @@ ___ #### Defined in -[index.ts:52](https://github.com/mybigday/whisper.rn/blob/85066fc/src/index.ts#L52) +[index.ts:52](https://github.com/mybigday/whisper.rn/blob/8f61e46/src/index.ts#L52) ___ @@ -127,7 +127,7 @@ ___ #### Defined in -[index.ts:45](https://github.com/mybigday/whisper.rn/blob/85066fc/src/index.ts#L45) +[index.ts:45](https://github.com/mybigday/whisper.rn/blob/8f61e46/src/index.ts#L45) ___ @@ -149,6 +149,7 @@ ___ | `offset?` | `number` | Time offset in milliseconds | | `prompt?` | `string` | Initial Prompt | | `speedUp?` | `boolean` | Speed up audio by x2 (reduced accuracy) | +| `tdrzEnable?` | `boolean` | Enable tinydiarize (requires a tdrz model) | | `temperature?` | `number` | Tnitial decoding temperature | | `temperatureInc?` | `number` | - | | `tokenTimestamps?` | `boolean` | Enable token-level timestamps | @@ -157,7 +158,7 @@ ___ #### Defined in -[NativeRNWhisper.ts:5](https://github.com/mybigday/whisper.rn/blob/85066fc/src/NativeRNWhisper.ts#L5) +[NativeRNWhisper.ts:5](https://github.com/mybigday/whisper.rn/blob/8f61e46/src/NativeRNWhisper.ts#L5) ___ @@ -175,7 +176,7 @@ ___ #### Defined in -[index.ts:70](https://github.com/mybigday/whisper.rn/blob/85066fc/src/index.ts#L70) +[index.ts:70](https://github.com/mybigday/whisper.rn/blob/8f61e46/src/index.ts#L70) ___ @@ -200,7 +201,7 @@ ___ #### Defined in -[index.ts:138](https://github.com/mybigday/whisper.rn/blob/85066fc/src/index.ts#L138) +[index.ts:138](https://github.com/mybigday/whisper.rn/blob/8f61e46/src/index.ts#L138) ___ @@ -218,7 +219,7 @@ ___ #### Defined in -[index.ts:171](https://github.com/mybigday/whisper.rn/blob/85066fc/src/index.ts#L171) +[index.ts:171](https://github.com/mybigday/whisper.rn/blob/8f61e46/src/index.ts#L171) ___ @@ -242,7 +243,7 @@ ___ #### Defined in -[index.ts:158](https://github.com/mybigday/whisper.rn/blob/85066fc/src/index.ts#L158) +[index.ts:158](https://github.com/mybigday/whisper.rn/blob/8f61e46/src/index.ts#L158) ___ @@ -252,7 +253,7 @@ ___ #### Defined in -[index.ts:84](https://github.com/mybigday/whisper.rn/blob/85066fc/src/index.ts#L84) +[index.ts:84](https://github.com/mybigday/whisper.rn/blob/8f61e46/src/index.ts#L84) ___ @@ -270,7 +271,7 @@ ___ #### Defined in -[NativeRNWhisper.ts:37](https://github.com/mybigday/whisper.rn/blob/85066fc/src/NativeRNWhisper.ts#L37) +[NativeRNWhisper.ts:39](https://github.com/mybigday/whisper.rn/blob/8f61e46/src/NativeRNWhisper.ts#L39) ## Variables @@ -295,7 +296,7 @@ AudioSession Utility, iOS only. #### Defined in -[AudioSessionIos.ts:50](https://github.com/mybigday/whisper.rn/blob/85066fc/src/AudioSessionIos.ts#L50) +[AudioSessionIos.ts:50](https://github.com/mybigday/whisper.rn/blob/8f61e46/src/AudioSessionIos.ts#L50) ___ @@ -307,7 +308,7 @@ Is allow fallback to CPU if load CoreML model failed #### Defined in -[index.ts:543](https://github.com/mybigday/whisper.rn/blob/85066fc/src/index.ts#L543) +[index.ts:543](https://github.com/mybigday/whisper.rn/blob/8f61e46/src/index.ts#L543) ___ @@ -319,7 +320,7 @@ Is use CoreML models on iOS #### Defined in -[index.ts:540](https://github.com/mybigday/whisper.rn/blob/85066fc/src/index.ts#L540) +[index.ts:540](https://github.com/mybigday/whisper.rn/blob/8f61e46/src/index.ts#L540) ___ @@ -331,7 +332,7 @@ Current version of whisper.cpp #### Defined in -[index.ts:535](https://github.com/mybigday/whisper.rn/blob/85066fc/src/index.ts#L535) +[index.ts:535](https://github.com/mybigday/whisper.rn/blob/8f61e46/src/index.ts#L535) ## Functions @@ -351,7 +352,7 @@ Current version of whisper.cpp #### Defined in -[index.ts:467](https://github.com/mybigday/whisper.rn/blob/85066fc/src/index.ts#L467) +[index.ts:467](https://github.com/mybigday/whisper.rn/blob/8f61e46/src/index.ts#L467) ___ @@ -365,4 +366,4 @@ ___ #### Defined in -[index.ts:530](https://github.com/mybigday/whisper.rn/blob/85066fc/src/index.ts#L530) +[index.ts:530](https://github.com/mybigday/whisper.rn/blob/8f61e46/src/index.ts#L530) diff --git a/docs/API/classes/WhisperContext.md b/docs/API/classes/WhisperContext.md index 678eec36..bff13260 100644 --- a/docs/API/classes/WhisperContext.md +++ b/docs/API/classes/WhisperContext.md @@ -34,7 +34,7 @@ #### Defined in -[index.ts:195](https://github.com/mybigday/whisper.rn/blob/85066fc/src/index.ts#L195) +[index.ts:195](https://github.com/mybigday/whisper.rn/blob/8f61e46/src/index.ts#L195) ## Properties @@ -44,7 +44,7 @@ #### Defined in -[index.ts:191](https://github.com/mybigday/whisper.rn/blob/85066fc/src/index.ts#L191) +[index.ts:191](https://github.com/mybigday/whisper.rn/blob/8f61e46/src/index.ts#L191) ___ @@ -54,7 +54,7 @@ ___ #### Defined in -[index.ts:189](https://github.com/mybigday/whisper.rn/blob/85066fc/src/index.ts#L189) +[index.ts:189](https://github.com/mybigday/whisper.rn/blob/8f61e46/src/index.ts#L189) ___ @@ -64,7 +64,7 @@ ___ #### Defined in -[index.ts:193](https://github.com/mybigday/whisper.rn/blob/85066fc/src/index.ts#L193) +[index.ts:193](https://github.com/mybigday/whisper.rn/blob/8f61e46/src/index.ts#L193) ## Methods @@ -78,7 +78,7 @@ ___ #### Defined in -[index.ts:436](https://github.com/mybigday/whisper.rn/blob/85066fc/src/index.ts#L436) +[index.ts:436](https://github.com/mybigday/whisper.rn/blob/8f61e46/src/index.ts#L436) ___ @@ -106,7 +106,7 @@ Transcribe audio file #### Defined in -[index.ts:206](https://github.com/mybigday/whisper.rn/blob/85066fc/src/index.ts#L206) +[index.ts:206](https://github.com/mybigday/whisper.rn/blob/8f61e46/src/index.ts#L206) ___ @@ -128,4 +128,4 @@ Transcribe the microphone audio stream, the microphone user permission is requir #### Defined in -[index.ts:302](https://github.com/mybigday/whisper.rn/blob/85066fc/src/index.ts#L302) +[index.ts:302](https://github.com/mybigday/whisper.rn/blob/8f61e46/src/index.ts#L302) diff --git a/docs/API/enums/AudioSessionCategoryIos.md b/docs/API/enums/AudioSessionCategoryIos.md index 83ed4412..49cf6c81 100644 --- a/docs/API/enums/AudioSessionCategoryIos.md +++ b/docs/API/enums/AudioSessionCategoryIos.md @@ -25,7 +25,7 @@ https://developer.apple.com/documentation/avfaudio/avaudiosessioncategory?langua #### Defined in -[AudioSessionIos.ts:8](https://github.com/mybigday/whisper.rn/blob/85066fc/src/AudioSessionIos.ts#L8) +[AudioSessionIos.ts:8](https://github.com/mybigday/whisper.rn/blob/8f61e46/src/AudioSessionIos.ts#L8) ___ @@ -35,7 +35,7 @@ ___ #### Defined in -[AudioSessionIos.ts:13](https://github.com/mybigday/whisper.rn/blob/85066fc/src/AudioSessionIos.ts#L13) +[AudioSessionIos.ts:13](https://github.com/mybigday/whisper.rn/blob/8f61e46/src/AudioSessionIos.ts#L13) ___ @@ -45,7 +45,7 @@ ___ #### Defined in -[AudioSessionIos.ts:12](https://github.com/mybigday/whisper.rn/blob/85066fc/src/AudioSessionIos.ts#L12) +[AudioSessionIos.ts:12](https://github.com/mybigday/whisper.rn/blob/8f61e46/src/AudioSessionIos.ts#L12) ___ @@ -55,7 +55,7 @@ ___ #### Defined in -[AudioSessionIos.ts:10](https://github.com/mybigday/whisper.rn/blob/85066fc/src/AudioSessionIos.ts#L10) +[AudioSessionIos.ts:10](https://github.com/mybigday/whisper.rn/blob/8f61e46/src/AudioSessionIos.ts#L10) ___ @@ -65,7 +65,7 @@ ___ #### Defined in -[AudioSessionIos.ts:11](https://github.com/mybigday/whisper.rn/blob/85066fc/src/AudioSessionIos.ts#L11) +[AudioSessionIos.ts:11](https://github.com/mybigday/whisper.rn/blob/8f61e46/src/AudioSessionIos.ts#L11) ___ @@ -75,4 +75,4 @@ ___ #### Defined in -[AudioSessionIos.ts:9](https://github.com/mybigday/whisper.rn/blob/85066fc/src/AudioSessionIos.ts#L9) +[AudioSessionIos.ts:9](https://github.com/mybigday/whisper.rn/blob/8f61e46/src/AudioSessionIos.ts#L9) diff --git a/docs/API/enums/AudioSessionCategoryOptionIos.md b/docs/API/enums/AudioSessionCategoryOptionIos.md index 10c7e628..df25ad89 100644 --- a/docs/API/enums/AudioSessionCategoryOptionIos.md +++ b/docs/API/enums/AudioSessionCategoryOptionIos.md @@ -26,7 +26,7 @@ https://developer.apple.com/documentation/avfaudio/avaudiosessioncategoryoptions #### Defined in -[AudioSessionIos.ts:25](https://github.com/mybigday/whisper.rn/blob/85066fc/src/AudioSessionIos.ts#L25) +[AudioSessionIos.ts:25](https://github.com/mybigday/whisper.rn/blob/8f61e46/src/AudioSessionIos.ts#L25) ___ @@ -36,7 +36,7 @@ ___ #### Defined in -[AudioSessionIos.ts:23](https://github.com/mybigday/whisper.rn/blob/85066fc/src/AudioSessionIos.ts#L23) +[AudioSessionIos.ts:23](https://github.com/mybigday/whisper.rn/blob/8f61e46/src/AudioSessionIos.ts#L23) ___ @@ -46,7 +46,7 @@ ___ #### Defined in -[AudioSessionIos.ts:24](https://github.com/mybigday/whisper.rn/blob/85066fc/src/AudioSessionIos.ts#L24) +[AudioSessionIos.ts:24](https://github.com/mybigday/whisper.rn/blob/8f61e46/src/AudioSessionIos.ts#L24) ___ @@ -56,7 +56,7 @@ ___ #### Defined in -[AudioSessionIos.ts:26](https://github.com/mybigday/whisper.rn/blob/85066fc/src/AudioSessionIos.ts#L26) +[AudioSessionIos.ts:26](https://github.com/mybigday/whisper.rn/blob/8f61e46/src/AudioSessionIos.ts#L26) ___ @@ -66,7 +66,7 @@ ___ #### Defined in -[AudioSessionIos.ts:21](https://github.com/mybigday/whisper.rn/blob/85066fc/src/AudioSessionIos.ts#L21) +[AudioSessionIos.ts:21](https://github.com/mybigday/whisper.rn/blob/8f61e46/src/AudioSessionIos.ts#L21) ___ @@ -76,7 +76,7 @@ ___ #### Defined in -[AudioSessionIos.ts:22](https://github.com/mybigday/whisper.rn/blob/85066fc/src/AudioSessionIos.ts#L22) +[AudioSessionIos.ts:22](https://github.com/mybigday/whisper.rn/blob/8f61e46/src/AudioSessionIos.ts#L22) ___ @@ -86,4 +86,4 @@ ___ #### Defined in -[AudioSessionIos.ts:20](https://github.com/mybigday/whisper.rn/blob/85066fc/src/AudioSessionIos.ts#L20) +[AudioSessionIos.ts:20](https://github.com/mybigday/whisper.rn/blob/8f61e46/src/AudioSessionIos.ts#L20) diff --git a/docs/API/enums/AudioSessionModeIos.md b/docs/API/enums/AudioSessionModeIos.md index dbe35621..00ddf9ba 100644 --- a/docs/API/enums/AudioSessionModeIos.md +++ b/docs/API/enums/AudioSessionModeIos.md @@ -27,7 +27,7 @@ https://developer.apple.com/documentation/avfaudio/avaudiosessionmode?language=o #### Defined in -[AudioSessionIos.ts:33](https://github.com/mybigday/whisper.rn/blob/85066fc/src/AudioSessionIos.ts#L33) +[AudioSessionIos.ts:33](https://github.com/mybigday/whisper.rn/blob/8f61e46/src/AudioSessionIos.ts#L33) ___ @@ -37,7 +37,7 @@ ___ #### Defined in -[AudioSessionIos.ts:36](https://github.com/mybigday/whisper.rn/blob/85066fc/src/AudioSessionIos.ts#L36) +[AudioSessionIos.ts:36](https://github.com/mybigday/whisper.rn/blob/8f61e46/src/AudioSessionIos.ts#L36) ___ @@ -47,7 +47,7 @@ ___ #### Defined in -[AudioSessionIos.ts:38](https://github.com/mybigday/whisper.rn/blob/85066fc/src/AudioSessionIos.ts#L38) +[AudioSessionIos.ts:38](https://github.com/mybigday/whisper.rn/blob/8f61e46/src/AudioSessionIos.ts#L38) ___ @@ -57,7 +57,7 @@ ___ #### Defined in -[AudioSessionIos.ts:39](https://github.com/mybigday/whisper.rn/blob/85066fc/src/AudioSessionIos.ts#L39) +[AudioSessionIos.ts:39](https://github.com/mybigday/whisper.rn/blob/8f61e46/src/AudioSessionIos.ts#L39) ___ @@ -67,7 +67,7 @@ ___ #### Defined in -[AudioSessionIos.ts:40](https://github.com/mybigday/whisper.rn/blob/85066fc/src/AudioSessionIos.ts#L40) +[AudioSessionIos.ts:40](https://github.com/mybigday/whisper.rn/blob/8f61e46/src/AudioSessionIos.ts#L40) ___ @@ -77,7 +77,7 @@ ___ #### Defined in -[AudioSessionIos.ts:35](https://github.com/mybigday/whisper.rn/blob/85066fc/src/AudioSessionIos.ts#L35) +[AudioSessionIos.ts:35](https://github.com/mybigday/whisper.rn/blob/8f61e46/src/AudioSessionIos.ts#L35) ___ @@ -87,7 +87,7 @@ ___ #### Defined in -[AudioSessionIos.ts:37](https://github.com/mybigday/whisper.rn/blob/85066fc/src/AudioSessionIos.ts#L37) +[AudioSessionIos.ts:37](https://github.com/mybigday/whisper.rn/blob/8f61e46/src/AudioSessionIos.ts#L37) ___ @@ -97,4 +97,4 @@ ___ #### Defined in -[AudioSessionIos.ts:34](https://github.com/mybigday/whisper.rn/blob/85066fc/src/AudioSessionIos.ts#L34) +[AudioSessionIos.ts:34](https://github.com/mybigday/whisper.rn/blob/8f61e46/src/AudioSessionIos.ts#L34) diff --git a/ios/RNWhisperContext.mm b/ios/RNWhisperContext.mm index 5012def2..f835ef45 100644 --- a/ios/RNWhisperContext.mm +++ b/ios/RNWhisperContext.mm @@ -353,6 +353,7 @@ - (OSStatus)transcribeRealtime:(int)jobId struct rnwhisper_segments_callback_data { void (^onNewSegments)(NSDictionary *); int total_n_new; + bool tdrzEnable; }; - (void)transcribeFile:(int)jobId @@ -386,12 +387,18 @@ - (void)transcribeFile:(int)jobId NSMutableArray *segments = [[NSMutableArray alloc] init]; for (int i = data->total_n_new - n_new; i < data->total_n_new; i++) { const char * text_cur = whisper_full_get_segment_text(ctx, i); - text = [text stringByAppendingString:[NSString stringWithUTF8String:text_cur]]; + NSMutableString *mutable_ns_text = [NSMutableString stringWithUTF8String:text_cur]; + + if (data->tdrzEnable && whisper_full_get_segment_speaker_turn_next(ctx, i)) { + [mutable_ns_text appendString:@" [SPEAKER_TURN]"]; + } + + text = [text stringByAppendingString:mutable_ns_text]; const int64_t t0 = whisper_full_get_segment_t0(ctx, i); const int64_t t1 = whisper_full_get_segment_t1(ctx, i); NSDictionary *segment = @{ - @"text": [NSString stringWithUTF8String:text_cur], + @"text": [NSString stringWithString:mutable_ns_text], @"t0": [NSNumber numberWithLongLong:t0], @"t1": [NSNumber numberWithLongLong:t1] }; @@ -409,7 +416,8 @@ - (void)transcribeFile:(int)jobId }; struct rnwhisper_segments_callback_data user_data = { .onNewSegments = onNewSegments, - .total_n_new = 0 + .tdrzEnable = options[@"tdrzEnable"] && [options[@"tdrzEnable"] boolValue], + .total_n_new = 0, }; params.new_segment_callback_user_data = &user_data; } @@ -481,6 +489,7 @@ - (struct whisper_full_params)createParams:(NSDictionary *)options jobId:(int)jo params.max_len = [options[@"maxLen"] intValue]; } params.token_timestamps = options[@"tokenTimestamps"] != nil ? [options[@"tokenTimestamps"] boolValue] : false; + params.tdrz_enable = options[@"tdrzEnable"] != nil ? [options[@"tdrzEnable"] boolValue] : false; if (options[@"bestOf"] != nil) { params.greedy.best_of = [options[@"bestOf"] intValue]; @@ -530,12 +539,21 @@ - (NSMutableDictionary *)getTextSegments { NSMutableArray *segments = [[NSMutableArray alloc] init]; for (int i = 0; i < n_segments; i++) { const char * text_cur = whisper_full_get_segment_text(self->ctx, i); - text = [text stringByAppendingString:[NSString stringWithUTF8String:text_cur]]; + NSMutableString *mutable_ns_text = [NSMutableString stringWithUTF8String:text_cur]; + + // Simplified condition + if (self->recordState.options[@"tdrzEnable"] && + [self->recordState.options[@"tdrzEnable"] boolValue] && + whisper_full_get_segment_speaker_turn_next(self->ctx, i)) { + [mutable_ns_text appendString:@" [SPEAKER_TURN]"]; + } + + text = [text stringByAppendingString:mutable_ns_text]; const int64_t t0 = whisper_full_get_segment_t0(self->ctx, i); const int64_t t1 = whisper_full_get_segment_t1(self->ctx, i); NSDictionary *segment = @{ - @"text": [NSString stringWithUTF8String:text_cur], + @"text": [NSString stringWithString:mutable_ns_text], @"t0": [NSNumber numberWithLongLong:t0], @"t1": [NSNumber numberWithLongLong:t1] }; diff --git a/src/NativeRNWhisper.ts b/src/NativeRNWhisper.ts index 8290f62c..1a03f85b 100644 --- a/src/NativeRNWhisper.ts +++ b/src/NativeRNWhisper.ts @@ -15,6 +15,8 @@ export type TranscribeOptions = { maxLen?: number, /** Enable token-level timestamps */ tokenTimestamps?: boolean, + /** Enable tinydiarize (requires a tdrz model) */ + tdrzEnable?: boolean, /** Word timestamp probability threshold */ wordThold?: number, /** Time offset in milliseconds */