Skip to content

Commit

Permalink
feat(ios): add option to saving recorded audio as wav on startRealtim…
Browse files Browse the repository at this point in the history
…eTranscribe
  • Loading branch information
jhen0409 committed Sep 23, 2023
1 parent 61f01e7 commit 750ab66
Show file tree
Hide file tree
Showing 5 changed files with 105 additions and 4 deletions.
24 changes: 21 additions & 3 deletions ios/RNWhisper.xcodeproj/project.pbxproj
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,9 @@

/* Begin PBXBuildFile section */
5E555C0D2413F4C50049A1A2 /* RNWhisper.mm in Sources */ = {isa = PBXBuildFile; fileRef = B3E7B5891CC2AC0600A0062D /* RNWhisper.mm */; };
7FE0BBA12ABE6C7B0049B4E4 /* RNWhisperDownloader.m in Sources */ = {isa = PBXBuildFile; fileRef = 7FE0BB9B2ABE6C7B0049B4E4 /* RNWhisperDownloader.m */; };
7FE0BBA22ABE6C7B0049B4E4 /* RNWhisperAudioUtils.m in Sources */ = {isa = PBXBuildFile; fileRef = 7FE0BB9C2ABE6C7B0049B4E4 /* RNWhisperAudioUtils.m */; };
7FE0BBA32ABE6C7B0049B4E4 /* RNWhisperContext.mm in Sources */ = {isa = PBXBuildFile; fileRef = 7FE0BBA02ABE6C7B0049B4E4 /* RNWhisperContext.mm */; };
/* End PBXBuildFile section */

/* Begin PBXCopyFilesBuildPhase section */
Expand All @@ -24,6 +27,13 @@

/* Begin PBXFileReference section */
134814201AA4EA6300B7C361 /* libRNWhisper.a */ = {isa = PBXFileReference; explicitFileType = archive.ar; includeInIndex = 0; path = libRNWhisper.a; sourceTree = BUILT_PRODUCTS_DIR; };
7FE0BB9A2ABE6C7B0049B4E4 /* RNWhisper.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = RNWhisper.h; sourceTree = "<group>"; };
7FE0BB9B2ABE6C7B0049B4E4 /* RNWhisperDownloader.m */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.objc; path = RNWhisperDownloader.m; sourceTree = "<group>"; };
7FE0BB9C2ABE6C7B0049B4E4 /* RNWhisperAudioUtils.m */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.objc; path = RNWhisperAudioUtils.m; sourceTree = "<group>"; };
7FE0BB9D2ABE6C7B0049B4E4 /* RNWhisperContext.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = RNWhisperContext.h; sourceTree = "<group>"; };
7FE0BB9E2ABE6C7B0049B4E4 /* RNWhisperDownloader.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = RNWhisperDownloader.h; sourceTree = "<group>"; };
7FE0BB9F2ABE6C7B0049B4E4 /* RNWhisperAudioUtils.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = RNWhisperAudioUtils.h; sourceTree = "<group>"; };
7FE0BBA02ABE6C7B0049B4E4 /* RNWhisperContext.mm */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.objcpp; path = RNWhisperContext.mm; sourceTree = "<group>"; };
B3E7B5891CC2AC0600A0062D /* RNWhisper.mm */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.objcpp; path = RNWhisper.mm; sourceTree = "<group>"; };
/* End PBXFileReference section */

Expand All @@ -49,6 +59,13 @@
58B511D21A9E6C8500147676 = {
isa = PBXGroup;
children = (
7FE0BB9F2ABE6C7B0049B4E4 /* RNWhisperAudioUtils.h */,
7FE0BB9C2ABE6C7B0049B4E4 /* RNWhisperAudioUtils.m */,
7FE0BB9A2ABE6C7B0049B4E4 /* RNWhisper.h */,
7FE0BB9D2ABE6C7B0049B4E4 /* RNWhisperContext.h */,
7FE0BBA02ABE6C7B0049B4E4 /* RNWhisperContext.mm */,
7FE0BB9E2ABE6C7B0049B4E4 /* RNWhisperDownloader.h */,
7FE0BB9B2ABE6C7B0049B4E4 /* RNWhisperDownloader.m */,
B3E7B5891CC2AC0600A0062D /* RNWhisper.mm */,
134814211AA4EA7D00B7C361 /* Products */,
);
Expand Down Expand Up @@ -112,6 +129,9 @@
buildActionMask = 2147483647;
files = (
5E555C0D2413F4C50049A1A2 /* RNWhisper.mm in Sources */,
7FE0BBA22ABE6C7B0049B4E4 /* RNWhisperAudioUtils.m in Sources */,
7FE0BBA32ABE6C7B0049B4E4 /* RNWhisperContext.mm in Sources */,
7FE0BBA12ABE6C7B0049B4E4 /* RNWhisperDownloader.m in Sources */,
);
runOnlyForDeploymentPostprocessing = 0;
};
Expand Down Expand Up @@ -223,9 +243,7 @@
"$(SRCROOT)/../../react-native/React/**",
);
LIBRARY_SEARCH_PATHS = "$(inherited)";
OTHER_LDFLAGS = (
"-ObjC",
);
OTHER_LDFLAGS = "-ObjC";
PRODUCT_NAME = RNWhisper;
SKIP_INSTALL = YES;
};
Expand Down
8 changes: 8 additions & 0 deletions ios/RNWhisperAudioUtils.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
#import <Foundation/Foundation.h>

/// Helpers for joining recorded 16-bit sample buffers and saving them as a WAV file.
@interface RNWhisperAudioUtils : NSObject

/// Copies the sample buffers referenced by `buffers` into one contiguous data object.
/// @param buffers Values whose `pointerValue` is read as a `short *` sample buffer.
/// @param sliceSize Number of `short` samples copied from every buffer except the last one.
/// @param lastSliceSize Number of `short` samples copied from the last buffer.
/// @return The copied samples, in the same order as `buffers`.
+ (NSData *)concatShortBuffers:(NSMutableArray<NSValue *> *)buffers sliceSize:(int)sliceSize lastSliceSize:(int)lastSliceSize;
/// Writes `rawData` to a file, preceded by a WAV header that declares mono, 16-bit PCM
/// at `WHISPER_SAMPLE_RATE`.
/// @param rawData Sample bytes appended unchanged after the header.
/// @param audioOutputFile Destination file path; the file is written atomically.
+ (void)saveWavFile:(NSData *)rawData audioOutputFile:(NSString *)audioOutputFile;

@end
64 changes: 64 additions & 0 deletions ios/RNWhisperAudioUtils.m
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
#import "RNWhisperAudioUtils.h"
#import "whisper.h"

// Number of header bytes that follow the RIFF chunk-size field (everything up to the sample data).
static const uint32_t kRNWhisperWavHeaderRemainder = 36;
// Size in bytes of the PCM "fmt " subchunk payload.
static const uint32_t kRNWhisperWavFormatChunkSize = 16;
// Audio format tag for uncompressed PCM.
static const uint16_t kRNWhisperWavFormatPCM = 1;
// The recorder produces a single (mono) channel of 16-bit samples.
static const uint16_t kRNWhisperWavChannelCount = 1;
static const uint16_t kRNWhisperWavBitsPerSample = 16;

// Appends a four-character chunk identifier (e.g. "RIFF") without a terminating NUL.
static void RNWhisperAppendTag(NSMutableData *data, const char *tag) {
    [data appendBytes:tag length:4];
}

// Appends a 32-bit unsigned value in little-endian byte order.
static void RNWhisperAppendUInt32LE(NSMutableData *data, uint32_t value) {
    uint32_t littleEndian = CFSwapInt32HostToLittle(value);
    [data appendBytes:&littleEndian length:sizeof(littleEndian)];
}

// Appends a 16-bit unsigned value in little-endian byte order.
static void RNWhisperAppendUInt16LE(NSMutableData *data, uint16_t value) {
    uint16_t littleEndian = CFSwapInt16HostToLittle(value);
    [data appendBytes:&littleEndian length:sizeof(littleEndian)];
}

@implementation RNWhisperAudioUtils

/// Copies the sample buffers referenced by `buffers` into one contiguous data object.
///
/// @param buffers Values whose `pointerValue` is read as a `short *` sample buffer.
/// @param sliceSize Number of `short` samples copied from every buffer except the last one.
/// @param lastSliceSize Number of `short` samples copied from the last buffer.
/// @return The copied samples, in the same order as `buffers`. Empty when there is nothing to copy.
+ (NSData *)concatShortBuffers:(NSMutableArray<NSValue *> *)buffers sliceSize:(int)sliceSize lastSliceSize:(int)lastSliceSize {
    NSMutableData *outputData = [NSMutableData data];
    NSUInteger bufferCount = buffers.count;
    for (NSUInteger index = 0; index < bufferCount; index++) {
        // Decide "last" by position; object identity would misfire if a value were stored twice.
        BOOL isLastSlice = (index + 1 == bufferCount);
        int sampleCount = isLastSlice ? lastSliceSize : sliceSize;
        const short *samples = (const short *)buffers[index].pointerValue;
        // A negative count would turn into a huge unsigned length; a NULL buffer cannot be read.
        if (samples == NULL || sampleCount <= 0) {
            continue;
        }
        [outputData appendBytes:samples length:(NSUInteger)sampleCount * sizeof(short)];
    }
    return outputData;
}

/// Writes `rawData` to `audioOutputFile` as a WAV file (mono, 16-bit PCM, `WHISPER_SAMPLE_RATE`).
///
/// Failures are logged rather than raised, because the method has no way to report them
/// to the caller.
///
/// @param rawData Sample bytes appended unchanged after the 44-byte header. `nil` is treated as empty.
/// @param audioOutputFile Destination file path; the file is written atomically.
+ (void)saveWavFile:(NSData *)rawData audioOutputFile:(NSString *)audioOutputFile {
    if (audioOutputFile.length == 0) {
        NSLog(@"[RNWhisper] Failed to save wav file: output path is empty");
        return;
    }

    NSUInteger dataLength = rawData.length;
    // RIFF stores sizes in 32 bits; a larger payload would produce a corrupt header.
    if (dataLength > UINT32_MAX - kRNWhisperWavHeaderRemainder) {
        NSLog(@"[RNWhisper] Failed to save wav file: audio data is too large (%lu bytes)", (unsigned long)dataLength);
        return;
    }
    uint32_t dataSize = (uint32_t)dataLength;

    uint32_t sampleRate = (uint32_t)WHISPER_SAMPLE_RATE;
    // Bytes per sample frame: (bitDepth * channels) / 8
    uint16_t blockAlign = (uint16_t)(kRNWhisperWavChannelCount * kRNWhisperWavBitsPerSample / 8);
    // Bytes per second: sampleRate * bytes per sample frame
    uint32_t byteRate = sampleRate * blockAlign;

    NSMutableData *outputData = [NSMutableData dataWithCapacity:(NSUInteger)kRNWhisperWavHeaderRemainder + 8 + dataLength];

    // RIFF chunk descriptor
    RNWhisperAppendTag(outputData, "RIFF");
    RNWhisperAppendUInt32LE(outputData, kRNWhisperWavHeaderRemainder + dataSize);
    RNWhisperAppendTag(outputData, "WAVE");

    // "fmt " subchunk
    RNWhisperAppendTag(outputData, "fmt ");
    RNWhisperAppendUInt32LE(outputData, kRNWhisperWavFormatChunkSize);
    RNWhisperAppendUInt16LE(outputData, kRNWhisperWavFormatPCM);
    RNWhisperAppendUInt16LE(outputData, kRNWhisperWavChannelCount);
    RNWhisperAppendUInt32LE(outputData, sampleRate);
    RNWhisperAppendUInt32LE(outputData, byteRate);
    RNWhisperAppendUInt16LE(outputData, blockAlign);
    RNWhisperAppendUInt16LE(outputData, kRNWhisperWavBitsPerSample);

    // "data" subchunk
    RNWhisperAppendTag(outputData, "data");
    RNWhisperAppendUInt32LE(outputData, dataSize);
    if (rawData != nil) {
        [outputData appendData:rawData];
    }

    // Check the return value (not the error pointer) and surface the failure in the log.
    NSError *writeError = nil;
    if (![outputData writeToFile:audioOutputFile options:NSDataWritingAtomic error:&writeError]) {
        NSLog(@"[RNWhisper] Failed to save wav file to %@: %@", audioOutputFile, writeError);
    }
}

@end
12 changes: 12 additions & 0 deletions ios/RNWhisperContext.mm
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
#import "RNWhisperContext.h"
#import "RNWhisperAudioUtils.h"

#define NUM_BYTES_PER_BUFFER 16 * 1024

Expand Down Expand Up @@ -212,6 +213,17 @@ - (void)fullTranscribeSamples:(RNWhisperContextRecordState*) state {
NSLog(@"[RNWhisper] Transcribe end");
result[@"isStoppedByAction"] = @(state->isStoppedByAction);
result[@"isCapturing"] = @(false);

// Save wav if needed
if (state->options[@"audioOutputPath"] != nil) {
[RNWhisperAudioUtils
saveWavFile:[RNWhisperAudioUtils concatShortBuffers:state->shortBufferSlices
sliceSize:state->audioSliceSec * WHISPER_SAMPLE_RATE
lastSliceSize:nSamplesOfIndex]
audioOutputFile:state->options[@"audioOutputPath"]
];
}

state->transcribeHandler(state->jobId, @"end", result);
} else if (code == 0) {
result[@"isCapturing"] = @(true);
Expand Down
1 change: 0 additions & 1 deletion src/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,6 @@ export type TranscribeRealtimeOptions = TranscribeOptions & {
realtimeAudioSliceSec?: number
/**
* Output path for audio file. If not set, the audio file will not be saved
* TODO: Support iOS
* (Default: Undefined)
*/
audioOutputPath?: string
Expand Down

0 comments on commit 750ab66

Please sign in to comment.