Skip to content

Commit

Permalink
Add swift example for generating subtitles
Browse files Browse the repository at this point in the history
  • Loading branch information
csukuangfj committed Sep 18, 2023
1 parent 4282a91 commit 5f79bb6
Show file tree
Hide file tree
Showing 7 changed files with 403 additions and 7 deletions.
6 changes: 3 additions & 3 deletions sherpa-onnx/c-api/c-api.cc
Original file line number Diff line number Diff line change
Expand Up @@ -478,8 +478,8 @@ SHERPA_ONNX_API void SherpaOnnxVoiceActivityDetectorPop(
p->impl->Pop();
}

SHERPA_ONNX_API SherpaOnnxSpeechSegment *SherpaOnnxVoiceActivityDetectorFront(
SherpaOnnxVoiceActivityDetector *p) {
SHERPA_ONNX_API const SherpaOnnxSpeechSegment *
SherpaOnnxVoiceActivityDetectorFront(SherpaOnnxVoiceActivityDetector *p) {
const sherpa_onnx::SpeechSegment &segment = p->impl->Front();

SherpaOnnxSpeechSegment *ans = new SherpaOnnxSpeechSegment;
Expand All @@ -491,7 +491,7 @@ SHERPA_ONNX_API SherpaOnnxSpeechSegment *SherpaOnnxVoiceActivityDetectorFront(
return ans;
}

void SherpaOnnxDestroySpeechSegment(SherpaOnnxSpeechSegment *p) {
void SherpaOnnxDestroySpeechSegment(const SherpaOnnxSpeechSegment *p) {
delete[] p->samples;
delete p;
}
Expand Down
7 changes: 4 additions & 3 deletions sherpa-onnx/c-api/c-api.h
Original file line number Diff line number Diff line change
Expand Up @@ -548,11 +548,12 @@ SHERPA_ONNX_API void SherpaOnnxVoiceActivityDetectorPop(
// Return the first speech segment.
// The user has to use SherpaOnnxDestroySpeechSegment() to free the returned
// pointer to avoid memory leak.
SHERPA_ONNX_API SherpaOnnxSpeechSegment *SherpaOnnxVoiceActivityDetectorFront(
SherpaOnnxVoiceActivityDetector *p);
SHERPA_ONNX_API const SherpaOnnxSpeechSegment *
SherpaOnnxVoiceActivityDetectorFront(SherpaOnnxVoiceActivityDetector *p);

// Free the pointer returned by SherpaOnnxVoiceActivityDetectorFront().
SHERPA_ONNX_API void SherpaOnnxDestroySpeechSegment(SherpaOnnxSpeechSegment *p);
SHERPA_ONNX_API void SherpaOnnxDestroySpeechSegment(
const SherpaOnnxSpeechSegment *p);

// Re-initialize the voice activity detector.
SHERPA_ONNX_API void SherpaOnnxVoiceActivityDetectorReset(
Expand Down
1 change: 1 addition & 0 deletions swift-api-examples/.gitignore
Original file line number Diff line number Diff line change
@@ -1,2 +1,3 @@
decode-file
decode-file-non-streaming
generate-subtitles
142 changes: 142 additions & 0 deletions swift-api-examples/SherpaOnnx.swift
Original file line number Diff line number Diff line change
Expand Up @@ -414,3 +414,145 @@ class SherpaOnnxOfflineRecognizer {
return SherpaOnnxOfflineRecongitionResult(result: result)
}
}

/// Builds a `SherpaOnnxSileroVadModelConfig` C struct from Swift values.
///
/// - Parameters:
///   - model: Converted with `toCPointer` and stored in the `model` field.
///     (Presumably the location of the Silero VAD model file; confirm against the C API.)
///   - threshold: Stored unchanged in `threshold`. Defaults to 0.5.
///   - minSilenceDuration: Stored unchanged in `min_silence_duration`. Defaults to 0.25.
///   - minSpeechDuration: Stored unchanged in `min_speech_duration`. Defaults to 0.5.
///   - windowSize: Converted to `Int32` and stored in `window_size`. Defaults to 512.
/// - Returns: The populated configuration struct.
func sherpaOnnxSileroVadModelConfig(
  model: String,
  threshold: Float = 0.5,
  minSilenceDuration: Float = 0.25,
  minSpeechDuration: Float = 0.5,
  windowSize: Int = 512
) -> SherpaOnnxSileroVadModelConfig {
  // Convert the Swift values to their C representations up front so the
  // struct initializer below is a plain field-by-field assignment.
  let cModel = toCPointer(model)
  let cWindowSize = Int32(windowSize)

  return SherpaOnnxSileroVadModelConfig(
    model: cModel,
    threshold: threshold,
    min_silence_duration: minSilenceDuration,
    min_speech_duration: minSpeechDuration,
    window_size: cWindowSize
  )
}

/// Builds a `SherpaOnnxVadModelConfig` C struct from Swift values.
///
/// - Parameters:
///   - sileroVad: Stored unchanged in the `silero_vad` field.
///   - sampleRate: Stored unchanged in `sample_rate`. Defaults to 16000.
///   - numThreads: Converted to `Int32` and stored in `num_threads`. Defaults to 1.
///   - provider: Converted with `toCPointer` and stored in `provider`. Defaults to "cpu".
///   - debug: Converted to `Int32` and stored in `debug`. Defaults to 0.
/// - Returns: The populated configuration struct.
func sherpaOnnxVadModelConfig(
  sileroVad: SherpaOnnxSileroVadModelConfig,
  sampleRate: Int32 = 16000,
  numThreads: Int = 1,
  provider: String = "cpu",
  debug: Int = 0
) -> SherpaOnnxVadModelConfig {
  // Perform the Swift-to-C conversions first, in the same order as the
  // struct fields, then assemble the struct.
  let cNumThreads = Int32(numThreads)
  let cProvider = toCPointer(provider)
  let cDebug = Int32(debug)

  return SherpaOnnxVadModelConfig(
    silero_vad: sileroVad,
    sample_rate: sampleRate,
    num_threads: cNumThreads,
    provider: cProvider,
    debug: cDebug
  )
}

/// A Swift wrapper around the circular buffer exposed by the C API.
///
/// The underlying C object is created in `init` and released in `deinit`.
class SherpaOnnxCircularBufferWrapper {
  /// Handle returned by `SherpaOnnxCreateCircularBuffer`.
  let buffer: OpaquePointer!

  /// Creates the underlying C circular buffer.
  ///
  /// - Parameter capacity: Forwarded to `SherpaOnnxCreateCircularBuffer` as an
  ///   `Int32`. (Presumably a number of samples; confirm against the C API.)
  init(capacity: Int) {
    buffer = SherpaOnnxCreateCircularBuffer(Int32(capacity))
  }

  deinit {
    if let buffer {
      SherpaOnnxDestroyCircularBuffer(buffer)
    }
  }

  /// Pushes all of `samples` into the buffer.
  func push(samples: [Float]) {
    SherpaOnnxCircularBufferPush(buffer, samples, Int32(samples.count))
  }

  /// Copies `n` values starting at `startIndex` out of the buffer.
  ///
  /// The C API hands back a temporary array which is copied into a Swift
  /// array and then released with `SherpaOnnxCircularBufferFree`.
  ///
  /// - Parameters:
  ///   - startIndex: Forwarded to `SherpaOnnxCircularBufferGet` as an `Int32`.
  ///   - n: Number of values to copy.
  /// - Returns: The copied values, or an empty array if the C API returned a
  ///   null pointer.
  func get(startIndex: Int, n: Int) -> [Float] {
    // A null result means there is nothing to copy and nothing to free.
    guard let p = SherpaOnnxCircularBufferGet(buffer, Int32(startIndex), Int32(n)) else {
      return []
    }
    // Runs after the return expression has been evaluated, so the copy
    // below completes before the C array is released.
    defer { SherpaOnnxCircularBufferFree(p) }

    // Bulk copy instead of appending element by element.
    return Array(UnsafeBufferPointer(start: p, count: n))
  }

  /// Forwards to `SherpaOnnxCircularBufferPop` with `n` converted to `Int32`.
  func pop(n: Int) {
    SherpaOnnxCircularBufferPop(buffer, Int32(n))
  }

  /// Returns the value reported by `SherpaOnnxCircularBufferSize`.
  func size() -> Int {
    return Int(SherpaOnnxCircularBufferSize(buffer))
  }

  /// Forwards to `SherpaOnnxCircularBufferReset`.
  func reset() {
    SherpaOnnxCircularBufferReset(buffer)
  }
}

/// Owns a `SherpaOnnxSpeechSegment` obtained from the C API and exposes its
/// fields as Swift values.
///
/// The segment is released with `SherpaOnnxDestroySpeechSegment` when this
/// wrapper is deallocated.
class SherpaOnnxSpeechSegmentWrapper {
  /// Pointer to the wrapped C segment.
  let p: UnsafePointer<SherpaOnnxSpeechSegment>!

  /// Wraps `p`; this object frees it on deallocation.
  init(p: UnsafePointer<SherpaOnnxSpeechSegment>!) {
    self.p = p
  }

  deinit {
    guard let p else { return }
    SherpaOnnxDestroySpeechSegment(p)
  }

  /// The segment's `start` field as an `Int`.
  var start: Int {
    Int(p.pointee.start)
  }

  /// The segment's `n` field as an `Int`; used as the length of `samples`.
  var n: Int {
    Int(p.pointee.n)
  }

  /// A fresh Swift array holding the first `n` values of the segment's
  /// `samples` pointer.
  var samples: [Float] {
    (0..<n).map { p.pointee.samples[$0] }
  }
}

/// A Swift wrapper around the voice activity detector exposed by the C API.
///
/// The underlying C object is created in `init` and released in `deinit`.
class SherpaOnnxVoiceActivityDetectorWrapper {
  /// A pointer to the underlying counterpart in C
  let vad: OpaquePointer!

  /// Creates the underlying detector by forwarding both arguments to
  /// `SherpaOnnxCreateVoiceActivityDetector`.
  init(config: UnsafePointer<SherpaOnnxVadModelConfig>!, buffer_size_in_seconds: Float) {
    vad = SherpaOnnxCreateVoiceActivityDetector(config, buffer_size_in_seconds)
  }

  deinit {
    guard let vad else { return }
    SherpaOnnxDestroyVoiceActivityDetector(vad)
  }

  /// Feeds all of `samples` to the detector.
  func acceptWaveform(samples: [Float]) {
    let sampleCount = Int32(samples.count)
    SherpaOnnxVoiceActivityDetectorAcceptWaveform(vad, samples, sampleCount)
  }

  /// Returns `true` when `SherpaOnnxVoiceActivityDetectorEmpty` reports 1.
  func isEmpty() -> Bool {
    SherpaOnnxVoiceActivityDetectorEmpty(vad) == 1
  }

  /// Forwards to `SherpaOnnxVoiceActivityDetectorPop`.
  func pop() {
    SherpaOnnxVoiceActivityDetectorPop(vad)
  }

  /// Wraps the pointer returned by `SherpaOnnxVoiceActivityDetectorFront`.
  ///
  /// The returned wrapper destroys the segment when it is deallocated, so
  /// callers do not free it themselves.
  func front() -> SherpaOnnxSpeechSegmentWrapper {
    let segment: UnsafePointer<SherpaOnnxSpeechSegment>? =
      SherpaOnnxVoiceActivityDetectorFront(vad)
    return SherpaOnnxSpeechSegmentWrapper(p: segment)
  }

  /// Forwards to `SherpaOnnxVoiceActivityDetectorReset`.
  func reset() {
    SherpaOnnxVoiceActivityDetectorReset(vad)
  }
}
1 change: 0 additions & 1 deletion swift-api-examples/decode-file-non-streaming.swift
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,6 @@ extension AVAudioPCMBuffer {
}

func run() {

var recognizer: SherpaOnnxOfflineRecognizer
var modelConfig: SherpaOnnxOfflineModelConfig
var modelType = "whisper"
Expand Down
Loading

0 comments on commit 5f79bb6

Please sign in to comment.