diff --git a/.gitignore b/.gitignore index d6f49076a..d2d16a1b9 100644 --- a/.gitignore +++ b/.gitignore @@ -60,3 +60,10 @@ run-offline-decode-files-nemo-ctc.sh *.jar sherpa-onnx-nemo-ctc-* *.wav +sherpa-onnx-zipformer-* +sherpa-onnx-conformer-* +sherpa-onnx-whisper-* +swift-api-examples/k2fsa-* +run-*.sh +two-pass-*.sh +build-* diff --git a/android/SherpaOnnx/app/src/main/java/com/k2fsa/sherpa/onnx/MainActivity.kt b/android/SherpaOnnx/app/src/main/java/com/k2fsa/sherpa/onnx/MainActivity.kt index 827ad6a1b..1619f3b27 100644 --- a/android/SherpaOnnx/app/src/main/java/com/k2fsa/sherpa/onnx/MainActivity.kt +++ b/android/SherpaOnnx/app/src/main/java/com/k2fsa/sherpa/onnx/MainActivity.kt @@ -21,10 +21,6 @@ private const val REQUEST_RECORD_AUDIO_PERMISSION = 200 class MainActivity : AppCompatActivity() { private val permissions: Array = arrayOf(Manifest.permission.RECORD_AUDIO) - // If there is a GPU and useGPU is true, we will use GPU - // If there is no GPU and useGPU is true, we won't use GPU - private val useGPU: Boolean = true - private lateinit var model: SherpaOnnx private var audioRecord: AudioRecord? = null private lateinit var recordButton: Button @@ -91,7 +87,7 @@ class MainActivity : AppCompatActivity() { audioRecord!!.startRecording() recordButton.setText(R.string.stop) isRecording = true - model.reset() + model.reset(true) textView.text = "" lastText = "" idx = 0 @@ -125,26 +121,32 @@ class MainActivity : AppCompatActivity() { while (model.isReady()) { model.decode() } - runOnUiThread { - val isEndpoint = model.isEndpoint() - val text = model.text - - if(text.isNotBlank()) { - if (lastText.isBlank()) { - textView.text = "${idx}: ${text}" - } else { - textView.text = "${lastText}\n${idx}: ${text}" - } + + val isEndpoint = model.isEndpoint() + val text = model.text + + var textToDisplay = lastText; + + if(text.isNotBlank()) { + if (lastText.isBlank()) { + textToDisplay = "${idx}: ${text}" + } else { + textToDisplay = "${lastText}\n${idx}: ${text}" } + } - if (isEndpoint) { - model.reset() - if (text.isNotBlank()) { - lastText = "${lastText}\n${idx}: ${text}" - idx += 1 - } + if (isEndpoint) { + model.reset() + if (text.isNotBlank()) { + lastText = "${lastText}\n${idx}: ${text}" + textToDisplay = lastText; + idx += 1 } } + + runOnUiThread { + textView.text = textToDisplay + } } } } diff --git a/android/SherpaOnnx/app/src/main/java/com/k2fsa/sherpa/onnx/SherpaOnnx.kt b/android/SherpaOnnx/app/src/main/java/com/k2fsa/sherpa/onnx/SherpaOnnx.kt index c68703b46..185765622 100644 --- a/android/SherpaOnnx/app/src/main/java/com/k2fsa/sherpa/onnx/SherpaOnnx.kt +++ b/android/SherpaOnnx/app/src/main/java/com/k2fsa/sherpa/onnx/SherpaOnnx.kt @@ -77,7 +77,7 @@ class SherpaOnnx( acceptWaveform(ptr, samples, sampleRate) fun inputFinished() = inputFinished(ptr) - fun reset() = reset(ptr) + fun reset(recreate: Boolean = false) = reset(ptr, recreate = recreate) fun decode() = decode(ptr) fun isEndpoint(): Boolean = isEndpoint(ptr) fun isReady(): Boolean = isReady(ptr) @@ -99,7 +99,7 @@ class SherpaOnnx( private external fun acceptWaveform(ptr: Long, samples: FloatArray, sampleRate: Int) private external fun inputFinished(ptr: Long) private external fun getText(ptr: Long): String - private external fun reset(ptr: Long) + private external fun reset(ptr: Long, recreate: Boolean) private external fun decode(ptr: Long) private external fun isEndpoint(ptr: Long): Boolean private external fun isReady(ptr: Long): Boolean diff --git a/android/SherpaOnnx2Pass/.gitignore b/android/SherpaOnnx2Pass/.gitignore new file mode 100644 index 000000000..aa724b770 --- /dev/null +++ b/android/SherpaOnnx2Pass/.gitignore @@ -0,0 +1,15 @@ +*.iml +.gradle +/local.properties +/.idea/caches +/.idea/libraries +/.idea/modules.xml +/.idea/workspace.xml +/.idea/navEditor.xml +/.idea/assetWizardSettings.xml +.DS_Store +/build +/captures +.externalNativeBuild +.cxx +local.properties diff --git a/android/SherpaOnnx2Pass/.idea/.gitignore b/android/SherpaOnnx2Pass/.idea/.gitignore new file mode 100644 index 000000000..26d33521a --- /dev/null +++ b/android/SherpaOnnx2Pass/.idea/.gitignore @@ -0,0 +1,3 @@ +# Default ignored files +/shelf/ +/workspace.xml diff --git a/android/SherpaOnnx2Pass/.idea/compiler.xml b/android/SherpaOnnx2Pass/.idea/compiler.xml new file mode 100644 index 000000000..fb7f4a8a4 --- /dev/null +++ b/android/SherpaOnnx2Pass/.idea/compiler.xml @@ -0,0 +1,6 @@ + + + + + + \ No newline at end of file diff --git a/android/SherpaOnnx2Pass/.idea/gradle.xml b/android/SherpaOnnx2Pass/.idea/gradle.xml new file mode 100644 index 000000000..a2d7c2133 --- /dev/null +++ b/android/SherpaOnnx2Pass/.idea/gradle.xml @@ -0,0 +1,19 @@ + + + + + + + \ No newline at end of file diff --git a/android/SherpaOnnx2Pass/.idea/misc.xml b/android/SherpaOnnx2Pass/.idea/misc.xml new file mode 100644 index 000000000..bdd92780c --- /dev/null +++ b/android/SherpaOnnx2Pass/.idea/misc.xml @@ -0,0 +1,10 @@ + + + + + + + + + \ No newline at end of file diff --git a/android/SherpaOnnx2Pass/.idea/vcs.xml b/android/SherpaOnnx2Pass/.idea/vcs.xml new file mode 100644 index 000000000..b2bdec2d7 --- /dev/null +++ b/android/SherpaOnnx2Pass/.idea/vcs.xml @@ -0,0 +1,6 @@ + + + + + + \ No newline at end of file diff --git a/android/SherpaOnnx2Pass/app/.gitignore b/android/SherpaOnnx2Pass/app/.gitignore new file mode 100644 index 000000000..42afabfd2 --- /dev/null +++ b/android/SherpaOnnx2Pass/app/.gitignore @@ -0,0 +1 @@ +/build \ No newline at end of file diff --git a/android/SherpaOnnx2Pass/app/build.gradle b/android/SherpaOnnx2Pass/app/build.gradle new file mode 100644 index 000000000..d64be8079 --- /dev/null +++ b/android/SherpaOnnx2Pass/app/build.gradle @@ -0,0 +1,44 @@ +plugins { + id 'com.android.application' + id 'org.jetbrains.kotlin.android' +} + +android { + namespace 'com.k2fsa.sherpa.onnx' + compileSdk 32 + + defaultConfig { + applicationId "com.k2fsa.sherpa.onnx" + minSdk 21 + targetSdk 32 + versionCode 1 + versionName "1.0" + + testInstrumentationRunner "androidx.test.runner.AndroidJUnitRunner" + } + + buildTypes { + release { + minifyEnabled false + proguardFiles getDefaultProguardFile('proguard-android-optimize.txt'), 'proguard-rules.pro' + } + } + compileOptions { + sourceCompatibility JavaVersion.VERSION_1_8 + targetCompatibility JavaVersion.VERSION_1_8 + } + kotlinOptions { + jvmTarget = '1.8' + } +} + +dependencies { + + implementation 'androidx.core:core-ktx:1.7.0' + implementation 'androidx.appcompat:appcompat:1.5.1' + implementation 'com.google.android.material:material:1.7.0' + implementation 'androidx.constraintlayout:constraintlayout:2.1.4' + testImplementation 'junit:junit:4.13.2' + androidTestImplementation 'androidx.test.ext:junit:1.1.4' + androidTestImplementation 'androidx.test.espresso:espresso-core:3.5.0' +} \ No newline at end of file diff --git a/android/SherpaOnnx2Pass/app/proguard-rules.pro b/android/SherpaOnnx2Pass/app/proguard-rules.pro new file mode 100644 index 000000000..481bb4348 --- /dev/null +++ b/android/SherpaOnnx2Pass/app/proguard-rules.pro @@ -0,0 +1,21 @@ +# Add project specific ProGuard rules here. +# You can control the set of applied configuration files using the +# proguardFiles setting in build.gradle. +# +# For more details, see +# http://developer.android.com/guide/developing/tools/proguard.html + +# If your project uses WebView with JS, uncomment the following +# and specify the fully qualified class name to the JavaScript interface +# class: +#-keepclassmembers class fqcn.of.javascript.interface.for.webview { +# public *; +#} + +# Uncomment this to preserve the line number information for +# debugging stack traces. +#-keepattributes SourceFile,LineNumberTable + +# If you keep the line number information, uncomment this to +# hide the original source file name. +#-renamesourcefileattribute SourceFile \ No newline at end of file diff --git a/android/SherpaOnnx2Pass/app/src/androidTest/java/com/k2fsa/sherpa/onnx/ExampleInstrumentedTest.kt b/android/SherpaOnnx2Pass/app/src/androidTest/java/com/k2fsa/sherpa/onnx/ExampleInstrumentedTest.kt new file mode 100644 index 000000000..183383202 --- /dev/null +++ b/android/SherpaOnnx2Pass/app/src/androidTest/java/com/k2fsa/sherpa/onnx/ExampleInstrumentedTest.kt @@ -0,0 +1,24 @@ +package com.k2fsa.sherpa.onnx + +import androidx.test.platform.app.InstrumentationRegistry +import androidx.test.ext.junit.runners.AndroidJUnit4 + +import org.junit.Test +import org.junit.runner.RunWith + +import org.junit.Assert.* + +/** + * Instrumented test, which will execute on an Android device. + * + * See [testing documentation](http://d.android.com/tools/testing). + */ +@RunWith(AndroidJUnit4::class) +class ExampleInstrumentedTest { + @Test + fun useAppContext() { + // Context of the app under test. + val appContext = InstrumentationRegistry.getInstrumentation().targetContext + assertEquals("com.k2fsa.sherpa.onnx", appContext.packageName) + } +} \ No newline at end of file diff --git a/android/SherpaOnnx2Pass/app/src/main/.gitignore b/android/SherpaOnnx2Pass/app/src/main/.gitignore new file mode 100644 index 000000000..140f8cf80 --- /dev/null +++ b/android/SherpaOnnx2Pass/app/src/main/.gitignore @@ -0,0 +1 @@ +*.so diff --git a/android/SherpaOnnx2Pass/app/src/main/AndroidManifest.xml b/android/SherpaOnnx2Pass/app/src/main/AndroidManifest.xml new file mode 100644 index 000000000..2a440df14 --- /dev/null +++ b/android/SherpaOnnx2Pass/app/src/main/AndroidManifest.xml @@ -0,0 +1,32 @@ + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/android/SherpaOnnx2Pass/app/src/main/assets/.gitkeep b/android/SherpaOnnx2Pass/app/src/main/assets/.gitkeep new file mode 100644 index 000000000..e69de29bb diff --git a/android/SherpaOnnx2Pass/app/src/main/java/com/k2fsa/sherpa/onnx/MainActivity.kt b/android/SherpaOnnx2Pass/app/src/main/java/com/k2fsa/sherpa/onnx/MainActivity.kt new file mode 100644 index 000000000..012c0db5e --- /dev/null +++ b/android/SherpaOnnx2Pass/app/src/main/java/com/k2fsa/sherpa/onnx/MainActivity.kt @@ -0,0 +1,251 @@ +package com.k2fsa.sherpa.onnx + +import android.Manifest +import android.content.pm.PackageManager +import android.media.AudioFormat +import android.media.AudioRecord +import android.media.MediaRecorder +import android.os.Bundle +import android.text.method.ScrollingMovementMethod +import android.util.Log +import android.widget.Button +import android.widget.TextView +import androidx.appcompat.app.AppCompatActivity +import androidx.core.app.ActivityCompat +import kotlin.concurrent.thread + +private const val TAG = "sherpa-onnx" +private const val REQUEST_RECORD_AUDIO_PERMISSION = 200 + +class MainActivity : AppCompatActivity() { + private val permissions: Array = arrayOf(Manifest.permission.RECORD_AUDIO) + + private lateinit var onlineRecognizer: SherpaOnnx + private lateinit var offlineRecognizer: SherpaOnnxOffline + private var audioRecord: AudioRecord? = null + private lateinit var recordButton: Button + private lateinit var textView: TextView + private var recordingThread: Thread? = null + + private val audioSource = MediaRecorder.AudioSource.MIC + private val sampleRateInHz = 16000 + private val channelConfig = AudioFormat.CHANNEL_IN_MONO + + private var samplesBuffer = arrayListOf() + + // Note: We don't use AudioFormat.ENCODING_PCM_FLOAT + // since the AudioRecord.read(float[]) needs API level >= 23 + // but we are targeting API level >= 21 + private val audioFormat = AudioFormat.ENCODING_PCM_16BIT + private var idx: Int = 0 + private var lastText: String = "" + + @Volatile + private var isRecording: Boolean = false + + override fun onRequestPermissionsResult( + requestCode: Int, permissions: Array, grantResults: IntArray + ) { + super.onRequestPermissionsResult(requestCode, permissions, grantResults) + val permissionToRecordAccepted = if (requestCode == REQUEST_RECORD_AUDIO_PERMISSION) { + grantResults[0] == PackageManager.PERMISSION_GRANTED + } else { + false + } + + if (!permissionToRecordAccepted) { + Log.e(TAG, "Audio record is disallowed") + finish() + } + + Log.i(TAG, "Audio record is permitted") + } + + override fun onCreate(savedInstanceState: Bundle?) { + super.onCreate(savedInstanceState) + setContentView(R.layout.activity_main) + + ActivityCompat.requestPermissions(this, permissions, REQUEST_RECORD_AUDIO_PERMISSION) + + Log.i(TAG, "Start to initialize first-pass recognizer") + initOnlineRecognizer() + Log.i(TAG, "Finished initializing first-pass recognizer") + + Log.i(TAG, "Start to initialize second-pass recognizer") + initOfflineRecognizer() + Log.i(TAG, "Finished initializing second-pass recognizer") + + recordButton = findViewById(R.id.record_button) + recordButton.setOnClickListener { onclick() } + + textView = findViewById(R.id.my_text) + textView.movementMethod = ScrollingMovementMethod() + } + + private fun onclick() { + if (!isRecording) { + val ret = initMicrophone() + if (!ret) { + Log.e(TAG, "Failed to initialize microphone") + return + } + Log.i(TAG, "state: ${audioRecord?.state}") + audioRecord!!.startRecording() + recordButton.setText(R.string.stop) + isRecording = true + onlineRecognizer.reset(true) + samplesBuffer.clear() + textView.text = "" + lastText = "" + idx = 0 + + recordingThread = thread(true) { + processSamples() + } + Log.i(TAG, "Started recording") + } else { + isRecording = false + audioRecord!!.stop() + audioRecord!!.release() + audioRecord = null + recordButton.setText(R.string.start) + Log.i(TAG, "Stopped recording") + } + } + + private fun processSamples() { + Log.i(TAG, "processing samples") + + val interval = 0.1 // i.e., 100 ms + val bufferSize = (interval * sampleRateInHz).toInt() // in samples + val buffer = ShortArray(bufferSize) + + while (isRecording) { + val ret = audioRecord?.read(buffer, 0, buffer.size) + if (ret != null && ret > 0) { + val samples = FloatArray(ret) { buffer[it] / 32768.0f } + samplesBuffer.add(samples) + + onlineRecognizer.acceptWaveform(samples, sampleRate = sampleRateInHz) + while (onlineRecognizer.isReady()) { + onlineRecognizer.decode() + } + val isEndpoint = onlineRecognizer.isEndpoint() + var textToDisplay = lastText + + var text = onlineRecognizer.text + if (text.isNotBlank()) { + if (lastText.isBlank()) { + // textView.text = "${idx}: ${text}" + textToDisplay = "${idx}: ${text}" + } else { + textToDisplay = "${lastText}\n${idx}: ${text}" + } + } + + if (isEndpoint) { + onlineRecognizer.reset() + + if (text.isNotBlank()) { + text = runSecondPass() + lastText = "${lastText}\n${idx}: ${text}" + idx += 1 + } else { + samplesBuffer.clear() + } + } + + runOnUiThread { + textView.text = textToDisplay.lowercase() + } + } + } + } + + private fun initMicrophone(): Boolean { + if (ActivityCompat.checkSelfPermission( + this, Manifest.permission.RECORD_AUDIO + ) != PackageManager.PERMISSION_GRANTED + ) { + ActivityCompat.requestPermissions(this, permissions, REQUEST_RECORD_AUDIO_PERMISSION) + return false + } + + val numBytes = AudioRecord.getMinBufferSize(sampleRateInHz, channelConfig, audioFormat) + Log.i( + TAG, "buffer size in milliseconds: ${numBytes * 1000.0f / sampleRateInHz}" + ) + + audioRecord = AudioRecord( + audioSource, + sampleRateInHz, + channelConfig, + audioFormat, + numBytes * 2 // a sample has two bytes as we are using 16-bit PCM + ) + return true + } + + private fun initOnlineRecognizer() { + // Please change getModelConfig() to add new models + // See https://k2-fsa.github.io/sherpa/onnx/pretrained_models/index.html + // for a list of available models + val firstType = 1 + println("Select model type ${firstType} for the first pass") + val config = OnlineRecognizerConfig( + featConfig = getFeatureConfig(sampleRate = sampleRateInHz, featureDim = 80), + modelConfig = getModelConfig(type = firstType)!!, + endpointConfig = getEndpointConfig(), + enableEndpoint = true, + ) + + onlineRecognizer = SherpaOnnx( + assetManager = application.assets, + config = config, + ) + } + + private fun initOfflineRecognizer() { + // Please change getOfflineModelConfig() to add new models + // See https://k2-fsa.github.io/sherpa/onnx/pretrained_models/index.html + // for a list of available models + val secondType = 1 + println("Select model type ${secondType} for the second pass") + + val config = OfflineRecognizerConfig( + featConfig = getFeatureConfig(sampleRate = sampleRateInHz, featureDim = 80), + modelConfig = getOfflineModelConfig(type = secondType)!!, + ) + + offlineRecognizer = SherpaOnnxOffline( + assetManager = application.assets, + config = config, + ) + } + + private fun runSecondPass(): String { + var totalSamples = 0 + for (a in samplesBuffer) { + totalSamples += a.size + } + var i = 0 + + val samples = FloatArray(totalSamples) + + // todo(fangjun): Make it more efficient + for (a in samplesBuffer) { + for (s in a) { + samples[i] = s + i += 1 + } + } + + + val n = maxOf(0, samples.size - 8000) + + samplesBuffer.clear() + samplesBuffer.add(samples.sliceArray(n..samples.size-1)) + + return offlineRecognizer.decode(samples.sliceArray(0..n), sampleRateInHz) + } +} diff --git a/android/SherpaOnnx2Pass/app/src/main/java/com/k2fsa/sherpa/onnx/SherpaOnnx.kt b/android/SherpaOnnx2Pass/app/src/main/java/com/k2fsa/sherpa/onnx/SherpaOnnx.kt new file mode 100644 index 000000000..99ca65827 --- /dev/null +++ b/android/SherpaOnnx2Pass/app/src/main/java/com/k2fsa/sherpa/onnx/SherpaOnnx.kt @@ -0,0 +1,375 @@ +package com.k2fsa.sherpa.onnx + +import android.content.res.AssetManager + +data class EndpointRule( + var mustContainNonSilence: Boolean, + var minTrailingSilence: Float, + var minUtteranceLength: Float, +) + +data class EndpointConfig( + var rule1: EndpointRule = EndpointRule(false, 2.0f, 0.0f), + var rule2: EndpointRule = EndpointRule(true, 1.2f, 0.0f), + var rule3: EndpointRule = EndpointRule(false, 0.0f, 20.0f) +) + +data class OnlineTransducerModelConfig( + var encoder: String = "", + var decoder: String = "", + var joiner: String = "", +) + +data class OnlineParaformerModelConfig( + var encoder: String = "", + var decoder: String = "", +) + +data class OnlineModelConfig( + var transducer: OnlineTransducerModelConfig = OnlineTransducerModelConfig(), + var paraformer: OnlineParaformerModelConfig = OnlineParaformerModelConfig(), + var tokens: String, + var numThreads: Int = 1, + var debug: Boolean = false, + var provider: String = "cpu", + var modelType: String = "", +) + +data class OnlineLMConfig( + var model: String = "", + var scale: Float = 0.5f, +) + +data class FeatureConfig( + var sampleRate: Int = 16000, + var featureDim: Int = 80, +) + +data class OnlineRecognizerConfig( + var featConfig: FeatureConfig = FeatureConfig(), + var modelConfig: OnlineModelConfig, + var lmConfig: OnlineLMConfig = OnlineLMConfig(), + var endpointConfig: EndpointConfig = EndpointConfig(), + var enableEndpoint: Boolean = true, + var decodingMethod: String = "greedy_search", + var maxActivePaths: Int = 4, +) + +data class OfflineTransducerModelConfig( + var encoder: String = "", + var decoder: String = "", + var joiner: String = "", +) + +data class OfflineParaformerModelConfig( + var model: String = "", +) + +data class OfflineWhisperModelConfig( + var encoder: String = "", + var decoder: String = "", +) + +data class OfflineModelConfig( + var transducer: OfflineTransducerModelConfig = OfflineTransducerModelConfig(), + var paraformer: OfflineParaformerModelConfig = OfflineParaformerModelConfig(), + var whisper: OfflineWhisperModelConfig = OfflineWhisperModelConfig(), + var numThreads: Int = 1, + var debug: Boolean = false, + var provider: String = "cpu", + var modelType: String = "", + var tokens: String, +) + +data class OfflineRecognizerConfig( + var featConfig: FeatureConfig = FeatureConfig(), + var modelConfig: OfflineModelConfig, + // var lmConfig: OfflineLMConfig(), // TODO(fangjun): enable it + var decodingMethod: String = "greedy_search", + var maxActivePaths: Int = 4, +) + +class SherpaOnnx( + assetManager: AssetManager? = null, + var config: OnlineRecognizerConfig, +) { + private val ptr: Long + + init { + if (assetManager != null) { + ptr = new(assetManager, config) + } else { + ptr = newFromFile(config) + } + } + + protected fun finalize() { + delete(ptr) + } + + fun acceptWaveform(samples: FloatArray, sampleRate: Int) = + acceptWaveform(ptr, samples, sampleRate) + + fun inputFinished() = inputFinished(ptr) + fun reset(recreate: Boolean = false) = reset(ptr, recreate = recreate) + fun decode() = decode(ptr) + fun isEndpoint(): Boolean = isEndpoint(ptr) + fun isReady(): Boolean = isReady(ptr) + + val text: String + get() = getText(ptr) + + private external fun delete(ptr: Long) + + private external fun new( + assetManager: AssetManager, + config: OnlineRecognizerConfig, + ): Long + + private external fun newFromFile( + config: OnlineRecognizerConfig, + ): Long + + private external fun acceptWaveform(ptr: Long, samples: FloatArray, sampleRate: Int) + private external fun inputFinished(ptr: Long) + private external fun getText(ptr: Long): String + private external fun reset(ptr: Long, recreate: Boolean) + private external fun decode(ptr: Long) + private external fun isEndpoint(ptr: Long): Boolean + private external fun isReady(ptr: Long): Boolean + + companion object { + init { + System.loadLibrary("sherpa-onnx-jni") + } + } +} + +class SherpaOnnxOffline( + assetManager: AssetManager? = null, + var config: OfflineRecognizerConfig, +) { + private val ptr: Long + + init { + if (assetManager != null) { + ptr = new(assetManager, config) + } else { + ptr = newFromFile(config) + } + } + + protected fun finalize() { + delete(ptr) + } + + fun decode(samples: FloatArray, sampleRate: Int) = decode(ptr, samples, sampleRate) + + private external fun delete(ptr: Long) + + private external fun new( + assetManager: AssetManager, + config: OfflineRecognizerConfig, + ): Long + + private external fun newFromFile( + config: OfflineRecognizerConfig, + ): Long + + private external fun decode(ptr: Long, samples: FloatArray, sampleRate: Int): String + + companion object { + init { + System.loadLibrary("sherpa-onnx-jni") + } + } +} + +fun getFeatureConfig(sampleRate: Int, featureDim: Int): FeatureConfig { + return FeatureConfig(sampleRate = sampleRate, featureDim = featureDim) +} + +/* +Please see +https://k2-fsa.github.io/sherpa/onnx/pretrained_models/index.html +for a list of pre-trained models. + +We only add a few here. Please change the following code +to add your own. (It should be straightforward to add a new model +by following the code) + +@param type +0 - csukuangfj/sherpa-onnx-streaming-zipformer-zh-14M-2023-02-23 (Chinese) + https://k2-fsa.github.io/sherpa/onnx/pretrained_models/online-transducer/zipformer-transducer-models.html#sherpa-onnx-streaming-zipformer-zh-14m-2023-02-23 + encoder/joiner int8, decoder float32 + +1 - csukuangfj/sherpa-onnx-streaming-zipformer-en-20M-2023-02-17 (English) + https://k2-fsa.github.io/sherpa/onnx/pretrained_models/online-transducer/zipformer-transducer-models.html#csukuangfj-sherpa-onnx-streaming-zipformer-en-20m-2023-02-17-english + encoder/joiner int8, decoder fp32 + + */ +fun getModelConfig(type: Int): OnlineModelConfig? { + when (type) { + 0 -> { + val modelDir = "sherpa-onnx-streaming-zipformer-zh-14M-2023-02-23" + return OnlineModelConfig( + transducer = OnlineTransducerModelConfig( + encoder = "$modelDir/encoder-epoch-99-avg-1.int8.onnx", + decoder = "$modelDir/decoder-epoch-99-avg-1.onnx", + joiner = "$modelDir/joiner-epoch-99-avg-1.int8.onnx", + ), + tokens = "$modelDir/tokens.txt", + modelType = "zipformer", + ) + } + + 1 -> { + val modelDir = "sherpa-onnx-streaming-zipformer-en-20M-2023-02-17" + return OnlineModelConfig( + transducer = OnlineTransducerModelConfig( + encoder = "$modelDir/encoder-epoch-99-avg-1.int8.onnx", + decoder = "$modelDir/decoder-epoch-99-avg-1.onnx", + joiner = "$modelDir/joiner-epoch-99-avg-1.int8.onnx", + ), + tokens = "$modelDir/tokens.txt", + modelType = "zipformer", + ) + } + } + return null +} + +/* +Please see +https://k2-fsa.github.io/sherpa/onnx/pretrained_models/index.html +for a list of pre-trained models. + +We only add a few here. Please change the following code +to add your own LM model. (It should be straightforward to train a new NN LM model +by following the code, https://github.com/k2-fsa/icefall/blob/master/icefall/rnn_lm/train.py) + +@param type +0 - sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20 (Bilingual, Chinese + English) + https://k2-fsa.github.io/sherpa/onnx/pretrained_models/zipformer-transducer-models.html#sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20-bilingual-chinese-english + */ +fun getOnlineLMConfig(type: Int): OnlineLMConfig { + when (type) { + 0 -> { + val modelDir = "sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20" + return OnlineLMConfig( + model = "$modelDir/with-state-epoch-99-avg-1.int8.onnx", + scale = 0.5f, + ) + } + } + return OnlineLMConfig() +} + +// for English models, use a small value for rule2.minTrailingSilence, e.g., 0.8 +fun getEndpointConfig(): EndpointConfig { + return EndpointConfig( + rule1 = EndpointRule(false, 2.4f, 0.0f), + rule2 = EndpointRule(true, 0.8f, 0.0f), + rule3 = EndpointRule(false, 0.0f, 20.0f) + ) +} + +/* +Please see +https://k2-fsa.github.io/sherpa/onnx/pretrained_models/index.html +for a list of pre-trained models. + +We only add a few here. Please change the following code +to add your own. (It should be straightforward to add a new model +by following the code) + +@param type + +0 - csukuangfj/sherpa-onnx-paraformer-zh-2023-03-28 (Chinese) + https://k2-fsa.github.io/sherpa/onnx/pretrained_models/offline-paraformer/paraformer-models.html#csukuangfj-sherpa-onnx-paraformer-zh-2023-03-28-chinese + int8 + +1 - icefall-asr-multidataset-pruned_transducer_stateless7-2023-05-04 (English) + https://k2-fsa.github.io/sherpa/onnx/pretrained_models/offline-transducer/zipformer-transducer-models.html#icefall-asr-multidataset-pruned-transducer-stateless7-2023-05-04-english + encoder int8, decoder/joiner float32 + +2 - sherpa-onnx-whisper-tiny.en + https://k2-fsa.github.io/sherpa/onnx/pretrained_models/whisper/tiny.en.html#tiny-en + encoder int8, decoder int8 + +3 - sherpa-onnx-whisper-base.en + https://k2-fsa.github.io/sherpa/onnx/pretrained_models/whisper/tiny.en.html#tiny-en + encoder int8, decoder int8 + +4 - pkufool/icefall-asr-zipformer-wenetspeech-20230615 (Chinese) + https://k2-fsa.github.io/sherpa/onnx/pretrained_models/offline-transducer/zipformer-transducer-models.html#pkufool-icefall-asr-zipformer-wenetspeech-20230615-chinese + encoder/joiner int8, decoder fp32 + + */ +fun getOfflineModelConfig(type: Int): OfflineModelConfig? { + when (type) { + 0 -> { + val modelDir = "sherpa-onnx-paraformer-zh-2023-03-28" + return OfflineModelConfig( + paraformer = OfflineParaformerModelConfig( + model = "$modelDir/model.int8.onnx", + ), + tokens = "$modelDir/tokens.txt", + modelType = "paraformer", + ) + } + + 1 -> { + val modelDir = "icefall-asr-multidataset-pruned_transducer_stateless7-2023-05-04" + return OfflineModelConfig( + transducer = OfflineTransducerModelConfig( + encoder = "$modelDir/encoder-epoch-30-avg-4.int8.onnx", + decoder = "$modelDir/decoder-epoch-30-avg-4.onnx", + joiner = "$modelDir/joiner-epoch-30-avg-4.onnx", + ), + tokens = "$modelDir/tokens.txt", + modelType = "zipformer", + ) + } + + 2 -> { + val modelDir = "sherpa-onnx-whisper-tiny.en" + return OfflineModelConfig( + whisper = OfflineWhisperModelConfig( + encoder = "$modelDir/tiny.en-encoder.int8.onnx", + decoder = "$modelDir/tiny.en-decoder.int8.onnx", + ), + tokens = "$modelDir/tiny.en-tokens.txt", + modelType = "whisper", + ) + } + + 3 -> { + val modelDir = "sherpa-onnx-whisper-base.en" + return OfflineModelConfig( + whisper = OfflineWhisperModelConfig( + encoder = "$modelDir/base.en-encoder.int8.onnx", + decoder = "$modelDir/base.en-decoder.int8.onnx", + ), + tokens = "$modelDir/base.en-tokens.txt", + modelType = "whisper", + ) + } + + + 4 -> { + val modelDir = "icefall-asr-zipformer-wenetspeech-20230615" + return OfflineModelConfig( + transducer = OfflineTransducerModelConfig( + encoder = "$modelDir/encoder-epoch-12-avg-4.int8.onnx", + decoder = "$modelDir/decoder-epoch-12-avg-4.onnx", + joiner = "$modelDir/joiner-epoch-12-avg-4.int8.onnx", + ), + tokens = "$modelDir/tokens.txt", + modelType = "zipformer", + ) + } + + } + return null +} diff --git a/android/SherpaOnnx2Pass/app/src/main/java/com/k2fsa/sherpa/onnx/WaveReader.kt b/android/SherpaOnnx2Pass/app/src/main/java/com/k2fsa/sherpa/onnx/WaveReader.kt new file mode 120000 index 000000000..d65321ad0 --- /dev/null +++ b/android/SherpaOnnx2Pass/app/src/main/java/com/k2fsa/sherpa/onnx/WaveReader.kt @@ -0,0 +1 @@ +../../../../../../../../../SherpaOnnx/app/src/main/java/com/k2fsa/sherpa/onnx/WaveReader.kt \ No newline at end of file diff --git a/android/SherpaOnnx2Pass/app/src/main/jniLibs/.gitkeep b/android/SherpaOnnx2Pass/app/src/main/jniLibs/.gitkeep new file mode 100644 index 000000000..e69de29bb diff --git a/android/SherpaOnnx2Pass/app/src/main/jniLibs/arm64-v8a/.gitkeep b/android/SherpaOnnx2Pass/app/src/main/jniLibs/arm64-v8a/.gitkeep new file mode 100644 index 000000000..e69de29bb diff --git a/android/SherpaOnnx2Pass/app/src/main/jniLibs/armeabi-v7a/.gitkeep b/android/SherpaOnnx2Pass/app/src/main/jniLibs/armeabi-v7a/.gitkeep new file mode 100644 index 000000000..e69de29bb diff --git a/android/SherpaOnnx2Pass/app/src/main/jniLibs/x86/.gitkeep b/android/SherpaOnnx2Pass/app/src/main/jniLibs/x86/.gitkeep new file mode 100644 index 000000000..e69de29bb diff --git a/android/SherpaOnnx2Pass/app/src/main/jniLibs/x86_64/.gitkeep b/android/SherpaOnnx2Pass/app/src/main/jniLibs/x86_64/.gitkeep new file mode 100644 index 000000000..e69de29bb diff --git a/android/SherpaOnnx2Pass/app/src/main/res/drawable-v24/ic_launcher_foreground.xml b/android/SherpaOnnx2Pass/app/src/main/res/drawable-v24/ic_launcher_foreground.xml new file mode 100644 index 000000000..2b068d114 --- /dev/null +++ b/android/SherpaOnnx2Pass/app/src/main/res/drawable-v24/ic_launcher_foreground.xml @@ -0,0 +1,30 @@ + + + + + + + + + + + \ No newline at end of file diff --git a/android/SherpaOnnx2Pass/app/src/main/res/drawable/ic_launcher_background.xml b/android/SherpaOnnx2Pass/app/src/main/res/drawable/ic_launcher_background.xml new file mode 100644 index 000000000..07d5da9cb --- /dev/null +++ b/android/SherpaOnnx2Pass/app/src/main/res/drawable/ic_launcher_background.xml @@ -0,0 +1,170 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/android/SherpaOnnx2Pass/app/src/main/res/layout/activity_main.xml b/android/SherpaOnnx2Pass/app/src/main/res/layout/activity_main.xml new file mode 100644 index 000000000..f9b35e862 --- /dev/null +++ b/android/SherpaOnnx2Pass/app/src/main/res/layout/activity_main.xml @@ -0,0 +1,39 @@ + + + + + + + +