From 794bb27b3010d00952b9ed6a057678d7b8271711 Mon Sep 17 00:00:00 2001 From: ad31c0 Date: Mon, 7 Jun 2021 23:54:09 +0200 Subject: [PATCH] Add VAD platform capability through webrtcvad --- package-lock.json | 16 +++++++++++++ package.json | 3 ++- service/config.js | 3 +-- service/main.js | 2 +- service/platform/index.js | 48 ++++++++++++++++++++++++++++++++++++++- 5 files changed, 67 insertions(+), 5 deletions(-) diff --git a/package-lock.json b/package-lock.json index 0bb1984..a0d4447 100644 --- a/package-lock.json +++ b/package-lock.json @@ -3973,6 +3973,22 @@ "resolved": "https://registry.npmjs.org/weak-map/-/weak-map-1.0.5.tgz", "integrity": "sha1-eWkVhNmGB/UHC9O3CkDmuyLkAes=" }, + "webrtcvad": { + "version": "1.0.1", + "resolved": "https://registry.npmjs.org/webrtcvad/-/webrtcvad-1.0.1.tgz", + "integrity": "sha512-oLfReCmGMpRducFWKP+o0GpKZLPj0u6qkln3P7wGaEzyjxtBiFuzvK28pmKF5SSihEkC1RGKO7Pi9C4VfY1q4Q==", + "requires": { + "bindings": "^1.3.0", + "node-addon-api": "^1.7.1" + }, + "dependencies": { + "node-addon-api": { + "version": "1.7.2", + "resolved": "https://registry.npmjs.org/node-addon-api/-/node-addon-api-1.7.2.tgz", + "integrity": "sha512-ibPK3iA+vaY1eEjESkQkM0BbCqFOaZMiXRTtdB0u7b4djtY6JnsjvPdUHVMg6xQt3B8fpTTWHI9A+ADjM9frzg==" + } + } + }, "which": { "version": "1.3.1", "resolved": "https://registry.npmjs.org/which/-/which-1.3.1.tgz", diff --git a/package.json b/package.json index cf5b1e0..f362779 100644 --- a/package.json +++ b/package.json @@ -28,7 +28,8 @@ "node-gettext": "^3.0.0", "pulseaudio2": "^0.5.0", "snowboy": "^1.3.1", - "thingpedia": "~2.9.0" + "thingpedia": "~2.9.0", + "webrtcvad": "^1.0.1" }, "devDependencies": { "eslint": "^7.27.0" diff --git a/service/config.js b/service/config.js index d423ddb..fc392a6 100644 --- a/service/config.js +++ b/service/config.js @@ -21,5 +21,4 @@ module.exports.SEMPRE_URL = 'https://nlp.almond.stanford.edu'; module.exports.THINGPEDIA_URL = 'https://thingpedia.stanford.edu/thingpedia'; 
-module.exports.MS_SPEECH_RECOGNITION_PRIMARY_KEY = 'de1f02817356494483ba502b2ce95f6f'; -module.exports.MS_SPEECH_RECOGNITION_SECONDARY_KEY = '3dc6ce0b832940f0b0c984a1517c457e'; +module.exports.NL_URL = process.env.THINGENGINE_NL_URL || 'https://nlp-staging.almond.stanford.edu'; diff --git a/service/main.js b/service/main.js index 602674c..e4d77a6 100644 --- a/service/main.js +++ b/service/main.js @@ -338,7 +338,7 @@ class AppControlChannel extends events.EventEmitter { this._history = []; this._speechHandler = new Genie.SpeechHandler(this._conversation, _engine.platform, { - subscriptionKey: Config.MS_SPEECH_RECOGNITION_PRIMARY_KEY + nlUrl: Config.NL_URL }); let play; diff --git a/service/platform/index.js b/service/platform/index.js index 0522016..69da93f 100644 --- a/service/platform/index.js +++ b/service/platform/index.js @@ -258,6 +258,45 @@ class SystemSettings { } } +let webrtcvad; +try { + webrtcvad = require('webrtcvad').default; +} catch(e) { + console.log("VAD not available"); + webrtcvad = null; +} +// Voice activity detector: wraps the optional webrtcvad module (null when it failed to load). +class VAD { + constructor() { + this._instance = null; + this.frameSize = 0; + } + // (Re)creates the detector with the given bitrate and level; returns true on success, false when webrtcvad is unavailable. + setup(bitrate, level) { + if (this._instance) + this._instance = null; + + if (webrtcvad) { + this._instance = new webrtcvad(bitrate, level); + // 16khz audio single-channel 16 bit: 10ms: 160b, 20ms: 320b, 30ms: 480b + this.frameSize = 320; + // console.log("setup VAD bitrate", bitrate, "level", level); + return true; + } + + return false; + } + // Runs the detector on every complete frameSize-long frame of chunk and returns the sum of the per-frame results (false if setup() has not succeeded); a trailing partial frame is ignored. + process(chunk) { + if (!this._instance) + return false; + let n = Math.floor(chunk.length / this.frameSize), r = 0; + for (let i = 0; i < n; i++) + r += this._instance.process(chunk.slice(i * this.frameSize, (i + 1) * this.frameSize)); + return r; + } +} + class Platform extends Tp.BasePlatform { // Initialize the platform code // Will be called before instantiating the engine @@ -300,6 +339,9 @@ class Platform extends Tp.BasePlatform { this._ensurePulseConfig(); }); this._wakeWordDetector = new WakeWordDetector(); + this._voiceDetector = null; + if 
(webrtcvad && VAD) + this._voiceDetector = new VAD(); this._sqliteKey = null; } @@ -428,6 +470,9 @@ class Platform extends Tp.BasePlatform { case 'pulseaudio': return true; + case 'voice-detector': + return this._voiceDetector !== null; + case 'bluetooth': // temporarily disabled return false; @@ -468,7 +513,8 @@ class Platform extends Tp.BasePlatform { case 'wakeword-detector': return this._wakeWordDetector; - + case 'voice-detector': + return this._voiceDetector; case 'app-launcher': return this._appLauncher; case 'system-lock':