Skip to content

Commit

Permalink
Add VAD platform capability through webrtcvad
Browse files Browse the repository at this point in the history
  • Loading branch information
ad31c0 committed Jun 7, 2021
1 parent fee3478 commit 487db43
Show file tree
Hide file tree
Showing 5 changed files with 94 additions and 33 deletions.
69 changes: 42 additions & 27 deletions package-lock.json

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

5 changes: 3 additions & 2 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -23,12 +23,13 @@
"byline": "^5.0.0",
"canberra": "^0.1.2",
"dbus-native": "^0.4.0",
"genie-toolkit": "~0.8.0",
"genie-toolkit": "github:stanford-oval/genie-toolkit",
"keytar": "^7.7.0",
"node-gettext": "^3.0.0",
"pulseaudio2": "^0.5.0",
"snowboy": "^1.3.1",
"thingpedia": "~2.9.0"
"thingpedia": "~2.9.0",
"webrtcvad": "^1.0.1"
},
"devDependencies": {
"eslint": "^7.27.0"
Expand Down
3 changes: 1 addition & 2 deletions service/config.js
Original file line number Diff line number Diff line change
Expand Up @@ -21,5 +21,4 @@

module.exports.SEMPRE_URL = 'https://nlp.almond.stanford.edu';
module.exports.THINGPEDIA_URL = 'https://thingpedia.stanford.edu/thingpedia';
module.exports.MS_SPEECH_RECOGNITION_PRIMARY_KEY = 'de1f02817356494483ba502b2ce95f6f';
module.exports.MS_SPEECH_RECOGNITION_SECONDARY_KEY = '3dc6ce0b832940f0b0c984a1517c457e';
module.exports.NL_URL = process.env.THINGENGINE_NL_URL || 'https://nlp-staging.almond.stanford.edu';
2 changes: 1 addition & 1 deletion service/main.js
Original file line number Diff line number Diff line change
Expand Up @@ -338,7 +338,7 @@ class AppControlChannel extends events.EventEmitter {
this._history = [];

this._speechHandler = new Genie.SpeechHandler(this._conversation, _engine.platform, {
subscriptionKey: Config.MS_SPEECH_RECOGNITION_PRIMARY_KEY
nlUrl: Config.NL_URL
});

let play;
Expand Down
48 changes: 47 additions & 1 deletion service/platform/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -258,6 +258,45 @@ class SystemSettings {
}
}

// Optional dependency: the voice activity detector is only offered when the
// 'webrtcvad' module can be loaded. Start from "unavailable" and overwrite
// on a successful load.
let webrtcvad = null;
try {
    const vadModule = require('webrtcvad');
    webrtcvad = vadModule.default;
} catch(e) {
    // Best effort: the rest of the file checks `webrtcvad` for null.
    console.log("VAD not available");
}

/**
 * Thin wrapper around the optional `webrtcvad` detector.
 *
 * The wrapper is inert until `setup()` succeeds: `process()` returns `false`
 * while no detector instance exists.
 */
class VAD {
    constructor() {
        // Detector instance created by setup(); null while unavailable.
        this._instance = null;
        // Size of one frame handed to the detector, in units of chunk.length.
        this.frameSize = 0;
    }

    /**
     * (Re)create the underlying detector.
     *
     * @param {number} bitrate - forwarded as the first constructor argument of webrtcvad
     * @param {number} level - forwarded as the second constructor argument of webrtcvad
     * @returns {boolean} true if a detector was created, false if the
     *     `webrtcvad` module is not available
     */
    setup(bitrate, level) {
        // Drop any previous detector before (possibly) creating a new one.
        this._instance = null;

        if (!webrtcvad)
            return false;

        this._instance = new webrtcvad(bitrate, level);
        // 16 kHz single-channel 16-bit audio is 32000 bytes/s, so 320 bytes
        // is 10 ms (160 samples).
        // NOTE(review): the previous comment listed "10ms: 160, 20ms: 320,
        // 30ms: 480", which matches sample counts rather than bytes - confirm
        // which unit chunk.length is expressed in.
        this.frameSize = 320;
        return true;
    }

    /**
     * Run the detector over every complete frame contained in `chunk`.
     *
     * Trailing data shorter than one frame is ignored.
     *
     * @param {Buffer} chunk - audio data; only `length` and `slice()` are used
     * @returns {number|false} the sum of the per-frame detector results, or
     *     `false` when no detector has been set up
     */
    process(chunk) {
        if (!this._instance)
            return false;
        // Guard against a zero/invalid frame size, which would otherwise make
        // the frame count infinite.
        if (!(this.frameSize > 0))
            return 0;

        // Number of whole frames in the chunk (division, not remainder).
        const frameCount = Math.floor(chunk.length / this.frameSize);
        let r = 0;
        for (let i = 0; i < frameCount; i++) {
            const start = i * this.frameSize;
            // slice() takes an END index, not a length.
            r += this._instance.process(chunk.slice(start, start + this.frameSize));
        }
        return r;
    }
}

class Platform extends Tp.BasePlatform {
// Initialize the platform code
// Will be called before instantiating the engine
Expand Down Expand Up @@ -300,6 +339,9 @@ class Platform extends Tp.BasePlatform {
this._ensurePulseConfig();
});
this._wakeWordDetector = new WakeWordDetector();
this._voiceDetector = null;
if (webrtcvad && VAD)
this._voiceDetector = new VAD();

this._sqliteKey = null;
}
Expand Down Expand Up @@ -428,6 +470,9 @@ class Platform extends Tp.BasePlatform {
case 'pulseaudio':
return true;

case 'voice-detector':
return this._voiceDetector !== null;

case 'bluetooth':
// temporarily disabled
return false;
Expand Down Expand Up @@ -468,7 +513,8 @@ class Platform extends Tp.BasePlatform {

case 'wakeword-detector':
return this._wakeWordDetector;

case 'voice-detector':
return this._voiceDetector;
case 'app-launcher':
return this._appLauncher;
case 'system-lock':
Expand Down

0 comments on commit 487db43

Please sign in to comment.