-
Notifications
You must be signed in to change notification settings - Fork 230
/
Copy pathaudio.cpp
458 lines (411 loc) · 15.2 KB
/
audio.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
//
// Handle microphone input, and speaker output via amp.
// The microphone input, and the output to amplifier, each make use of a
// separate I2S peripheral in the ESP32 or ESP32S3.
// I2S and PDM microphones are supported.
// I2S amplifiers are supported.
//
// If using I2S mic and I2S amp, then the following pins should be set to same values:
// - micSckPin = mampBckIo
// - micSWsPin = mampSwsIo
//
// A browser microphone on a PC or phone can be used:
// - for VoiceChanger app, this is used instead of local mic
// - need to press PC Mic button before selecting an action
// - for MJPEG2SD app, this is passed thru to speaker, independent of local mic
// - need to enable use amp and pins in Config / Peripherals for Start Mic button to be available on web page
// - browser mic should only be activated when need to speak
// Windows needs to allow microphone use in Microphone Privacy Settings
// In Microphone Properties / Advanced, check bit depth and sample rate (normally 16 bit 48kHz)
// Chrome needs to allow access to mic from insecure (http) site:
// Go to : chrome://flags/#unsafely-treat-insecure-origin-as-secure
// Enter app URL in box: http://<app_ip_address>
//
// s60sc 2024
#include "appGlobals.h"
#if INCLUDE_AUDIO
#include <ESP_I2S.h>
// I2S driver instances: I2Spdm is used for a PDM microphone,
// I2Sstd is used for an I2S microphone and for the I2S amplifier
I2SClass I2Spdm;
I2SClass I2Sstd;
// On ESP32, only I2S1 available with camera
i2s_port_t MIC_CHAN = I2S_NUM_1;
i2s_port_t AMP_CHAN = I2S_NUM_0;
static bool micUse = false; // esp mic available
bool micRem = false; // use browser mic (depends on app)
static bool ampUse = false; // whether esp amp / speaker available
bool spkrRem = false; // use browser speaker
// set true to make the record / play / passthru loops exit
bool volatile stopAudio = false;
// true while cam mic input is being written to the WAV file
static bool micRecording = false;
// I2S devices
bool I2Smic; // true if I2S, false if PDM
// I2S SCK and I2S BCLK can share same pin
// I2S external Microphone pins
// INMP441 I2S microphone pinout, connect L/R to GND for left channel
// MP34DT01 PDM microphone pinout, connect SEL to GND for left channel
int micSckPin = -1; // I2S SCK
int micSWsPin = -1; // I2S WS, PDM CLK
int micSdPin = -1; // I2S SD, PDM DAT
// I2S Amplifier pins
// MAX98357A
// SD leave as mono (unconnected)
// Gain: 100k to GND works, not direct to GND. Unconnected is 9 dB
int mampBckIo = -1; // I2S BCLK or SCK
int mampSwsIo = -1; // I2S LRCLK or WS
int mampSdIo = -1; // I2S DIN
int ampTimeout = 1000; // ms for amp write abandoned if no output
uint32_t SAMPLE_RATE = 16000; // audio rate in Hz
int micGain = 0; // microphone gain 0 is off
int8_t ampVol = 0; // amplifier volume factor 0 is off
TaskHandle_t audioHandle = NULL; // handle of audioTask, created in prepAudio()
static int totalSamples = 0; // number of 16 bit samples in current recording
static const uint8_t sampleWidth = sizeof(int16_t); // bytes per sample
const size_t sampleBytes = DMA_BUFF_LEN * sampleWidth; // size in bytes of sampleBuffer
int16_t* sampleBuffer = NULL; // working buffer of samples, allocated in prepAudio()
static uint8_t* wsBuffer = NULL; // holds browser mic data received via websocket
static size_t wsBufferLen = 0; // bytes pending in wsBuffer, 0 when buffer is free
uint8_t* audioBuffer = NULL; // VC recording buffer or cam mic input streamed to NVR
size_t audioBytes = 0; // number of bytes currently held in audioBuffer
static const char* micLabels[2] = {"PDM", "I2S"}; // indexed by I2Smic for log output
// psramMax is the upper limit in bytes for a VC recording held in PSRAM
#ifdef CONFIG_IDF_TARGET_ESP32S3
#define psramMax (ONEMEG * 6)
#else
#define psramMax (ONEMEG * 2)
#endif
#ifdef ISCAM
bool AudActive = false; // whether to show audio features
static File wavFile; // WAV file that cam mic input is recorded to
#endif
// Little endian RIFF / WAVE header for 16 bit mono PCM, laid out as:
//   0: "RIFF"      4: RIFF chunk size (set by updateWavHeader)
//   8: "WAVE"     12: "fmt "   16: fmt chunk length (16)
//  20: format 1 (PCM)          22: channels (1)
//  24: sample rate             28: byte rate   (both set by updateWavHeader)
//  32: block align (2)         34: bits per sample (16)
//  36: "data"                  40: data size (set by updateWavHeader)
static uint8_t wavHeader[WAV_HDR_LEN] = { // WAV header template
0x52, 0x49, 0x46, 0x46, 0x00, 0x00, 0x00, 0x00, 0x57, 0x41, 0x56, 0x45, 0x66, 0x6D, 0x74, 0x20,
0x10, 0x00, 0x00, 0x00, 0x01, 0x00, 0x01, 0x00, 0x11, 0x2B, 0x00, 0x00, 0x11, 0x2B, 0x00, 0x00,
0x02, 0x00, 0x10, 0x00, 0x64, 0x61, 0x74, 0x61, 0x00, 0x00, 0x00, 0x00,
};
void applyVolume() {
// determine required volume setting
int8_t adjVol = ampVol * 2; // use web page setting
#ifdef ISVC
adjVol = checkPotVol(adjVol); // use potentiometer setting if available
#endif
if (adjVol) {
// increase or reduce volume, 6 is unity eg midpoint of pot / web slider
adjVol = adjVol > 5 ? adjVol - 5 : adjVol - 7;
// apply volume control to samples
for (int i = 0; i < DMA_BUFF_LEN; i++) {
// apply volume control
sampleBuffer[i] = adjVol < 0 ? sampleBuffer[i] / abs(adjVol) : constrain((int32_t)sampleBuffer[i] * adjVol, SHRT_MIN, SHRT_MAX);
}
} // else turn off volume
}
// Start the I2S peripheral for the esp microphone, 16 bit mono at SAMPLE_RATE.
// Returns the result of the I2S begin() call.
static bool setupMic() {
  if (!I2Smic) {
    // PDM mic needs a separate channel to I2S
    I2Spdm.setPinsPdmRx(micSWsPin, micSdPin);
    return I2Spdm.begin(I2S_MODE_PDM_RX, SAMPLE_RATE, I2S_DATA_BIT_WIDTH_16BIT, I2S_SLOT_MODE_MONO, I2S_STD_SLOT_LEFT);
  }
  // I2S mic and I2S amp can share same I2S channel,
  // so the amp data out pin is configured here as well
  I2Sstd.setPins(micSckPin, micSWsPin, mampSdIo, micSdPin, -1); // BCLK/SCK, LRCLK/WS, SDOUT, SDIN, MCLK
  return I2Sstd.begin(I2S_MODE_STD, SAMPLE_RATE, I2S_DATA_BIT_WIDTH_16BIT, I2S_SLOT_MODE_MONO, I2S_STD_SLOT_LEFT);
}
// Start the I2S peripheral for the amplifier unless setupMic() already
// started it for an I2S microphone. Returns true if the amp channel is usable.
static bool setupAmp() {
  // shared channel already started by setupMic()
  if (micUse && I2Smic) return true;

  I2Sstd.setPins(mampBckIo, mampSwsIo, mampSdIo, -1, -1); // BCLK/SCK, LRCLK/WS, SDOUT, SDIN, MCLK
  return I2Sstd.begin(I2S_MODE_STD, SAMPLE_RATE, I2S_DATA_BIT_WIDTH_16BIT, I2S_SLOT_MODE_MONO, I2S_STD_SLOT_LEFT);
}
// Stop both I2S driver instances (standard I2S and PDM),
// so that they can be set up again by prepAudio()
void closeI2S() {
I2Sstd.end();
I2Spdm.end();
}
// Apply the microphone gain to the samples in sampleBuffer.
// bytesRead: number of bytes of valid sample data in sampleBuffer.
// The gain factor is 2 ^ (micGain - MIC_GAIN_CENTER), so settings below
// MIC_GAIN_CENTER attenuate and settings above amplify. micGain 0 is off (mute).
static void applyMicGain(size_t bytesRead) {
  // limit exponent so that the scaled sample always fits in 32 bits
  const int gainShift = constrain(micGain - MIC_GAIN_CENTER, -15, 15);
  // held as float so that fractional factors (0.5, 0.25 ..) attenuate
  // rather than being truncated to zero
  const float gainFactor = micGain ? (float)pow(2, gainShift) : 0.0f;
  const size_t numSamples = bytesRead / sampleWidth;
  for (size_t i = 0; i < numSamples; i++) {
    const int32_t scaled = (int32_t)(sampleBuffer[i] * gainFactor);
    // clip to the 16 bit sample range
    sampleBuffer[i] = (int16_t)constrain(scaled, (int32_t)SHRT_MIN, (int32_t)SHRT_MAX);
  }
}
// Read up to sampleBytes from the esp microphone into sampleBuffer and
// apply the mic gain. Returns the number of bytes read, 0 if no esp mic.
static size_t espMicInput() {
  if (!micUse) return 0;

  size_t gotBytes;
  if (I2Smic) {
    gotBytes = I2Sstd.readBytes((char*)sampleBuffer, sampleBytes);
  } else {
    gotBytes = I2Spdm.readBytes((char*)sampleBuffer, sampleBytes);
  }
  applyMicGain(gotBytes);
  return gotBytes;
}
// Fill in the variable fields of the WAV header template from the current
// recording (totalSamples) and SAMPLE_RATE, then copy the header to the
// start of audioBuffer when that buffer has been allocated.
// Returns the number of bytes of audio data described by the header.
size_t updateWavHeader() {
  const uint32_t dataBytes = totalSamples * sampleWidth;
  // RIFF chunk size excludes the 8 byte chunk header
  const uint32_t wavFileSize = dataBytes ? dataBytes + WAV_HDR_LEN - 8 : 0;
  // byte rate (SampleRate * NumChannels * BitsPerSample/8), mono
  const uint32_t byteRate = SAMPLE_RATE * sampleWidth;

  memcpy(wavHeader + 4, &wavFileSize, 4);
  memcpy(wavHeader + 24, &SAMPLE_RATE, 4); // sample rate
  memcpy(wavHeader + 28, &byteRate, 4);
  memcpy(wavHeader + WAV_HDR_LEN - 4, &dataBytes, 4); // wav data size

  // audioBuffer is only allocated if PSRAM was found, so avoid writing thru NULL
  if (audioBuffer != NULL) memcpy(audioBuffer, wavHeader, WAV_HDR_LEN);
  return dataBytes;
}
/*********************************************************************/
#ifdef ISVC
static size_t micInput() {
// get input from browser mic or else esp mic
size_t bytesRead = (micRem) ? wsBufferLen : espMicInput();
if (bytesRead && micRem) {
// double buffer browser mic input
memcpy(sampleBuffer, wsBuffer, bytesRead);
wsBufferLen = 0;
applyMicGain(bytesRead);
} else if (micRem) delay(20);
return bytesRead;
}
void browserMicInput(uint8_t* wsMsg, size_t wsMsgLen) {
// input from browser mic via websocket
if (micRem && !wsBufferLen) {
// copy browser mic input into sampleBuffer for amp
wsBufferLen = wsMsgLen;
memcpy(wsBuffer, wsMsg, wsMsgLen);
}
}
// Output the content of sampleBuffer after filtering.
// bytesRead: number of bytes in sampleBuffer to output.
// Goes to the browser speaker if selected, else to the esp amp if available.
// A copy is also placed in the RTSP audio buffer when that buffer is empty.
static void ampOutput(size_t bytesRead = sampleBytes) {
  applyFilters();

  uint8_t* outBytes = (uint8_t*)sampleBuffer;
  if (spkrRem) {
    wsAsyncSendBinary(outBytes, bytesRead); // browser speaker
  } else if (ampUse) {
    I2Sstd.write(outBytes, bytesRead); // esp amp speaker
  }

  if (!RTSPAudioBytes) {
    // RTSP buffer is empty, so refill it
    memcpy(RTSPAudioBuffer, sampleBuffer, bytesRead);
    RTSPAudioBytes = bytesRead;
  }
  displayAudioLed(sampleBuffer[0]);
}
// Send one buffer of mic input directly to the output, if any was obtained
static void passThru() {
  const size_t gotBytes = micInput();
  if (gotBytes == 0) return;
  ampOutput(gotBytes);
}
// Record mic input into audioBuffer, after space reserved for the WAV header,
// until stopped by stopAudio or psramMax bytes are held. Requires PSRAM.
static void makeRecording() {
  if (!psramFound()) {
    LOG_WRN("PSRAM needed to record and play");
    return;
  }

  LOG_INF("Recording ...");
  audioBytes = WAV_HDR_LEN; // leave space for wave header
  wsBufferLen = 0;
  while (audioBytes < psramMax) {
    const size_t gotBytes = micInput();
    if (gotBytes) {
      // append latest mic input to recording
      memcpy(audioBuffer + audioBytes, sampleBuffer, gotBytes);
      audioBytes += gotBytes;
    }
    if (stopAudio) break;
  }
  // loop exits without stopAudio only when psram is full, so tell browser
  if (!stopAudio) wsJsonSend("stopRec", "1");
  totalSamples = (audioBytes - WAV_HDR_LEN) / sampleWidth;
  LOG_INF("%s recording of %d samples", stopAudio ? "Stopped" : "Finished", totalSamples);
  stopAudio = true;
}
// Play the recording held in audioBuffer, one sampleBuffer sized chunk at a
// time, until the end of the recording or stopped by stopAudio. Requires PSRAM.
static void playRecording() {
  if (!psramFound()) {
    LOG_WRN("PSRAM needed to record and play");
    return;
  }

  LOG_INF("Playing %d samples, initial volume: %d", totalSamples, ampVol);
  // recorded data follows the WAV header, so end offset must include header length
  const size_t dataBytes = totalSamples > 0 ? (size_t)totalSamples * sampleWidth : 0;
  const size_t endOffset = WAV_HDR_LEN + dataBytes;
  for (size_t offset = WAV_HDR_LEN; offset < endOffset; offset += sampleBytes) {
    size_t chunkBytes = endOffset - offset;
    if (chunkBytes > sampleBytes) chunkBytes = sampleBytes;
    else if (chunkBytes < sampleBytes) {
      // final partial chunk, pad rest of buffer with silence
      // rather than picking up data from beyond end of recording
      memset(sampleBuffer, 0, sampleBytes);
    }
    memcpy(sampleBuffer, audioBuffer + offset, chunkBytes);
    ampOutput(chunkBytes);
    if (stopAudio) break;
  }
  // reached end of recording without being stopped, so tell browser
  if (!stopAudio) wsJsonSend("stopPlay", "1");
  LOG_INF("%s playing of %d samples", stopAudio ? "Stopped" : "Finished", totalSamples);
  stopAudio = true;
}
// Carry out the action requested by the user (THIS_ACTION) for the VC app.
// The I2S devices and filters are set up afresh before each action, and
// audioSemaphore is given once the action has completed.
static void VCactions() {
  stopAudio = false;
  closeI2S();
  prepAudio();
  setupFilters();

  switch (THIS_ACTION) {
    case RECORD_ACTION: {
      if (micRem) wsAsyncSendText("#M1");
      // needs some mic to record from
      if (!(micUse || micRem)) break;
      makeRecording();
      break;
    }
    case PLAY_ACTION: {
      // needs some output, continues till stopped
      if (!(ampUse || spkrRem || RTSPAudio)) break;
      playRecording(); // play previous recording
      break;
    }
    case PASS_ACTION: {
      // needs some output
      if (!(ampUse || spkrRem || RTSPAudio)) break;
      if (micRem) wsAsyncSendText("#M1");
      LOG_INF("Passthru started");
      wsBufferLen = 0;
      while (!stopAudio) {
        passThru();
      }
      LOG_INF("Passthru stopped");
      break;
    }
    default:
      break;
  }

  displayAudioLed(0);
  xSemaphoreGive(audioSemaphore);
}
#endif
/*****************************************************************/
#ifdef ISCAM
void browserMicInput(uint8_t* wsMsg, size_t wsMsgLen) {
// input from browser mic via websocket, send to esp amp
if (micRem && !wsBufferLen) {
wsBufferLen = wsMsgLen;
memcpy(wsBuffer, wsMsg, wsMsgLen);
int8_t adjVol = ampVol * 2; // use web page setting
if (adjVol) {
// increase or reduce volume, 6 is unity eg midpoint of web slider
adjVol = adjVol > 5 ? adjVol - 5 : adjVol - 7;
// apply volume control to samples
int16_t* wsPtr = (int16_t*) wsBuffer;
for (int i = 0; i < wsBufferLen / sizeof(int16_t); i++) {
// apply volume control
wsPtr[i] = adjVol < 0 ? wsPtr[i] / abs(adjVol) : constrain((int32_t)wsPtr[i] * adjVol, SHRT_MIN, SHRT_MAX);
}
}
I2Sstd.write(wsBuffer, wsBufferLen);
wsBufferLen = 0;
}
}
// Start recording esp mic input to a temporary WAV file.
// called from openAvi() in mjpeg2sd.cpp
// The recorded audio is combined into the AVI file as a PCM channel on
// FTP upload or browser download so it can be read by media players.
// Recording is not started if there is no esp mic, the mic gain is off,
// or the WAV file cannot be opened.
void startAudioRecord() {
  if (!(micUse && micGain)) {
    micRecording = false;
    LOG_WRN("No ESP mic defined or mic is off");
    return;
  }

  wavFile = STORAGE.open(WAVTEMP, FILE_WRITE);
  if (!wavFile) {
    micRecording = false;
    LOG_WRN("Unable to open %s for audio recording", WAVTEMP);
    return;
  }
  // placeholder header, overwritten with final sizes by finishAudioRecord()
  wavFile.write(wavHeader, WAV_HDR_LEN);
  // reset count before enabling recording, as the audio task
  // starts adding to it as soon as micRecording is set
  totalSamples = 0;
  micRecording = true;
}
// Finish the current audio recording, if any.
// called from closeAvi() in mjpeg2sd.cpp
// isValid: if true, the WAV header is rewritten with the final sizes so that
// the file is usable. The WAV file is closed in either case.
void finishAudioRecord(bool isValid) {
  if (!micRecording) return;
  micRecording = false;

  size_t dataBytes = 0;
  if (isValid) {
    dataBytes = updateWavHeader();
    wavFile.seek(0, SeekSet); // start of file
    wavFile.write(wavHeader, WAV_HDR_LEN); // overwrite default header
  }
  // always close, so the file is not left open when recording is discarded
  wavFile.close();

  if (isValid) {
    LOG_INF("Captured %d audio samples with gain factor %i", totalSamples, micGain - MIC_GAIN_CENTER);
    LOG_INF("Saved %s to SD for %s", fmtSize(dataBytes + WAV_HDR_LEN), WAVTEMP);
  }
}
// Continuously read the esp mic and pass the input to whichever outputs
// need it: WAV file recording, audioBuffer for the NVR stream, browser speaker.
// Does not return.
static void camActions() {
  for (;;) {
    // only read mic if some output wants data
    const bool wantInput = micRecording || !audioBytes || spkrRem;
    const size_t gotBytes = wantInput ? espMicInput() : 0; // loads sampleBuffer
    if (!gotBytes) {
      delay(20);
      continue;
    }

    uint8_t* micBytes = (uint8_t*)sampleBuffer;
    if (micRecording) {
      // record mic input to SD
      wavFile.write(micBytes, gotBytes);
      totalSamples += gotBytes / sampleWidth;
    }
    if (!audioBytes) {
      // audioBuffer is empty, refill it to send to NVR
      memcpy(audioBuffer, sampleBuffer, gotBytes);
      audioBytes = gotBytes;
    }
    // intercom esp mic to browser speaker
    if (spkrRem) wsAsyncSendBinary(micBytes, gotBytes);
  }
}
#endif
/************************************************************************/
// Select the I2S port used by the microphone, the amp gets the other port.
// whichChan: non zero puts the mic on I2S_NUM_1, zero puts it on I2S_NUM_0.
void setI2Schan(int whichChan) {
  const bool micOnPort1 = whichChan != 0;
  MIC_CHAN = micOnPort1 ? I2S_NUM_1 : I2S_NUM_0;
  AMP_CHAN = micOnPort1 ? I2S_NUM_0 : I2S_NUM_1;
}
// Apply microphone pins predefined at compile time for the board (if any),
// then derive the mic type and mic I2S port.
static void predefPins() {
#if defined(I2S_SD)
  // sized for any int value including sign and terminator
  char micPin[12];
  snprintf(micPin, sizeof(micPin), "%d", I2S_SD);
  updateStatus("micSdPin", micPin);
  snprintf(micPin, sizeof(micPin), "%d", I2S_WS);
  updateStatus("micSWsPin", micPin);
  snprintf(micPin, sizeof(micPin), "%d", I2S_SCK);
  updateStatus("micSckPin", micPin);
#endif
  // PDM mic has no SCK pin, so an undefined SCK pin means PDM
  I2Smic = micSckPin != -1;
#ifdef CONFIG_IDF_TARGET_ESP32S3
  MIC_CHAN = I2S_NUM_0;
#endif
}
// FreeRTOS task servicing audio processing.
// Blocks until notified, then runs the app specific actions.
// parameter: unused
static void audioTask(void* parameter) {
  for (;;) {
    // wait indefinitely for a notification, clearing it on exit
    ulTaskNotifyTake(pdTRUE, portMAX_DELAY);
#ifdef ISCAM
    camActions(); // runs constantly
#endif
#ifdef ISVC
    VCactions(); // runs once
#endif
  }
  vTaskDelete(NULL);
}
// Set up the esp microphone and amplifier where their pins are defined,
// allocate the audio buffers on first use, and create the audio task.
// VC uses audio task for all activities
// Cam uses audio task for microphone and intercom task for amplifier
void prepAudio() {
#ifdef ISCAM
  predefPins();
#endif

  const bool pdmOnPort1 = (MIC_CHAN == I2S_NUM_1) && !I2Smic;
  if (pdmOnPort1) {
    LOG_WRN("Only I2S devices supported on I2S_NUM_1");
  } else {
    // microphone
    if (micSdPin > 0) {
      micUse = setupMic();
      if (micUse) {
        LOG_INF("Sound capture is available using %s mic on I2S%i with gain %d", micLabels[I2Smic], MIC_CHAN, micGain);
      } else {
        LOG_WRN("Unable to start ESP mic");
      }
    } else {
      LOG_WRN("Microphone pins not defined");
    }

    // amplifier
    if (mampSdIo > 0) {
      ampUse = setupAmp();
      if (ampUse) {
        LOG_INF("Speaker output is available using I2S amp on I2S%i with vol %d", AMP_CHAN, ampVol);
      } else {
        LOG_WRN("Unable to start ESP amp");
      }
    } else {
      LOG_WRN("Amplifier pins not defined");
    }
  }

  // buffers are allocated once only and then retained
  if (sampleBuffer == NULL) {
    sampleBuffer = (int16_t*)malloc(sampleBytes);
  }
  if (wsBuffer == NULL) {
    wsBuffer = (uint8_t*)malloc(MAX_PAYLOAD_LEN);
  }
#ifdef ISVC
  // holds a whole recording, plus one extra sample buffer beyond psramMax
  if (audioBuffer == NULL && psramFound()) {
    audioBuffer = (uint8_t*)ps_malloc(psramMax + (sizeof(int16_t) * DMA_BUFF_LEN));
  }
#endif
#ifdef ISCAM
  // holds a single sample buffer
  if (audioBuffer == NULL && psramFound()) {
    audioBuffer = (uint8_t*)ps_malloc(sampleBytes);
  }
#endif

#ifdef ISVC
  // VC can still use audio task without esp mic or amp
  if (!micUse && !ampUse) {
    LOG_WRN("Only browser mic and speaker can be used");
  }
#endif
#ifdef ISCAM
  wsBufferLen = 0;
  // Audio task only needed for esp microphone
  if (!micUse) return;
#endif

  if (audioHandle == NULL) {
    xTaskCreate(audioTask, "audioTask", AUDIO_STACK_SIZE, NULL, AUDIO_PRI, &audioHandle);
  }
#ifdef ISCAM
  // start the cam audio loop
  xTaskNotifyGive(audioHandle);
#endif
  debugMemory("prepAudio");
}
#endif