This repository has been archived by the owner on Jun 10, 2022. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 3
/
SED.py
64 lines (58 loc) · 2.43 KB
/
SED.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
print('Loading TensorFlow...')
import numpy as np
#import scipy.signal
import soundfile as sf
import tensorflow as tf
print('Loading YAMNet...')
import params as yamnet_params
import yamnet as yamnet_model
params = yamnet_params.Params()
yamnet = yamnet_model.yamnet_frames_model(params)
yamnet.load_weights('yamnet.h5')
yamnet_classes = yamnet_model.class_names('yamnet_class_map_zh-tw.csv')
import os, pyaudio, time
#os.system('jack_control start')
p = pyaudio.PyAudio()
os.system('clear')
#print('Sound Event Detection by running inference on every 1.024 second audio stream from the microphone!\n')
CHUNK = 1024 # frames_per_buffer # samples per chunk
FORMAT = pyaudio.paInt16
CHANNELS = 1
RATE = 16000
RECORD_SECONDS = 1.024 # need at least 975 ms
INFERENCE_WINDOW = 2 * int(RATE / CHUNK * RECORD_SECONDS) # 2 * 16 CHUNKs
THRESHOLD = 0.4
stream = p.open(format=FORMAT,
channels=CHANNELS,
rate=RATE,
input=True,
frames_per_buffer=CHUNK)
CHUNKs = []
with open('sed.npy', 'ab') as f:
while True:
try:
stream.start_stream()
for i in range(0, int(RATE / CHUNK * RECORD_SECONDS)):
data = stream.read(CHUNK)
CHUNKs.append(data)
# print(len(CHUNKs))
stream.stop_stream()
if len(CHUNKs) > INFERENCE_WINDOW:
CHUNKs = CHUNKs[int(RATE / CHUNK * RECORD_SECONDS):]
# print('new len: ',len(CHUNKs))
wav_data = np.frombuffer(b''.join(CHUNKs), dtype=np.int16)
waveform = wav_data / tf.int16.max#32768.0
waveform = waveform.astype('float32')
scores, embeddings, spectrogram = yamnet(waveform)
prediction = np.mean(scores[:-1], axis=0) # last one scores comes from insufficient samples
# assert (prediction==scores[0]).numpy().all() # only one scores at RECORD_SECONDS = 1.024
assert len(scores[:-1]) == CHUNK * len(CHUNKs) / RATE // 0.48 - 1 # hop 0.48 seconds
top5 = np.argsort(prediction)[::-1][:5]
print(time.ctime().split()[3],
''.join((f" {prediction[i]:.2f} 👉{yamnet_classes[i][:7].ljust(7, ' ')}" if prediction[i] >= THRESHOLD else '') for i in top5))
np.save(f, np.concatenate(([time.time()], prediction)))
except:
stream.stop_stream()
stream.close()
p.terminate()
f.close()