# Forked from imfing/audio-classification.
# File: feat_extract.py -- 101 lines (87 loc), 3.59 KB.
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
#!/usr/bin/env python
# coding= UTF-8
#
# Author: Fing
# Date : 2017-12-03
#
import code
import glob
import os
import librosa
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.pyplot import specgram
import soundfile as sf
import sounddevice as sd
import queue
from tqdm import tqdm
import warnings
# Ignore all warnings process-wide (no category or module filter is given);
# presumably to keep the tqdm progress output readable -- confirm before
# relying on it, since it also hides deprecation warnings from librosa/numpy.
warnings.filterwarnings("ignore")
def _record_from_default_input(min_samples=100000):
    """Capture at least *min_samples* frames from the default input device.

    Returns:
        A ``(samples, sample_rate)`` pair. ``samples`` is the array built from
        every captured frame; ``sample_rate`` is the device's default rate
        truncated to an int.
    """
    device_info = sd.query_devices(None, 'input')
    sample_rate = int(device_info['default_samplerate'])
    blocks = queue.Queue()

    def callback(indata, frames, time_info, status):
        # Hand a copy to the consumer; presumably sounddevice reuses the
        # buffer after the callback returns -- confirm in its docs.
        blocks.put(indata.copy())

    samples = []
    with sd.InputStream(samplerate=sample_rate, callback=callback):
        # Whole blocks are appended, so the final length may overshoot
        # min_samples by up to one block.
        while len(samples) < min_samples:
            samples.extend(blocks.get())
    return np.array(samples), sample_rate


def extract_feature(file_name=None):
    """Compute five averaged librosa feature vectors for one audio clip.

    Args:
        file_name: Path of an audio file, loaded with ``librosa.load`` and its
            default arguments. When falsy (the default), audio is recorded
            from the default input device instead.

    Returns:
        A 5-tuple ``(mfccs, chroma, mel, contrast, tonnetz)``. Each element is
        the corresponding librosa feature matrix, transposed and averaged
        along axis 0, i.e. one value per feature row.
    """
    if file_name:
        X, sample_rate = librosa.load(file_name)
    else:
        X, sample_rate = _record_from_default_input()

    # Keep only the first channel of multi-channel input.
    if X.ndim > 1:
        X = X[:, 0]

    # Magnitude of the short-term Fourier transform, shared by the chroma and
    # spectral-contrast features below.
    stft = np.abs(librosa.stft(X))

    # mfcc (mel-frequency cepstrum)
    mfccs = np.mean(
        librosa.feature.mfcc(y=X, sr=sample_rate, n_mfcc=40).T, axis=0)
    # chroma
    chroma = np.mean(
        librosa.feature.chroma_stft(S=stft, sr=sample_rate).T, axis=0)
    # melspectrogram -- the signal must be passed as ``y=``: recent librosa
    # releases made this function's arguments keyword-only.
    mel = np.mean(
        librosa.feature.melspectrogram(y=X, sr=sample_rate).T, axis=0)
    # spectral contrast
    contrast = np.mean(
        librosa.feature.spectral_contrast(S=stft, sr=sample_rate).T, axis=0)
    # tonal centroid features, computed on the harmonic component only
    tonnetz = np.mean(
        librosa.feature.tonnetz(
            y=librosa.effects.harmonic(X), sr=sample_rate).T,
        axis=0)
    return mfccs, chroma, mel, contrast, tonnetz
def parse_audio_files(parent_dir,file_ext='*.mp3'):
    """Extract features for every audio file under per-class sub-directories.

    Each sub-directory of *parent_dir* is treated as one class. Files that
    fail feature extraction are reported on stdout and skipped.

    Args:
        parent_dir: Directory whose sub-directories hold the audio files.
        file_ext: Glob pattern applied inside each sub-directory.

    Returns:
        A ``(features, labels)`` pair. ``features`` is a float array with one
        193-column row per successfully processed file; ``labels`` is an int
        array of the same length. A label is the position of the file's
        sub-directory in the sorted listing of *parent_dir*; that listing
        also counts non-directory entries, so labels are not guaranteed to be
        contiguous.
    """
    sub_dirs = sorted(os.listdir(parent_dir))
    rows, labels = [], []
    for label, sub_dir in tqdm(enumerate(sub_dirs)):
        class_dir = os.path.join(parent_dir, sub_dir)
        if not os.path.isdir(class_dir):
            continue
        pattern = os.path.join(class_dir, file_ext)
        for fn in tqdm(glob.glob(pattern), desc="loading label " + sub_dir):
            try:
                mfccs, chroma, mel, contrast, tonnetz = extract_feature(fn)
            except Exception as e:
                # Best effort: one unreadable file must not abort the run.
                print("[Error] extract feature error in %s. %s" % (fn,e))
                continue
            rows.append(np.hstack([mfccs, chroma, mel, contrast, tonnetz]))
            labels.append(label)
        print("extract %s features done" % (sub_dir))
    # Stack once at the end instead of re-copying the matrix for every file.
    # The leading empty block pins the float64 dtype and the (0, 193) shape
    # when nothing was extracted. 193 is presumably 40 MFCC + 12 chroma +
    # 128 mel + 7 contrast + 6 tonnetz with librosa defaults -- confirm if
    # extract_feature changes.
    features = np.vstack([np.empty((0, 193))] + rows)
    # ``np.int`` was removed from NumPy; the builtin ``int`` is the same dtype.
    return features, np.array(labels, dtype=int)
def parse_predict_files(parent_dir,file_ext='*.mp3'):
    """Extract features for every matching audio file directly in a directory.

    Files that fail feature extraction are reported on stdout and skipped,
    matching ``parse_audio_files``, so one bad file does not discard the
    whole batch.

    Args:
        parent_dir: Directory to search (not recursive).
        file_ext: Glob pattern applied inside *parent_dir*.

    Returns:
        A ``(features, filenames)`` pair. ``features`` is a float array with
        one 193-column row per successfully processed file; ``filenames`` is
        an array of the matching paths, in the same order as the rows.
    """
    rows, filenames = [], []
    for fn in glob.glob(os.path.join(parent_dir, file_ext)):
        try:
            mfccs, chroma, mel, contrast, tonnetz = extract_feature(fn)
        except Exception as e:
            print("[Error] extract feature error in %s. %s" % (fn,e))
            continue
        # Append to both lists together so rows and filenames stay aligned.
        rows.append(np.hstack([mfccs, chroma, mel, contrast, tonnetz]))
        filenames.append(fn)
        print("extract %s features done" % fn)
    # Stack once at the end; the leading empty block keeps the float64 dtype
    # and the (0, 193) shape when no file was processed.
    features = np.vstack([np.empty((0, 193))] + rows)
    return features, np.array(filenames)
def main(train_dir='/Users/celik/media/cat_sounds/development',
         predict_dir='/Users/celik/media/cat_sounds/small_development/predict/Angry'):
    """Extract training and prediction features and save them as ``.npy`` files.

    Args:
        train_dir: Directory of per-class sub-directories passed to
            ``parse_audio_files``. Defaults to the original hard-coded path.
        predict_dir: Directory of files passed to ``parse_predict_files``.
            Defaults to the original hard-coded path.

    Writes ``feat.npy``, ``label.npy``, ``predict_feat.npy`` and
    ``predict_filenames.npy`` using relative paths, i.e. into the current
    working directory.
    """
    # Get features and labels
    features, labels = parse_audio_files(train_dir)
    np.save('feat.npy', features)
    np.save('label.npy', labels)

    # Predict new
    features, filenames = parse_predict_files(predict_dir)
    np.save('predict_feat.npy', features)
    np.save('predict_filenames.npy', filenames)


if __name__ == '__main__':
    main()