-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathcustommels.py
63 lines (46 loc) · 2.12 KB
/
custommels.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
import librosa
import numpy as np
# Replicates librosa's mel filter-bank code, but with the mel-scale break
# frequency changed from librosa's 700 Hz to 1750 Hz (now a parameter).
def hz_to_mel(frequencies, break_freq):
    """Convert frequencies in Hz to mels for a given break frequency.

    Evaluates ``2595 * log10(1 + f / break_freq)`` element-wise.

    Args:
        frequencies: Scalar or array-like of frequencies; converted with
            ``np.array`` before use.
        break_freq: Divisor applied to the frequencies inside the logarithm.

    Returns:
        The mel values as a NumPy scalar or array matching the input's shape.
    """
    hz = np.array(frequencies)
    ratio = hz / break_freq
    return 2595.0 * np.log10(1.0 + ratio)
def mel_frequencies(n_mels, fmin, fmax, break_freq):
    """Return ``n_mels`` frequencies in Hz spaced evenly on the mel scale.

    The two limits are mapped to mels with ``hz_to_mel``, a linear grid of
    ``n_mels`` points is laid between them, and the grid is mapped back to Hz
    by inverting the same formula.

    Args:
        n_mels: Number of points to generate (both end points included).
        fmin: Lower limit in Hz.
        fmax: Upper limit in Hz.
        break_freq: Break frequency used for both directions of the mapping.

    Returns:
        Array of ``n_mels`` frequencies in Hz, running from ``fmin`` to
        ``fmax``.
    """
    mel_lo, mel_hi = (hz_to_mel(edge, break_freq) for edge in (fmin, fmax))
    mel_grid = np.linspace(mel_lo, mel_hi, n_mels)
    # Inverse of hz_to_mel: f = break_freq * (10 ** (m / 2595) - 1)
    return break_freq * (10.0 ** (mel_grid / 2595.0) - 1.0)
def mel_f(sr, n_mels, fmin, fmax, n_fft, break_freq):
    """Build a triangular mel filter bank with a configurable break frequency.

    Each row is a triangular filter whose corners are three consecutive
    points of ``mel_frequencies(n_mels + 2, fmin, fmax, break_freq)``. Rows
    are then scaled by ``2 / (upper_edge - lower_edge)`` (Slaney-style
    normalisation: approximately constant energy per channel).

    Args:
        sr: Sampling rate, passed to ``librosa.fft_frequencies``.
        n_mels: Number of mel bands; coerced with ``int``.
        fmin: Lowest band edge in Hz.
        fmax: Highest band edge in Hz.
        n_fft: FFT size; the result has ``1 + n_fft // 2`` columns.
        break_freq: Break frequency of the mel mapping.

    Returns:
        ``float32`` array of shape ``(n_mels, 1 + n_fft // 2)``.

    Warns:
        UserWarning: If a band whose lower edge is not 0 Hz has an all-zero
            filter (it falls between FFT bins and will produce no output).
    """
    import warnings

    n_mels = int(n_mels)
    weights = np.zeros((n_mels, int(1 + n_fft // 2)), dtype=np.float32)

    # Centre frequency of each FFT bin.
    fftfreqs = librosa.fft_frequencies(sr=sr, n_fft=n_fft)

    # Band corner frequencies: n_mels bands need n_mels + 2 corner points.
    band_edges = mel_frequencies(n_mels + 2, fmin, fmax, break_freq)

    fdiff = np.diff(band_edges)
    ramps = np.subtract.outer(band_edges, fftfreqs)

    # Rising and falling slopes of every band at once (row i uses corners
    # i, i + 1 and i + 2), intersected with each other and clipped at zero.
    lower = -ramps[:-2] / fdiff[:-1, np.newaxis]
    upper = ramps[2:] / fdiff[1:, np.newaxis]
    weights[:] = np.maximum(0, np.minimum(lower, upper))

    # Slaney-style mel is scaled to be approx constant energy per channel.
    enorm = 2.0 / (band_edges[2 : n_mels + 2] - band_edges[:n_mels])
    weights *= enorm[:, np.newaxis]

    # A band is acceptable if its lower edge sits at 0 Hz or its filter has
    # at least one positive weight; anything else is an empty channel.
    has_response = (band_edges[:-2] == 0) | (weights.max(axis=1) > 0)
    if not np.all(has_response):
        # Emitted as a warning (not printed) so callers can filter or
        # capture it with the standard ``warnings`` machinery.
        warnings.warn(
            "Empty filters detected in mel frequency basis. "
            "Some channels will produce empty responses. "
            "Try increasing your sampling rate (and fmax) or "
            "reducing n_mels.",
            stacklevel=2,
        )
    return weights
def mel_spec(
    stft, sr, n_fft, hop_length, n_mels, fmin, fmax, break_freq=1750, power=2
):
    """Project an STFT onto the custom mel filter bank.

    Args:
        stft: STFT coefficients; the element-wise magnitude is taken.
            Presumably shaped ``(1 + n_fft // 2, n_frames)`` so it lines up
            with the filter bank's columns -- TODO confirm against callers.
        sr: Sampling rate, forwarded to ``mel_f``.
        n_fft: FFT size, forwarded to ``mel_f``.
        hop_length: Not used by this function; kept in the signature.
        n_mels: Number of mel bands, forwarded to ``mel_f``.
        fmin: Lowest band edge in Hz, forwarded to ``mel_f``.
        fmax: Highest band edge in Hz, forwarded to ``mel_f``.
        break_freq: Mel-scale break frequency (default 1750).
        power: Exponent applied to the magnitude (default 2).

    Returns:
        The dot product of the mel filter bank with ``|stft| ** power``.
    """
    spectrum = np.abs(stft) ** power
    filter_bank = mel_f(sr, n_mels, fmin, fmax, n_fft, break_freq)
    return np.dot(filter_bank, spectrum)