Skip to content

Commit

Permalink
New methods for rescaling linear spectrograms
Browse files Browse the repository at this point in the history
Issue #332 Add in new methods for rescaling frequency but keeping linear.
  • Loading branch information
towsey committed Aug 19, 2020
1 parent 5fa1979 commit f10ea68
Show file tree
Hide file tree
Showing 3 changed files with 168 additions and 49 deletions.
27 changes: 26 additions & 1 deletion src/AudioAnalysisTools/DSP/FrequencyScale.cs
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,23 @@ public FrequencyScale(int nyquist, int frameSize, int hertzGridInterval)
this.GridLineLocations = GetLinearGridLineLocations(nyquist, this.HertzGridInterval, this.FinalBinCount);
}

/// <summary>
/// Initializes a new instance of the <see cref="FrequencyScale"/> class.
/// CONSTRUCTOR
/// Call this constructor when you want to change the number of frequency bins but keep the scale linear.
/// </summary>
/// <param name="nyquist">The Nyquist frequency. It is also stored as the linear bound of the scale.</param>
/// <param name="frameSize">The frame size, stored as the window size of the scale.</param>
/// <param name="finalBinCount">The number of frequency bins required in the rescaled spectrogram. Must be at least 1.</param>
/// <param name="hertzGridInterval">The interval used when calculating the grid line locations.</param>
/// <exception cref="ArgumentOutOfRangeException">Thrown when <paramref name="finalBinCount"/> is less than 1.</exception>
public FrequencyScale(int nyquist, int frameSize, int finalBinCount, int hertzGridInterval)
{
    // The bin count is used both as a divisor and as an array length when the bin bounds are calculated,
    // so reject a meaningless value here rather than failing later in the filter bank.
    if (finalBinCount < 1)
    {
        throw new ArgumentOutOfRangeException(nameof(finalBinCount), finalBinCount, "The final bin count must be at least 1.");
    }

    this.ScaleType = FreqScaleType.Linear;
    this.Nyquist = nyquist;
    this.WindowSize = frameSize;
    this.FinalBinCount = finalBinCount;
    this.HertzGridInterval = hertzGridInterval;
    this.LinearBound = nyquist;

    // The bin bounds depend on Nyquist, WindowSize and FinalBinCount, so they must be calculated after those are set.
    this.BinBounds = this.GetLinearBinBounds();
    this.GridLineLocations = GetLinearGridLineLocations(nyquist, this.HertzGridInterval, this.FinalBinCount);
}

/// <summary>
/// Initializes a new instance of the <see cref="FrequencyScale"/> class.
/// CONSTRUCTOR
Expand Down Expand Up @@ -259,11 +276,13 @@ public int GetBinIdInReducedSpectrogramForHerzValue(int herzValue)
public int[,] GetLinearBinBounds()
{
double herzInterval = this.Nyquist / (double)this.FinalBinCount;
double scaleFactor = this.WindowSize / 2 / (double)this.FinalBinCount;

var binBounds = new int[this.FinalBinCount, 2];

for (int i = 0; i < this.FinalBinCount; i++)
{
binBounds[i, 0] = i;
binBounds[i, 0] = (int)Math.Round(i * scaleFactor);
binBounds[i, 1] = (int)Math.Round(i * herzInterval);
}

Expand Down Expand Up @@ -331,6 +350,12 @@ public static void DrawFrequencyLinesOnImage(Image<Rgb24> bmp, int[,] gridLineLo
int height = bmp.Height;
int bandCount = gridLineLocations.GetLength(0);

if (gridLineLocations == null || bmp.Height < 50)
{
// there is no point placing gridlines on a narrow image. It obscures too much spectrogram.
return;
}

// draw the grid line for each frequency band
for (int b = 0; b < bandCount; b++)
{
Expand Down
93 changes: 46 additions & 47 deletions src/AudioAnalysisTools/StandardSpectrograms/BaseSonogram.cs
Original file line number Diff line number Diff line change
Expand Up @@ -132,6 +132,13 @@ public BaseSonogram(SonogramConfig config, FrequencyScale freqScale, WavReader w

this.FreqScale = freqScale;
this.InitialiseSpectrogram(wav);

if (this.FreqScale.ScaleType == FreqScaleType.Linear && this.FreqScale.WindowSize != this.FreqScale.FinalBinCount)
{
// convert the spectrogram frequency scale
this.Data = RescaleLinearFrequencyScale(this.Data, this.FreqScale);
}

this.Make(this.Data);
}

Expand Down Expand Up @@ -200,6 +207,41 @@ private void InitialiseSpectrogram(WavReader wav)
}
}

/// <summary>
/// Rescales the frequency axis of a spectrogram using the bin bounds of a linear frequency scale.
/// Each row (frame) of the input is rescaled in turn by
/// <see cref="SpectrogramStandard.RescaleSpectrumUsingFilterbank"/> and written to the same row of the output.
/// </summary>
/// <param name="inputSpgram">The spectrogram to rescale, one frame per row.</param>
/// <param name="freqScale">A linear frequency scale whose bin bounds define the output bins.</param>
/// <returns>A new matrix with the same number of rows as the input and one column per row of the bin bounds.</returns>
/// <exception cref="ArgumentNullException">Thrown when <paramref name="inputSpgram"/> or <paramref name="freqScale"/> is null.</exception>
/// <exception cref="ArgumentException">Thrown when <paramref name="freqScale"/> is not a linear scale.</exception>
public static double[,] RescaleLinearFrequencyScale(double[,] inputSpgram, FrequencyScale freqScale)
{
    if (inputSpgram == null)
    {
        throw new ArgumentNullException(nameof(inputSpgram));
    }

    if (freqScale == null)
    {
        throw new ArgumentNullException(nameof(freqScale));
    }

    if (freqScale.ScaleType != FreqScaleType.Linear)
    {
        // The argument is present but of the wrong kind, so this is an ArgumentException, not a null argument.
        LoggedConsole.WriteLine("Require a Linear frequency scale for this method.");
        throw new ArgumentException("Require a Linear frequency scale for this method.", nameof(freqScale));
    }

    // get the bin bounds for this scale type
    var binBounds = freqScale.BinBounds;
    int newBinCount = binBounds.GetLength(0);

    // set up the new spectrogram
    int frameCount = inputSpgram.GetLength(0);
    double[,] opM = new double[frameCount, newBinCount];

    for (int row = 0; row < frameCount; row++)
    {
        // get each frame or spectrum in turn and rescale.
        var linearSpectrum = MatrixTools.GetRow(inputSpgram, row);
        var rescaledSpectrum = SpectrogramStandard.RescaleSpectrumUsingFilterbank(binBounds, linearSpectrum);

        // return the spectrum to output spectrogram.
        MatrixTools.SetRow(opM, row, rescaledSpectrum);
    }

    return opM;
}

/// <summary>
/// Calculates SNR, ENERGY PER FRAME and NORMALISED dB PER FRAME.
/// </summary>
Expand Down Expand Up @@ -258,7 +300,10 @@ public Image<Rgb24> GetImageFullyAnnotated(Image<Rgb24> image, string title, int
throw new ArgumentNullException(nameof(image));
}

FrequencyScale.DrawFrequencyLinesOnImage(image, gridLineLocations, includeLabels: true);
if (gridLineLocations != null)
{
FrequencyScale.DrawFrequencyLinesOnImage(image, gridLineLocations, includeLabels: true);
}

// collect all the images and combine.
var titleBar = DrawTitleBarOfGrayScaleSpectrogram(title, image.Width, tag);
Expand Down Expand Up @@ -665,51 +710,5 @@ public static Image<Rgb24> DrawTitleBarOfGrayScaleSpectrogram(string title, int

return bmp;
}

/*
// mark of time scale according to scale.
public static Image<Rgb24> DrawTimeTrack(TimeSpan offsetMinute, TimeSpan xAxisPixelDuration, TimeSpan xAxisTicInterval, TimeSpan labelInterval, int trackWidth, int trackHeight, string title)
{
var bmp = new Image<Rgb24>(trackWidth, trackHeight);
bmp.Mutate(g =>
{
g.Clear(Color.Black);
double elapsedTime = offsetMinute.TotalSeconds;
double pixelDuration = xAxisPixelDuration.TotalSeconds;
int labelSecondsInterval = (int)labelInterval.TotalSeconds;
var whitePen = new Pen(Color.White, 1);
var stringFont = Drawing.Arial8;
// for columns, draw in second lines
double xInterval = (int)(xAxisTicInterval.TotalMilliseconds / xAxisPixelDuration.TotalMilliseconds);
// for pixels in the line
for (int x = 1; x < trackWidth; x++)
{
elapsedTime += pixelDuration;
if (x % xInterval <= pixelDuration)
{
g.DrawLine(whitePen, x, 0, x, trackHeight);
int totalSeconds = (int)Math.Round(elapsedTime);
if (totalSeconds % labelSecondsInterval == 0)
{
int minutes = totalSeconds / 60;
int seconds = totalSeconds % 60;
string time = $"{minutes}m{seconds}s";
g.DrawTextSafe(time, stringFont, Color.White, new PointF(x + 1, 2)); //draw time
}
}
}
g.DrawLine(whitePen, 0, 0, trackWidth, 0); //draw upper boundary
g.DrawLine(whitePen, 0, trackHeight - 1, trackWidth, trackHeight - 1); //draw lower boundary
g.DrawLine(whitePen, trackWidth, 0, trackWidth, trackHeight - 1); //draw right end boundary
g.DrawTextSafe(title, stringFont, Color.White, new PointF(4, 3));
});
return bmp;
}
*/
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ namespace AudioAnalysisTools.StandardSpectrograms

public class SpectrogramStandard : BaseSonogram
{
//There are five CONSTRUCTORS
//There are six CONSTRUCTORS

/// <summary>
/// Initializes a new instance of the <see cref="SpectrogramStandard"/> class.
Expand All @@ -28,6 +28,18 @@ public SpectrogramStandard(SonogramConfig config, WavReader wav)
{
}

/// <summary>
/// Initializes a new instance of the <see cref="SpectrogramStandard"/> class.
/// Use this constructor when you want to increase or decrease the number of bins in the linear frequency scale.
/// All three arguments are passed straight through to the base class constructor.
/// </summary>
/// <param name="config">Other info to construct the spectrogram.</param>
/// <param name="scale">The required new frequency scale.</param>
/// <param name="wav">The recording.</param>
public SpectrogramStandard(SonogramConfig config, FrequencyScale scale, WavReader wav)
: base(config, scale, wav)
{
}

/// <summary>
/// Initializes a new instance of the <see cref="SpectrogramStandard"/> class.
/// Use this constructor when you want to init a new Spectrogram by extracting portion of an existing sonogram.
Expand Down Expand Up @@ -133,5 +145,88 @@ public override void Make(double[,] amplitudeM)
this.SnrData.ModalNoiseProfile = tuple.Item2; // store the full bandwidth modal noise profile
}
}

/// <summary>
/// Rescales a single spectrum by passing it through a bank of triangular filters.
/// Column 0 of each row of <paramref name="transformMatrix"/> holds the index, into the input spectrum,
/// of the centre of one output bin. The filter for output bin i spans from the centre of bin i-1
/// to the centre of bin i+1. The first and last bins have no neighbour on one side, so their filters are one-sided.
/// </summary>
/// <param name="transformMatrix">The bin bounds. Only column 0 is read by this method.</param>
/// <param name="linearSpectrum">The spectrum to be rescaled.</param>
/// <returns>The rescaled spectrum, having one value for each row of <paramref name="transformMatrix"/>.</returns>
public static double[] RescaleSpectrumUsingFilterbank(int[,] transformMatrix, double[] linearSpectrum)
{
    int length = transformMatrix.GetLength(0);
    var rescaledSpectrum = new double[length];
    int lastRow = length - 1;
    int maxSpectrumIndex = linearSpectrum.Length - 1;

    for (int i = 0; i < length; i++)
    {
        // At either end of the scale the missing neighbour is replaced by the bin itself.
        // This also makes a matrix having only one row safe to process.
        int lowRow = i > 0 ? i - 1 : 0;
        int highRow = i < lastRow ? i + 1 : lastRow;

        int lowIndex = transformMatrix[lowRow, 0];
        int centreIndex = transformMatrix[i, 0];
        int highIndex = transformMatrix[highRow, 0];

        // Never let the upper edge of a filter run past the end of the spectrum.
        if (highIndex > maxSpectrumIndex)
        {
            highIndex = maxSpectrumIndex;
        }

        rescaledSpectrum[i] = FilterbankIntegral(linearSpectrum, lowIndex, centreIndex, highIndex);
    }

    return rescaledSpectrum;
}

/// <summary>
/// Calculates the weighted average of the spectrum values lying under one triangular filter.
/// The filter weight is 1.0 at <paramref name="centreIndex"/> and falls linearly to 0.0
/// at <paramref name="lowIndex"/> and at <paramref name="highIndex"/>.
/// A side of the filter is skipped when its index does not lie strictly beyond the centre index on that side.
/// </summary>
/// <param name="spectrum">The spectrum to be filtered.</param>
/// <param name="lowIndex">Index of the lower edge of the filter.</param>
/// <param name="centreIndex">Index of the peak of the filter. Must be a valid index into the spectrum.</param>
/// <param name="highIndex">Index of the upper edge of the filter.</param>
/// <returns>The sum of the weighted spectrum values divided by the sum of the weights.</returns>
public static double FilterbankIntegral(double[] spectrum, int lowIndex, int centreIndex, int highIndex)
{
    // let k = index into spectral vector.
    // for all k < lowIndex, filterBank[k] = 0;
    // for all k > highIndex, filterBank[k] = 0;
    // for all k in range (lowIndex <= k < centreIndex), filterBank[k] = (k - lowIndex) / (centreIndex - lowIndex)
    // for all k in range (centreIndex < k <= highIndex), filterBank[k] = (highIndex - k) / (highIndex - centreIndex)
    double area = 0.0;
    double integral = 0.0;

    // rising side of the triangle
    int delta = centreIndex - lowIndex;
    if (delta > 0)
    {
        for (int k = lowIndex; k < centreIndex; k++)
        {
            double weight = (k - lowIndex) / (double)delta;
            integral += weight * spectrum[k];
            area += weight;
        }
    }

    // the peak of the triangle always contributes with full weight
    integral += spectrum[centreIndex];
    area += 1.0;

    // falling side of the triangle
    delta = highIndex - centreIndex;
    if (delta > 0)
    {
        for (int k = centreIndex + 1; k <= highIndex; k++)
        {
            double weight = (highIndex - k) / (double)delta;
            integral += weight * spectrum[k];
            area += weight;
        }
    }

    // Normalise by the area of the triangular filter. The area is never less than 1.0, so the division is safe.
    return integral / area;
}
}
}

0 comments on commit f10ea68

Please sign in to comment.