Skip to content

Commit

Permalink
Work on Australasian Pipit recognizer
Browse files Browse the repository at this point in the history
Issue #321 Transfer some post-processing steps from specific recognizer to GenericRecognizer class
  • Loading branch information
towsey committed Jun 5, 2020
1 parent 4b96cc8 commit 76cf9ff
Show file tree
Hide file tree
Showing 6 changed files with 118 additions and 195 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -23,25 +23,26 @@ Profiles:
MaxHertz: 7000
MinBandwidthHertz: 500
MaxBandwidthHertz: 5000
MinDuration: 0.1
MaxDuration: 0.5
DecibelThreshold: 6.0
#MinDuration: 0.1
#MaxDuration: 0.5
DecibelThreshold: 9.0

#################### POST-PROCESSING of EVENTS ###################

# A: First post-processing steps are to combine overlapping/proximal/sequential events
# 1: Combine overlapping events
#CombineOverlappingEvents: false
CombineOverlappingEvents: true

# 2: Combine each pair of Boobook syllables as one event
# Can also use this to "mop up" events in neighbourhood - these can be removed later.
CombinePossibleSyllableSequence: false
SyllableStartDifference: 3.0
SyllableHertzGap: 35
# 2: Combine syllables that possibly belong to the same strophe.
# Can also use this to "mop up" events in neighbourhood - for later removal.
CombinePossibleSyllableSequence: true
SyllableStartDifference: 0.25
SyllableHertzGap: 3000

# B: Filter the events for excess activity in their upper and lower buffer zones
LowerHertzBuffer: 150
UpperHertzBuffer: 400
NeighbourhoodLowerHertzBuffer: 200
NeighbourhoodUpperHertzBuffer: 0
NeighbourhoodDbThreshold: 9.0

# C: Options to save results files
# 4: Available options for saving data files (case-sensitive): [False/Never | True/Always | WhenEventsDetected]
Expand Down
198 changes: 16 additions & 182 deletions src/AnalysisPrograms/Recognizers/Birds/AnthusNovaeseelandiae.cs
Original file line number Diff line number Diff line change
Expand Up @@ -97,101 +97,36 @@ public override RecognizerResults Recognize(
//var newEvents = spectralEvents.Cast<EventCommon>().ToList();
//var spectralEvents = events.Select(x => (SpectralEvent)x).ToList();

// 1: Pull out the chirp events and calculate their frequency profiles.
var (whipEvents, others) = combinedResults.NewEvents.FilterForEventType<WhipEvent, EventCommon>();

// Uncomment the next line when want to obtain the event frequency profiles.
// WriteFrequencyProfiles(chirpEvents);

//foreach (var ev in whipEvents)
//{
// // Calculate frequency profile score for event
// SetFrequencyProfileScore((ChirpEvent)ev);
//}

if (combinedResults.NewEvents.Count == 0)
{
PipitLog.Debug($"Return zero events.");
return combinedResults;
}

// 2: Combine overlapping events. If the dB threshold is set low, may get lots of little events.
combinedResults.NewEvents = CompositeEvent.CombineOverlappingEvents(whipEvents.Cast<EventCommon>().ToList());
PipitLog.Debug($"Event count after combining overlaps = {combinedResults.NewEvents.Count}");

// 3: Combine proximal events. If the dB threshold is set low, may get lots of little events.
if (genericConfig.CombinePossibleSyllableSequence)
{
// Convert events to spectral events for combining of possible sequences.
// Can also use this parameter to combine events that are in the upper or lower neighbourhood.
// Such combinations will increase bandwidth of the event and this property can be used later to weed out unlikely events.
var spectralEvents1 = combinedResults.NewEvents.Cast<SpectralEvent>().ToList();
var startDiff = genericConfig.SyllableStartDifference;
var hertzDiff = genericConfig.SyllableHertzGap;
combinedResults.NewEvents = CompositeEvent.CombineProximalEvents(spectralEvents1, TimeSpan.FromSeconds(startDiff), (int)hertzDiff);
PipitLog.Debug($"Event count after combining proximals = {combinedResults.NewEvents.Count}");
}

// Get the PipitSyllable config.
const string profileName = "PipitSyllable";
var configuration = (PipitConfig)genericConfig;
var chirpConfig = (UpwardTrackParameters)configuration.Profiles[profileName];
// 1: Pull out the whip events and calculate their frequency profiles.
//var (whipEvents, others) = combinedResults.NewEvents.FilterForEventType<WhipEvent, EventCommon>();
// calculate profile here.

// 4: Filter events on the amount of acoustic activity in their upper and lower neighbourhoods - their buffer zone.
// The idea is that an unambiguous event should have some acoustic space above and below.
// The filter requires that the average acoustic activity in each frame and bin of the upper and lower buffer zones should not exceed the user specified decibel threshold.
// The bandwidth of these two neighbourhoods is determined by the following parameters.
// ########## These parameters could be specified by user in config.yml file.
var upperHertzBuffer = 0;
var lowerHertzBuffer = 0;
// Get the Pipit syllable config.
//const string profileName = "PipitSyllable";
//var whipConfig = (UpwardTrackParameters)genericConfig.Profiles[profileName];

// The decibel threshold is currently set 5/6ths of the user specified threshold.
// THIS IS TO BE WATCHED. IT MAY PROVE TO BE INAPPROPRIATE TO HARD-CODE.
// Want the activity in buffer zones to be "somewhat" less than the user-defined threshold.
var neighbourhoodDbThreshold = chirpConfig.DecibelThreshold.Value;

if (upperHertzBuffer > 0 || lowerHertzBuffer > 0)
{
var spectralEvents2 = combinedResults.NewEvents.Cast<SpectralEvent>().ToList();
combinedResults.NewEvents = EventExtentions.FilterEventsOnNeighbourhood(
spectralEvents2,
combinedResults.Sonogram,
lowerHertzBuffer,
upperHertzBuffer,
segmentStartOffset,
neighbourhoodDbThreshold);

PipitLog.Debug($"Event count after filtering on neighbourhood = {combinedResults.NewEvents.Count}");
}

if (combinedResults.NewEvents.Count == 0)
{
PipitLog.Debug($"Return zero events.");
return combinedResults;
}

// 5: Filter on COMPONENT COUNT in Composite events.
int maxComponentCount = 2;
// 2: Filter on COMPONENT COUNT in Composite events.
//int maxComponentCount = 2;
//combinedResults.NewEvents = EventExtentions.FilterEventsOnCompositeContent(combinedResults.NewEvents, maxComponentCount);
PipitLog.Debug($"Event count after filtering on component count = {combinedResults.NewEvents.Count}");

// 6: Filter the events for duration in seconds
var minimumEventDuration = chirpConfig.MinDuration;
var maximumEventDuration = chirpConfig.MaxDuration;
if (genericConfig.CombinePossibleSyllableSequence)
{
minimumEventDuration *= 2.0;
maximumEventDuration *= 1.5;
}
//PipitLog.Debug($"Event count after filtering on component count = {combinedResults.NewEvents.Count}");

//combinedResults.NewEvents = EventExtentions.FilterOnDuration(combinedResults.NewEvents, minimumEventDuration.Value, maximumEventDuration.Value);
// 3: Filter the events for duration in seconds
var minimumEventDuration = 0.1;
var maximumEventDuration = 0.8;
//combinedResults.NewEvents = EventExtentions.FilterOnDuration(combinedResults.NewEvents, minimumEventDuration, maximumEventDuration);
PipitLog.Debug($"Event count after filtering on duration = {combinedResults.NewEvents.Count}");

// 7: Filter the events for bandwidth in Hertz
// 4: Filter the events for bandwidth in Hertz
double average = 4000;
double sd =330;
double sd = 400;
double sigmaThreshold = 3.0;
combinedResults.NewEvents = EventExtentions.FilterOnBandwidth(combinedResults.NewEvents, average, sd, sigmaThreshold);
//combinedResults.NewEvents = EventExtentions.FilterOnBandwidth(combinedResults.NewEvents, average, sd, sigmaThreshold);
PipitLog.Debug($"Event count after filtering on bandwidth = {combinedResults.NewEvents.Count}");

//UNCOMMENT following line if you want special debug spectrogram, i.e. with special plots.
Expand All @@ -200,102 +135,6 @@ public override RecognizerResults Recognize(
return combinedResults;
}

/// <summary>
/// Sets the FrequencyProfileScore of an event to 1.0 if the frequency profile of its first track
/// has an inverted "U" shape (net rise followed by net fall), otherwise to 0.0.
/// The Boobook call syllable is shaped like an inverted "U". Its total duration is close to 0.15 seconds.
/// The rising portion lasts for 0.06s, followed by a turning portion, 0.03s, followed by the descending portion of 0.06s.
/// The constants for this method were obtained from the calls in a Gympie recording obtained by Yvonne Phillips.
/// </summary>
/// <param name="ev">An event containing at least one forward track i.e. a chirp.</param>
public static void SetFrequencyProfileScore(ChirpEvent ev)
{
    const double risingDuration = 0.06;
    const double gapDuration = 0.03;
    const double fallingDuration = 0.06;

    // Only the first track of the event is scored.
    var track = ev.Tracks.First();
    var profile = track.GetTrackFrequencyProfile().ToArray();

    // The duration of one frame is taken from the time extent of the first track point.
    var firstPoint = track.Points.First();
    var frameDuration = firstPoint.Seconds.Maximum - firstPoint.Seconds.Minimum;
    if (!(frameDuration > 0.0))
    {
        // Without a positive frame duration the durations cannot be converted to frame counts
        // (the division would yield infinity/NaN and the cast to int would be meaningless).
        ev.FrequencyProfileScore = 0.0;
        return;
    }

    var risingFrameCount = (int)Math.Floor(risingDuration / frameDuration);
    var gapFrameCount = (int)Math.Floor(gapDuration / frameDuration);
    var fallingFrameCount = (int)Math.Floor(fallingDuration / frameDuration);

    // Sum the profile over the rising portion, frames 0..risingFrameCount inclusive.
    // The guard must be strict because the loop reads profile[risingFrameCount].
    var startSum = 0.0;
    if (profile.Length > risingFrameCount)
    {
        for (var i = 0; i <= risingFrameCount; i++)
        {
            startSum += profile[i];
        }
    }

    // Sum the profile over the falling portion, which starts after the rising portion and the gap.
    // Again the guard must be strict because the loop reads profile[endFrame].
    int startFrame = risingFrameCount + gapFrameCount;
    int endFrame = startFrame + fallingFrameCount;
    var endSum = 0.0;
    if (profile.Length > endFrame)
    {
        for (var i = startFrame; i <= endFrame; i++)
        {
            endSum += profile[i];
        }
    }

    // set score to 1.0 if the profile has inverted U shape, i.e. net rise followed by net fall.
    // A profile too short to cover either portion leaves its sum at zero and therefore scores 0.0.
    double score = 0.0;
    if (startSum > 0.0 && endSum < 0.0)
    {
        score = 1.0;
    }

    ev.FrequencyProfileScore = score;
}

/// <summary>
/// Writes the frequency profile of every track in every event to the logged console.
/// For each track two lines are written: the sum of profile values 0-4 and the sum of profile values 6-10
/// (each sum is zero when the profile is too short), followed by the whole profile as a CSV line.
/// WARNING - this method assumes that the rising and falling parts of a Boobook call syllable last for 5 frames.
/// </summary>
/// <param name="events">List of spectral events.</param>
public static void WriteFrequencyProfiles(List<ChirpEvent> events)
{
    /* Here are the frequency profiles of some events.
     * Note that the first five frames (0.057 seconds) have positive slope and subsequent frames have negative slope.
     * The final frames are likely to be echo and to be avoided.
     * Therefore take the first 0.06s to calculate the positive slope, leave a gap of 0.025 seconds and then get negative slope from the next 0.06 seconds.
        42,21,21,42,21, 00, 21,-21,-21,-21, 00,-21,-42
        42,42,21,21,42,-21, 21, 00,-21,-21,-21,-21, 00,-21,21,-21
        42,42,21,21,42, 00, 00, 00,-21,-21,-21,-21,-21
        21,21,00,00,21, 21,-21, 00, 00,-21, 00,-21,-21,21,-21,42
        42,42,21,00,42, 00, 00,-21,-21,-21,-21, 00,-21,
        21,42,21,21,21, 00,-21,-21,-21, 00,-21,-21
        42,21,21,42,21, 21, 00,-21,-21,-21,-21
        42,42,21,42,00, 00,-21, 00,-21,-21, 00,-21,-21
     */

    // The list is already typed as ChirpEvent, so it is enumerated directly without casting or copying.
    foreach (var ev in events)
    {
        foreach (var track in ev.Tracks)
        {
            var profile = track.GetTrackFrequencyProfile().ToArray();

            // Rising portion: the first five frames.
            var startSum = 0.0;
            if (profile.Length >= 5)
            {
                startSum = profile[0] + profile[1] + profile[2] + profile[3] + profile[4];
            }

            // Falling portion: five frames after skipping the turning frame at index 5.
            var endSum = 0.0;
            if (profile.Length >= 11)
            {
                endSum = profile[6] + profile[7] + profile[8] + profile[9] + profile[10];
            }

            LoggedConsole.WriteLine($"{startSum} {endSum}");
            LoggedConsole.WriteLine(DataTools.WriteArrayAsCsvLine(profile, "F0"));
        }
    }
}

/*
/// <summary>
/// Summarize your results. This method is invoked exactly once per original file.
Expand All @@ -316,11 +155,6 @@ public override void SummariseResults(
/// <inheritdoc cref="PipitConfig"/>
public class PipitConfig : GenericRecognizerConfig, INamedProfiles<object>
{
public bool CombinePossibleSyllableSequence { get; set; } = false;

public double SyllableStartDifference { get; set; } = 0.5;

public double SyllableHertzGap { get; set; } = 200;
}
}
}
87 changes: 87 additions & 0 deletions src/AnalysisPrograms/Recognizers/GenericRecognizer.cs
Original file line number Diff line number Diff line change
Expand Up @@ -335,6 +335,48 @@ public override RecognizerResults Recognize(
//SaveDebugSpectrogram(allResults, genericConfig, outputDirectory, "name");
}

// ######################################################## POST-PROCESSING OF GENERIC EVENTS

Log.Debug($"Total event count = {allResults.NewEvents.Count}");

// 1: Combine overlapping events.
// This will be necessary where many small events have been found - possibly because the dB threshold is set low.
if (configuration.CombineOverlappingEvents)
{
allResults.NewEvents = CompositeEvent.CombineOverlappingEvents(allResults.NewEvents.Cast<EventCommon>().ToList());
Log.Debug($"Event count after combining overlapped events = {allResults.NewEvents.Count}");
}

// 2: Combine proximal events, that is, events that may be a sequence of syllables in the same strophe.
// Can also use this parameter to combine events that are in the upper or lower neighbourhood.
// Such combinations will increase bandwidth of the event and this property can be used later to weed out unlikely events.
if (configuration.CombinePossibleSyllableSequence)
{
// Must first convert events to spectral events.
var spectralEvents1 = allResults.NewEvents.Cast<SpectralEvent>().ToList();
var startDiff = configuration.SyllableStartDifference;
var hertzDiff = configuration.SyllableHertzGap;
allResults.NewEvents = CompositeEvent.CombineProximalEvents(spectralEvents1, TimeSpan.FromSeconds(startDiff), (int)hertzDiff);
Log.Debug($"Event count after combining proximals = {allResults.NewEvents.Count}");
}

// 3: Filter events on the amount of acoustic activity in their upper and lower neighbourhoods - their buffer zone.
// The idea is that an unambiguous event should have some acoustic space above and below.
// The filter requires that the average acoustic activity in each frame and bin of the upper and lower buffer zones should not exceed the user specified decibel threshold.
if (configuration.NeighbourhoodUpperHertzBuffer > 0 || configuration.NeighbourhoodLowerHertzBuffer > 0)
{
var spectralEvents2 = allResults.NewEvents.Cast<SpectralEvent>().ToList();
allResults.NewEvents = EventExtentions.FilterEventsOnNeighbourhood(
spectralEvents2,
allResults.Sonogram,
configuration.NeighbourhoodLowerHertzBuffer,
configuration.NeighbourhoodUpperHertzBuffer,
segmentStartOffset,
configuration.NeighbourhoodDbThreshold);

Log.Debug($"Event count after filtering on neighbourhood = {allResults.NewEvents.Count}");
}

return allResults;
}

Expand Down Expand Up @@ -405,6 +447,51 @@ public class GenericRecognizerConfig : RecognizerConfig, INamedProfiles<object>
{
/// <inheritdoc />
public Dictionary<string, object> Profiles { get; set; }

// ########### THE FOLLOWING PROPERTIES ARE FOR POST-PROCESSING OF EVENTS.

/// <summary>
/// Gets or sets a value indicating Whether or not to combine overlapping events.
/// </summary>
public bool CombineOverlappingEvents { get; set; }

/// <summary>
/// Gets or sets a value indicating Whether or not to combine events that constitute a sequence of the same strophe.
/// </summary>
public bool CombinePossibleSyllableSequence { get; set; }

/// <summary>
/// Gets or sets a value indicating the maximum allowable start time gap (seconds) between events within the same strophe.
/// This value is used only where CombinePossibleSyllableSequence = true.
/// </summary>
public double SyllableStartDifference { get; set; }

/// <summary>
/// Gets or sets a value indicating the maximum allowable difference (in Hertz) between the frequency bands of two events. I.e. events should be in similar frequency band.
/// NOTE: SIMILAR frequency band means the differences between two top Hertz values and the two low Hertz values are less than hertzDifference.
/// This value is used only where CombinePossibleSyllableSequence = true.
/// </summary>
public double SyllableHertzGap { get; set; }

// #### The next three properties determine filtering of events based on acoustic content of upper and lower buffer zones.

/// <summary>
/// Gets or sets a value indicating whether or not to filter events based on acoustic content of upper buffer zone.
/// If value = 0, the upper neighbourhood is ignored.
/// </summary>
public int NeighbourhoodUpperHertzBuffer { get; set; }

/// <summary>
/// Gets or sets a value indicating Whether or not to filter events based on the acoustic content of their lower buffer zone.
/// If value = 0, the lower neighbourhood is ignored.
/// </summary>
public int NeighbourhoodLowerHertzBuffer { get; set; }

/// <summary>
/// Gets or sets a value indicating the decibel threshold for acoustic activity in the upper and lower buffer zones.
/// This value is used only if NeighbourhoodLowerHertzBuffer > 0 OR NeighbourhoodUpperHertzBuffer > 0.
/// </summary>
public double NeighbourhoodDbThreshold { get; set; }
}
}
}
2 changes: 1 addition & 1 deletion src/AudioAnalysisTools/Events/EventExtentions.cs
Original file line number Diff line number Diff line change
Expand Up @@ -184,7 +184,7 @@ public static List<EventCommon> FilterOnDuration(List<EventCommon> events, doubl
}

double[,] subMatrix2 = null;
if (upperHertzBuffer > 0)
if (lowerHertzBuffer > 0)
{
subMatrix2 = GetLowerNeighbourhood(ev, spectrogramData, lowerHertzBuffer, lowerBinGap, converter);
}
Expand Down
Loading

0 comments on commit 76cf9ff

Please sign in to comment.