From 76cf9ffc727e55bf8f5ee29bfc68c3dd375e5212 Mon Sep 17 00:00:00 2001 From: towsey Date: Fri, 5 Jun 2020 21:14:22 +1000 Subject: [PATCH] Work on Australiasian Pipit recognizer Issue #321 Transfer some post-processing steps from specific recognizer to GenericRecognizer class --- .../Towsey.AnthusNovaeseelandiae.yml | 23 +- .../Birds/AnthusNovaeseelandiae.cs | 198 ++---------------- .../Recognizers/GenericRecognizer.cs | 87 ++++++++ .../Events/EventExtentions.cs | 2 +- .../Events/Types/CompositeEvent.cs | 1 + .../Recognizers/AustralPipitTests.cs | 2 +- 6 files changed, 118 insertions(+), 195 deletions(-) diff --git a/src/AnalysisConfigFiles/RecognizerConfigFiles/Towsey.AnthusNovaeseelandiae.yml b/src/AnalysisConfigFiles/RecognizerConfigFiles/Towsey.AnthusNovaeseelandiae.yml index 6ba9db529..886d36538 100644 --- a/src/AnalysisConfigFiles/RecognizerConfigFiles/Towsey.AnthusNovaeseelandiae.yml +++ b/src/AnalysisConfigFiles/RecognizerConfigFiles/Towsey.AnthusNovaeseelandiae.yml @@ -23,25 +23,26 @@ Profiles: MaxHertz: 7000 MinBandwidthHertz: 500 MaxBandwidthHertz: 5000 - MinDuration: 0.1 - MaxDuration: 0.5 - DecibelThreshold: 6.0 + #MinDuration: 0.1 + #MaxDuration: 0.5 + DecibelThreshold: 9.0 #################### POST-PROCESSING of EVENTS ################### # A: First post-processing steps are to combine overlapping/proximal/sequential events # 1: Combine overlapping events -#CombineOverlappingEvents: false +CombineOverlappingEvents: true -# 2: Combine each pair of Boobook syllables as one event -# Can also use this to "mop up" events in neighbourhood - these can be removed later. -CombinePossibleSyllableSequence: false -SyllableStartDifference: 3.0 -SyllableHertzGap: 35 +# 2: Combine syllables that possibly belong to the saem strophe. +# Can also use this to "mop up" events in neighbourhood - for later removal. 
+CombinePossibleSyllableSequence: true +SyllableStartDifference: 0.25 +SyllableHertzGap: 3000 # B: Filter the events for excess activity in their upper and lower buffer zones -LowerHertzBuffer: 150 -UpperHertzBuffer: 400 +NeighbourhoodLowerHertzBuffer: 200 +NeighbourhoodUpperHertzBuffer: 0 +NeighbourhoodDbThreshold: 9.0 # C: Options to save results files # 4: Available options for saving data files (case-sensitive): [False/Never | True/Always | WhenEventsDetected] diff --git a/src/AnalysisPrograms/Recognizers/Birds/AnthusNovaeseelandiae.cs b/src/AnalysisPrograms/Recognizers/Birds/AnthusNovaeseelandiae.cs index 6596cc8d3..dbcd928f4 100644 --- a/src/AnalysisPrograms/Recognizers/Birds/AnthusNovaeseelandiae.cs +++ b/src/AnalysisPrograms/Recognizers/Birds/AnthusNovaeseelandiae.cs @@ -97,101 +97,36 @@ public override RecognizerResults Recognize( //var newEvents = spectralEvents.Cast().ToList(); //var spectralEvents = events.Select(x => (SpectralEvent)x).ToList(); - // 1: Pull out the chirp events and calculate their frequency profiles. - var (whipEvents, others) = combinedResults.NewEvents.FilterForEventType(); - - // Uncomment the next line when want to obtain the event frequency profiles. - // WriteFrequencyProfiles(chirpEvents); - - //foreach (var ev in whipEvents) - //{ - // // Calculate frequency profile score for event - // SetFrequencyProfileScore((ChirpEvent)ev); - //} - if (combinedResults.NewEvents.Count == 0) { PipitLog.Debug($"Return zero events."); return combinedResults; } - // 2: Combine overlapping events. If the dB threshold is set low, may get lots of little events. - combinedResults.NewEvents = CompositeEvent.CombineOverlappingEvents(whipEvents.Cast().ToList()); - PipitLog.Debug($"Event count after combining overlaps = {combinedResults.NewEvents.Count}"); - - // 3: Combine proximal events. If the dB threshold is set low, may get lots of little events. 
- if (genericConfig.CombinePossibleSyllableSequence) - { - // Convert events to spectral events for combining of possible sequences. - // Can also use this parameter to combine events that are in the upper or lower neighbourhood. - // Such combinations will increase bandwidth of the event and this property can be used later to weed out unlikely events. - var spectralEvents1 = combinedResults.NewEvents.Cast().ToList(); - var startDiff = genericConfig.SyllableStartDifference; - var hertzDiff = genericConfig.SyllableHertzGap; - combinedResults.NewEvents = CompositeEvent.CombineProximalEvents(spectralEvents1, TimeSpan.FromSeconds(startDiff), (int)hertzDiff); - PipitLog.Debug($"Event count after combining proximals = {combinedResults.NewEvents.Count}"); - } - - // Get the PipitSyllable config. - const string profileName = "PipitSyllable"; - var configuration = (PipitConfig)genericConfig; - var chirpConfig = (UpwardTrackParameters)configuration.Profiles[profileName]; + // 1: Pull out the whip events and calculate their frequency profiles. + //var (whipEvents, others) = combinedResults.NewEvents.FilterForEventType(); + // calculate profile here. - // 4: Filter events on the amount of acoustic activity in their upper and lower neighbourhoods - their buffer zone. - // The idea is that an unambiguous event should have some acoustic space above and below. - // The filter requires that the average acoustic activity in each frame and bin of the upper and lower buffer zones should not exceed the user specified decibel threshold. - // The bandwidth of these two neighbourhoods is determined by the following parameters. - // ########## These parameters could be specified by user in config.yml file. - var upperHertzBuffer = 0; - var lowerHertzBuffer = 0; + // Get the Pipit syllable config. 
+ //const string profileName = "PipitSyllable"; + //var whipConfig = (UpwardTrackParameters)genericConfig.Profiles[profileName]; - // The decibel threshold is currently set 5/6ths of the user specified threshold. - // THIS IS TO BE WATCHED. IT MAY PROVE TO BE INAPPROPRIATE TO HARD-CODE. - // Want the activity in buffer zones to be "somewhat" less than the user-defined threshold. - var neighbourhoodDbThreshold = chirpConfig.DecibelThreshold.Value; - - if (upperHertzBuffer > 0 || lowerHertzBuffer > 0) - { - var spectralEvents2 = combinedResults.NewEvents.Cast().ToList(); - combinedResults.NewEvents = EventExtentions.FilterEventsOnNeighbourhood( - spectralEvents2, - combinedResults.Sonogram, - lowerHertzBuffer, - upperHertzBuffer, - segmentStartOffset, - neighbourhoodDbThreshold); - - PipitLog.Debug($"Event count after filtering on neighbourhood = {combinedResults.NewEvents.Count}"); - } - - if (combinedResults.NewEvents.Count == 0) - { - PipitLog.Debug($"Return zero events."); - return combinedResults; - } - - // 5: Filter on COMPONENT COUNT in Composite events. - int maxComponentCount = 2; + // 2: Filter on COMPONENT COUNT in Composite events. 
+ //int maxComponentCount = 2; //combinedResults.NewEvents = EventExtentions.FilterEventsOnCompositeContent(combinedResults.NewEvents, maxComponentCount); - PipitLog.Debug($"Event count after filtering on component count = {combinedResults.NewEvents.Count}"); - - // 6: Filter the events for duration in seconds - var minimumEventDuration = chirpConfig.MinDuration; - var maximumEventDuration = chirpConfig.MaxDuration; - if (genericConfig.CombinePossibleSyllableSequence) - { - minimumEventDuration *= 2.0; - maximumEventDuration *= 1.5; - } + //PipitLog.Debug($"Event count after filtering on component count = {combinedResults.NewEvents.Count}"); - //combinedResults.NewEvents = EventExtentions.FilterOnDuration(combinedResults.NewEvents, minimumEventDuration.Value, maximumEventDuration.Value); + // 3: Filter the events for duration in seconds + var minimumEventDuration = 0.1; + var maximumEventDuration = 0.8; + //combinedResults.NewEvents = EventExtentions.FilterOnDuration(combinedResults.NewEvents, minimumEventDuration, maximumEventDuration); PipitLog.Debug($"Event count after filtering on duration = {combinedResults.NewEvents.Count}"); - // 7: Filter the events for bandwidth in Hertz + // 4: Filter the events for bandwidth in Hertz double average = 4000; - double sd =330; + double sd = 400; double sigmaThreshold = 3.0; - combinedResults.NewEvents = EventExtentions.FilterOnBandwidth(combinedResults.NewEvents, average, sd, sigmaThreshold); + //combinedResults.NewEvents = EventExtentions.FilterOnBandwidth(combinedResults.NewEvents, average, sd, sigmaThreshold); PipitLog.Debug($"Event count after filtering on bandwidth = {combinedResults.NewEvents.Count}"); //UNCOMMENT following line if you want special debug spectrogram, i.e. with special plots. @@ -200,102 +135,6 @@ public override RecognizerResults Recognize( return combinedResults; } - /// - /// The Boobook call syllable is shaped like an inverted "U". Its total duration is close to 0.15 seconds. 
- /// The rising portion lasts for 0.06s, followed by a turning portion, 0.03s, followed by the decending portion of 0.06s. - /// The constants for this method were obtained from the calls in a Gympie recording obtained by Yvonne Phillips. - /// - /// An event containing at least one forward track i.e. a chirp. - public static void SetFrequencyProfileScore(ChirpEvent ev) - { - const double risingDuration = 0.06; - const double gapDuration = 0.03; - const double fallingDuration = 0.06; - - var track = ev.Tracks.First(); - var profile = track.GetTrackFrequencyProfile().ToArray(); - - // get the first point - var firstPoint = track.Points.First(); - var frameDuration = firstPoint.Seconds.Maximum - firstPoint.Seconds.Minimum; - var risingFrameCount = (int)Math.Floor(risingDuration / frameDuration); - var gapFrameCount = (int)Math.Floor(gapDuration / frameDuration); - var fallingFrameCount = (int)Math.Floor(fallingDuration / frameDuration); - - var startSum = 0.0; - if (profile.Length >= risingFrameCount) - { - for (var i = 0; i <= risingFrameCount; i++) - { - startSum += profile[i]; - } - } - - int startFrame = risingFrameCount + gapFrameCount; - int endFrame = startFrame + fallingFrameCount; - var endSum = 0.0; - if (profile.Length >= endFrame) - { - for (var i = startFrame; i <= endFrame; i++) - { - endSum += profile[i]; - } - } - - // set score to 1.0 if the profile has inverted U shape. - double score = 0.0; - if (startSum > 0.0 && endSum < 0.0) - { - score = 1.0; - } - - ev.FrequencyProfileScore = score; - } - - /// - /// WARNING - this method assumes that the rising and falling parts of a Boobook call syllable last for 5 frames. - /// - /// List of spectral events. - public static void WriteFrequencyProfiles(List events) - { - /* Here are the frequency profiles of some events. - * Note that the first five frames (0.057 seconds) have positive slope and subsequent frames have negative slope. - * The final frames are likely to be echo and to be avoided. 
- * Therefore take the first 0.6s to calculate the positive slope, leave a gap of 0.025 seconds and then get negative slope from the next 0.6 seconds. -42,21,21,42,21, 00, 21,-21,-21,-21, 00,-21,-42 -42,42,21,21,42,-21, 21, 00,-21,-21,-21,-21, 00,-21,21,-21 -42,42,21,21,42, 00, 00, 00,-21,-21,-21,-21,-21 -21,21,00,00,21, 21,-21, 00, 00,-21, 00,-21,-21,21,-21,42 -42,42,21,00,42, 00, 00,-21,-21,-21,-21, 00,-21, -21,42,21,21,21, 00,-21,-21,-21, 00,-21,-21 -42,21,21,42,21, 21, 00,-21,-21,-21,-21 -42,42,21,42,00, 00,-21, 00,-21,-21, 00,-21,-21 -*/ - - var spectralEvents = events.Select(x => (ChirpEvent)x).ToList(); - foreach (var ev in spectralEvents) - { - foreach (var track in ev.Tracks) - { - var profile = track.GetTrackFrequencyProfile().ToArray(); - var startSum = 0.0; - if (profile.Length >= 5) - { - startSum = profile[0] + profile[1] + profile[2] + profile[3] + profile[4]; - } - - var endSum = 0.0; - if (profile.Length >= 11) - { - endSum = profile[6] + profile[7] + profile[8] + profile[9] + profile[10]; - } - - LoggedConsole.WriteLine($"{startSum} {endSum}"); - LoggedConsole.WriteLine(DataTools.WriteArrayAsCsvLine(profile, "F0")); - } - } - } - /* /// /// Summarize your results. This method is invoked exactly once per original file. 
@@ -316,11 +155,6 @@ public override void SummariseResults( /// /> public class PipitConfig : GenericRecognizerConfig, INamedProfiles { - public bool CombinePossibleSyllableSequence { get; set; } = false; - - public double SyllableStartDifference { get; set; } = 0.5; - - public double SyllableHertzGap { get; set; } = 200; } } } diff --git a/src/AnalysisPrograms/Recognizers/GenericRecognizer.cs b/src/AnalysisPrograms/Recognizers/GenericRecognizer.cs index d01a29747..3668b982f 100644 --- a/src/AnalysisPrograms/Recognizers/GenericRecognizer.cs +++ b/src/AnalysisPrograms/Recognizers/GenericRecognizer.cs @@ -335,6 +335,48 @@ public override RecognizerResults Recognize( //SaveDebugSpectrogram(allResults, genericConfig, outputDirectory, "name"); } + // ######################################################## POST-PROCESSING OF GENERIC EVENTS + + Log.Debug($"Total event count = {allResults.NewEvents.Count}"); + + // 1: Combine overlapping events. + // This will be necessary where many small events have been found - possibly because the dB threshold is set low. + if (configuration.CombineOverlappingEvents) + { + allResults.NewEvents = CompositeEvent.CombineOverlappingEvents(allResults.NewEvents.Cast().ToList()); + Log.Debug($"Event count after combining overlapped events = {allResults.NewEvents.Count}"); + } + + // 2: Combine proximal events, that is, events that may be a sequence of syllables in the same strophe. + // Can also use this parameter to combine events that are in the upper or lower neighbourhood. + // Such combinations will increase bandwidth of the event and this property can be used later to weed out unlikely events. + if (configuration.CombinePossibleSyllableSequence) + { + // Must first convert events to spectral events. 
+ var spectralEvents1 = allResults.NewEvents.Cast().ToList(); + var startDiff = configuration.SyllableStartDifference; + var hertzDiff = configuration.SyllableHertzGap; + allResults.NewEvents = CompositeEvent.CombineProximalEvents(spectralEvents1, TimeSpan.FromSeconds(startDiff), (int)hertzDiff); + Log.Debug($"Event count after combining proximals = {allResults.NewEvents.Count}"); + } + + // 3: Filter events on the amount of acoustic activity in their upper and lower neighbourhoods - their buffer zone. + // The idea is that an unambiguous event should have some acoustic space above and below. + // The filter requires that the average acoustic activity in each frame and bin of the upper and lower buffer zones should not exceed the user specified decibel threshold. + if (configuration.NeighbourhoodUpperHertzBuffer > 0 || configuration.NeighbourhoodLowerHertzBuffer > 0) + { + var spectralEvents2 = allResults.NewEvents.Cast().ToList(); + allResults.NewEvents = EventExtentions.FilterEventsOnNeighbourhood( + spectralEvents2, + allResults.Sonogram, + configuration.NeighbourhoodLowerHertzBuffer, + configuration.NeighbourhoodUpperHertzBuffer, + segmentStartOffset, + configuration.NeighbourhoodDbThreshold); + + Log.Debug($"Event count after filtering on neighbourhood = {allResults.NewEvents.Count}"); + } + return allResults; } @@ -405,6 +447,51 @@ public class GenericRecognizerConfig : RecognizerConfig, INamedProfiles { /// public Dictionary Profiles { get; set; } + + // ########### THE FOLLOWING PROPERTIES ARE FOR POST-PROCESSING OF EVeNTS. + + /// + /// Gets or sets a value indicating Whether or not to combine overlapping events. + /// + public bool CombineOverlappingEvents { get; set; } + + /// + /// Gets or sets a value indicating Whether or not to combine events that constitute a sequence of the same strophe. 
+ /// + public bool CombinePossibleSyllableSequence { get; set; } + + /// + /// Gets or sets a value indicating the maximum allowable start time gap (seconds) between events within the same strophe. + /// This value is used only where CombinePossibleSyllableSequence = true. + /// + public double SyllableStartDifference { get; set; } + + /// + /// Gets or sets a value indicating the maximum allowable difference (in Hertz) between the frequency bands of two events. I.e. events should be in similar frequency band. + /// NOTE: SIMILAR frequency band means the differences between two top Hertz values and the two low Hertz values are less than hertzDifference. + /// This value is used only where CombinePossibleSyllableSequence = true. + /// + public double SyllableHertzGap { get; set; } + + // #### The next three properties determine filtering of events based on acoustic content of upper and lower buffer zones. + + /// + /// Gets or sets a value indicating whether or not to filter events based on acoustic content of upper buffer zone. + /// If value = 0, the upper neighbourhood is ignored. + /// + public int NeighbourhoodUpperHertzBuffer { get; set; } + + /// + /// Gets or sets a value indicating whether or not to filter events based on the acoustic content of their lower buffer zone. + /// If value = 0, the lower neighbourhood is ignored. + /// + public int NeighbourhoodLowerHertzBuffer { get; set; } + + /// + /// Gets or sets a value indicating the decibel threshold for acoustic activity in the upper and lower buffer zones. + /// This value is used only if NeighbourhoodLowerHertzBuffer > 0 OR NeighbourhoodUpperHertzBuffer > 0. 
+ /// + public double NeighbourhoodDbThreshold { get; set; } } } } \ No newline at end of file diff --git a/src/AudioAnalysisTools/Events/EventExtentions.cs b/src/AudioAnalysisTools/Events/EventExtentions.cs index 80f249369..3ac65c5f2 100644 --- a/src/AudioAnalysisTools/Events/EventExtentions.cs +++ b/src/AudioAnalysisTools/Events/EventExtentions.cs @@ -184,7 +184,7 @@ public static List FilterOnDuration(List events, doubl } double[,] subMatrix2 = null; - if (upperHertzBuffer > 0) + if (lowerHertzBuffer > 0) { subMatrix2 = GetLowerNeighbourhood(ev, spectrogramData, lowerHertzBuffer, lowerBinGap, converter); } diff --git a/src/AudioAnalysisTools/Events/Types/CompositeEvent.cs b/src/AudioAnalysisTools/Events/Types/CompositeEvent.cs index 7cb43d1d9..17149c9ac 100644 --- a/src/AudioAnalysisTools/Events/Types/CompositeEvent.cs +++ b/src/AudioAnalysisTools/Events/Types/CompositeEvent.cs @@ -198,6 +198,7 @@ public static List CombineOverlappingEvents(List event /// /// Combines events that have similar bottom and top frequency bounds and whose start times are within the passed time range. /// NOTE: Proximal means (1) that the event starts are close to one another and (2) the events occupy a SIMILAR frequency band. + /// NOTE: SIMILAR frequency band means the differences between two top Hertz values and the two low Hertz values are less than hertzDifference. /// NOTE: This method is used to combine events that are likely to be a syllable sequence within the same call. 
/// public static List CombineProximalEvents(List events, TimeSpan startDifference, int hertzDifference) diff --git a/tests/Acoustics.Test/AnalysisPrograms/Recognizers/AustralPipitTests.cs b/tests/Acoustics.Test/AnalysisPrograms/Recognizers/AustralPipitTests.cs index ef6bd6521..216cbec45 100644 --- a/tests/Acoustics.Test/AnalysisPrograms/Recognizers/AustralPipitTests.cs +++ b/tests/Acoustics.Test/AnalysisPrograms/Recognizers/AustralPipitTests.cs @@ -37,7 +37,7 @@ public void TestRecognizer() var config = Recognizer.ParseConfig(ConfigFile); int resampleRate = config.ResampleRate.Value; string opDir = this.TestOutputDirectory.FullName; - string opFileName = "tempFile"; + string opFileName = "tempFile.wav"; var recording = AudioRecording.GetAudioRecording(TestAsset, resampleRate, opDir, opFileName); var results = Recognizer.Recognize(