Skip to content

Commit

Permalink
Update to morepork-only detection model (#7)
Browse files Browse the repository at this point in the history
* Update to morepork-only detection model

* Respond to PR comments

Co-authored-by: Dennis Sosnoski <[email protected]>
  • Loading branch information
dsosnoski and Dennis Sosnoski authored May 9, 2021
1 parent 94c29d7 commit 8663e76
Show file tree
Hide file tree
Showing 15 changed files with 174 additions and 519 deletions.
4 changes: 3 additions & 1 deletion Melt/.gitignore → .gitignore
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
__pycache__
*.pyc
venv_*
env_*
.idea

98 changes: 8 additions & 90 deletions Melt/chain.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,91 +9,21 @@
import time

import common
import ensemble
import squawk


def noise_reduce(file_name):
    """Load an audio file at a fixed 48 kHz rate and noise-reduce it.

    :param file_name: path of the audio file to load
    :return: tuple (source, noise_reduced, sample_rate)
    """
    import noise_reduction

    rate = 48000
    audio = common.load_audio_file_as_numpy_array(file_name, rate)
    cleaned = noise_reduction.noise_reduce(audio, rate)
    return audio, cleaned, rate


def find_nr_squawks_from_file_name(file_name):
    """Noise-reduce a recording and locate squawks in the cleaned signal.

    :param file_name: path of the audio file to analyse
    :return: tuple (source, noise_reduced, squawks, sample_rate)
    """
    audio, cleaned, rate = noise_reduce(file_name)
    found = squawk.find_squawks(cleaned, rate)
    return audio, cleaned, found, rate


def species_identify(source, nr, squawks, sample_rate, verbose=False):
    """Classify each squawk with the ensemble model and build a tag summary.

    :param source: raw audio samples (unused here; kept for interface parity
        with speech_detect, both are called via ``*nss``)
    :param nr: noise-reduced audio samples
    :param squawks: squawk spans, dicts with 'begin_i'/'end_i' sample indices
    :param sample_rate: audio sample rate in Hz
    :param verbose: when True, keep low-confidence results and add confidence
        details to each entry
    :return: dict with 'species_identify' tag list and model version string
    """
    import json
    import numpy
    import squawk

    e = ensemble.ensemble()
    for s in squawks:
        waveform = squawk.extract_squawk_waveform(nr, sample_rate, s)
        e.append_waveform(waveform)
    model_version = 'sc_ah'
    p = e.apply_model(model_version)

    label_file_name = 'model/model_%s_label.json' % model_version
    with open(label_file_name, 'r') as f:
        label = json.loads(f.read())

    tag = []
    # BUG FIX: the loop variable was named 'squawk', shadowing the imported
    # squawk module above; renamed to 'sq' to avoid the shadowing hazard.
    for row, sq in zip(p, squawks):
        mm = numpy.argmax(row)
        m2 = numpy.argsort(row)[-2]
        species = label[mm]
        if not verbose:
            # Drop uncertain classifications: weak top score, strong
            # runner-up, or a non-species label.
            if row[mm] < 0.75:
                continue
            if row[m2] > 0.3:
                continue
            if species in ('noise', 'other', 'unknown'):
                continue

        entry = {}
        entry['species'] = species
        entry['begin_s'] = round(sq['begin_i'] / sample_rate, 2)
        entry['end_s'] = round(sq['end_i'] / sample_rate, 2)
        if verbose:
            entry['confidence'] = '%d%%' % (100 * row[mm])
            entry['or'] = '%s (%d%%)' % (label[m2], 100 * row[m2])
        tag.append(entry)
    result = {}
    result['species_identify'] = tag
    result['species_identify_version'] = '2019-12-12_A'
    return result
from identify_species import identify_species


def speech_detect(source, nr, squawks, sample_rate):
e = ensemble.ensemble()
for s in squawks:
waveform = squawk.extract_squawk_waveform(nr, sample_rate, s)
e.append_waveform(waveform)
p = e.apply_model('sd_aa')
def species_identify(file_name, metadata_name, models):
labels = identify_species(file_name, metadata_name, models)
result = {}
result['speech_detection_version'] = '2019-10-30_A'
human_squawk_count = 0
for(pb, ph) in p:
if pb < 0.1 and ph > 0.95:
human_squawk_count += 1
result['speech_detection'] = (human_squawk_count > 3)
result['species_identify'] = labels
result['species_identify_version'] = '2021-02-01'
return result


def examine(file_name, summary):
def examine(file_name, metadata_name, models, summary):
import cacophony_index
ci = cacophony_index.calculate(file_name)
summary.update(ci)
nss = find_nr_squawks_from_file_name(file_name)
summary.update(speech_detect(*nss))
summary.update(species_identify(*nss))

summary.update(species_identify(file_name, metadata_name, models))

def main():
argv = sys.argv
Expand All @@ -106,20 +36,8 @@ def main():
import cacophony_index
ci = cacophony_index.calculate(argv[2])
summary.update(ci)
elif argv[1] == '-examine':
examine(argv[2], summary)
elif argv[1] == '-noise_reduce':
(source, nr, sample_rate) = noise_reduce(argv[2])
common.write_audio_to_file(
'temp/noise_reduce_stereo.ogg', sample_rate, source, nr)
elif argv[1] == '-species_identify':
nss = find_nr_squawks_from_file_name(argv[2])
summary.update(species_identify(*nss))
elif argv[1] == '-speech_detect':
nss = find_nr_squawks_from_file_name(argv[2])
summary.update(speech_detect(*nss))
else:
result = -1
examine(argv[1], argv[2], argv[3], summary)

t1 = time.time()

Expand Down
55 changes: 0 additions & 55 deletions Melt/ensemble.py

This file was deleted.

157 changes: 157 additions & 0 deletions Melt/identify_species.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,157 @@

import librosa
import numpy as np
import os
import tensorflow as tf

# Spectrogram band covering morepork calls (Hz). Bands are 10 Hz wide
# because _load_sample uses n_fft = sr / 10.
frequency_min = 600
frequency_max = 1200
num_bands = int((frequency_max - frequency_min) / 10)
# Sample timing: 20 spectrogram slices per second; each sample is 3 s long
# and consecutive samples slide forward by 1 s (so they overlap by 2 s).
slices_per_second = 20
seconds_per_sample = 3.0
slices_per_sample = int(slices_per_second * seconds_per_sample)
sample_slide_seconds = 1.0
sample_slide_slices = int(sample_slide_seconds * slices_per_second)
# Minimum summed model activation for a sample to count as a detection.
activation_threshold = 1.0

# File name that marks a directory as containing a saved TensorFlow model.
model_file_name = 'saved_model.pb'

def _load_sample(path):
    """Load a recording and return its band-limited magnitude spectrogram.

    :param path: path of the audio file to load (native sample rate is kept)
    :return: tuple (sample_rate, spectrogram) where the spectrogram rows
        cover only the frequency_min..frequency_max band (10 Hz bins, since
        n_fft = sr / 10)
    """
    audio, rate = librosa.load(path, sr=None)

    # generate spectrogram, then trim to the band of interest
    window = int(rate / 10)
    spectrum = librosa.stft(audio, n_fft=window, hop_length=int(window / 2))
    band_lo = int(frequency_min / 10)
    band_hi = int(frequency_max / 10)
    magnitudes = np.abs(spectrum)[band_lo:band_hi]

    return rate, magnitudes

def _model_paths(basepath):
    """Recursively collect directories under *basepath* holding a saved model.

    A directory qualifies when it directly contains a file whose name ends
    with model_file_name ('saved_model.pb').

    :param basepath: directory to search
    :return: list of qualifying directory paths
    """
    found = []
    for entry in os.listdir(basepath):
        full_path = os.path.join(basepath, entry)
        if os.path.isdir(full_path):
            found.extend(_model_paths(full_path))
        elif full_path.endswith(model_file_name):
            found.append(basepath)
    return found

def _find_likely_span(liklihoods, start_times, first, last):
    """
    Find the likelihood of a morepork call, and the actual time span, corresponding to a span of consecutive samples
    with morepork predicted. We're not currently predicting the actual time of a particular morepork call, but we can
    make inferences based on the assumption that every sample containing an entire morepork call will give a positive
    prediction. This uses heuristics to handle the common cases of two, three, or more samples.

    Note that consecutive samples overlap: each sample is seconds_per_sample
    long but starts only sample_slide_seconds after the previous one, which
    is what makes the overlap-based trimming below meaningful.
    :param liklihoods: percentage likelihoods for all samples
    :type liklihoods: list(float)
    :param start_times: start time for each sample (normally same interval, but last may be shorter)
    :type start_times: list(float)
    :param first: first sample index in range with morepork predicted
    :type first: int
    :param last: last sample index in range with morepork predicted
    :type last: int
    :return: liklihood, start_time, end_time
    :rtype: float, float, float
    """
    count = last - first
    first_start_time = start_times[first]
    last_end_time = start_times[last] + seconds_per_sample
    if count == 0:
        # single isolated sample, just return the likelihood and time span for that sample
        return liklihoods[first], first_start_time, last_end_time
    elif count == 1:
        # two consecutive samples, assume the call is in their overlap span and
        # return the maximum likelihood with that (narrower) span
        liklihood = max(liklihoods[first], liklihoods[last])
        return liklihood, first_start_time + sample_slide_seconds, first_start_time + seconds_per_sample
    elif count == 2:
        # three consecutive samples; possibly two separate calls if the two end
        # samples carry the highest likelihoods
        max_liklihood = max(liklihoods[first:last + 1])
        min_liklihood = min(liklihoods[first:last + 1])
        if max_liklihood == liklihoods[first + 1]:
            # maximum likelihood is the middle sample, assume that's where the call actually is
            return max_liklihood, start_times[first+1], start_times[first+1] + seconds_per_sample
        elif min_liklihood == liklihoods[first]:
            # lowest likelihood is the first sample, so assume the call is probably
            # in the overlap of the last two and perhaps a second one is present
            return max_liklihood, start_times[first+1], last_end_time
        elif min_liklihood == liklihoods[last]:
            # lowest likelihood is the last sample, so assume the call is probably
            # in the first sample and perhaps a second one is present
            return max_liklihood, first_start_time, start_times[first+1] + seconds_per_sample
        else:
            # no good guessing, just return the full span
            return max_liklihood, first_start_time, last_end_time
    else:
        # more than three consecutive samples, just see if we can safely trim the non-overlapping end spans
        max_liklihood = max(liklihoods[first:last + 1])
        if max_liklihood > liklihoods[first]:
            if max_liklihood > liklihoods[last]:
                # first and last not highest likelihood, trim off the non-overlapping end spans
                return max_liklihood, start_times[first+1], start_times[last-1] + seconds_per_sample
            else:
                # last is highest likelihood, just trim off non-overlapping start
                return max_liklihood, start_times[first+1], last_end_time
        elif max_liklihood > liklihoods[last]:
            # first is highest likelihood, last is not, just trim off non-overlapping end
            return max_liklihood, first_start_time, start_times[last-1] + seconds_per_sample
        else:
            # first and last both highest likelihood, just return the entire time
            return max_liklihood, first_start_time, last_end_time

def build_entry(begin, end, species, activation):
    """Build one label dict describing a detected call.

    :param begin: span start time in seconds
    :param end: span end time in seconds
    :param species: species name for the label
    :param activation: likelihood as a 0-100 percentage; stored under the
        (historically misspelled) 'liklihood' key as a 0-1 fraction
    :return: dict with keys 'begin_s', 'end_s', 'species', 'liklihood'
    """
    return {
        'begin_s': begin,
        'end_s': end,
        'species': species,
        'liklihood': round(activation * 0.01, 2),
    }

def identify_species(recording, metadata, models):
    """Run every saved morepork model over a recording and label detections.

    :param recording: path of the audio file to analyse
    :param metadata: recording metadata path (currently unused)
    :param models: base directory searched recursively for saved models
    :return: list of label dicts (see build_entry); empty when nothing is
        detected or no models are found
    """
    # get spectrogram to be checked, then cut it into overlapping samples
    sr, npspec = _load_sample(recording)
    samples, start_times = _build_samples(npspec)

    # accumulate results from all models
    model_paths = _model_paths(models)
    if not model_paths:
        # BUG FIX: previously this fell through to a divide-by-zero when
        # averaging activations; with no models there is nothing to predict.
        return []
    activations_sum = np.zeros(len(samples))
    for path in model_paths:
        model = tf.keras.models.load_model(path)
        activations_sum += model.predict(samples).flatten()

    return _activations_to_labels(activations_sum, start_times, len(model_paths))


def _build_samples(npspec):
    """Cut a spectrogram into fixed-length, overlapping, normalized samples.

    :param npspec: band-limited magnitude spectrogram (bands x slices)
    :return: tuple (samples, start_times) where samples is a numpy array of
        shape (n, bands, slices_per_sample, 1) and start_times gives each
        sample's start offset in seconds
    """
    samples = []
    start_times = []
    for base in range(0, npspec.shape[1], sample_slide_slices):
        # align the final sample to end exactly at the recording's end, so it
        # may overlap the previous sample by more than the usual slide
        # NOTE(review): for recordings shorter than one sample length, start
        # goes negative and the slice comes out short -- TODO confirm intended
        limit = min(base + slices_per_sample, npspec.shape[1])
        start = limit - slices_per_sample
        start_times.append(start / slices_per_second)
        sample = npspec[:, start:limit]
        sample = librosa.amplitude_to_db(sample, ref=np.max)
        # shift the dB values (min..0) into the 0..1 range
        sample = sample / abs(sample.min()) + 1.0
        samples.append(sample.reshape(sample.shape + (1,)))
    return np.array(samples), start_times


def _activations_to_labels(activations_sum, start_times, model_count):
    """Convert summed per-sample activations into labelled time spans.

    Runs of consecutive samples whose summed activation reaches
    activation_threshold are merged into one span via _find_likely_span.

    :param activations_sum: per-sample activations summed over all models
    :param start_times: per-sample start times in seconds
    :param model_count: number of models contributing to the sums (> 0)
    :return: list of label dicts built by build_entry
    """
    labels = []
    # average the summed activations and express them as whole percentages
    liklihoods = [round(v * 100 / model_count) for v in activations_sum]
    first_index = -1
    last_index = -1
    for i in range(len(activations_sum)):
        if activations_sum[i] >= activation_threshold:
            # only collect sample ranges where the summed activations are
            # above the threshold value
            if first_index < 0:
                first_index = i
            last_index = i
        elif first_index >= 0:
            # just past the end of a sample range with activations, record it and clear
            liklihood, start_time, end_time = _find_likely_span(liklihoods, start_times, first_index, last_index)
            labels.append(build_entry(start_time, end_time, 'morepork', liklihood))
            first_index = -1
    if first_index >= 0:
        # record final sample range with activations
        liklihood, start_time, end_time = _find_likely_span(liklihoods, start_times, first_index, last_index)
        labels.append(build_entry(start_time, end_time, 'morepork', liklihood))
    return labels
Loading

0 comments on commit 8663e76

Please sign in to comment.