Skip to content

Commit

Permalink
Filter tracks (#13)
Browse files Browse the repository at this point in the history
* allow for multi label model

* added support for multi label model results

* new cacophony index

* update to use specific bird tag rather than bird when available and filter out noise

* use ffmpeg instead of librosa

* stop classifying on empty audio data

* add modelname

---------

Co-authored-by: gferraro <[email protected]>
  • Loading branch information
gferraro and gferraro authored Apr 12, 2023
1 parent 2eca64e commit aa7e92e
Show file tree
Hide file tree
Showing 5 changed files with 211 additions and 69 deletions.
1 change: 1 addition & 0 deletions .travis.yml
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
sudo: required
dist: bionic

language: python
python: 3.9
Expand Down
4 changes: 2 additions & 2 deletions Melt/cacophony_index.py
Original file line number Diff line number Diff line change
Expand Up @@ -114,8 +114,8 @@ def calculate(source_file_name):
table.append(entry)

result = {}
result["cacophony_index"] = table
result["cacophony_index_version"] = "2020-01-20_A"
result["cacophony_index_old"] = table
result["cacophony_index_old_version"] = "2020-01-20_A"
if table == []:
p = source_data.shape[0] / sample_rate
result["ci_warning"] = (
Expand Down
71 changes: 68 additions & 3 deletions Melt/chain.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,17 +11,82 @@
import common
from identify_species import identify_species
from identify_bird import classify
import math


# Species labels that never count towards the cacophony (bird activity) index.
NON_BIRD = ["human", "noise"]


def calc_cacophony_index(tracks, length):
    """Compute the percentage of each 20 second period that contains birds.

    Overlapping tracks are merged first, so a second of audio covered by
    several bird tracks is only counted once.

    Args:
        tracks: iterable of track dicts with "species", "begin_s" and
            "end_s" keys. Tracks whose species is listed in NON_BIRD are
            ignored. The tracks do not need to be sorted.
        length: length of the recording in seconds.

    Returns:
        A tuple ``(percents, version)``. ``percents`` holds one dict per
        period with "begin_s", "end_s" and "index_percent" (0-100, rounded to
        one decimal place). It is empty when ``length`` is not positive.
    """
    version = "1.0"
    period_length = 20
    bins = max(0, math.ceil(length / period_length))
    percents = [
        {
            "begin_s": i * period_length,
            "end_s": (i + 1) * period_length,
            "index_percent": 0,
        }
        for i in range(bins)
    ]
    if not percents:
        # Nothing to index; also avoids indexing into an empty list.
        return percents, version

    # Sort so that overlapping tracks can be merged in a single pass.
    spans = sorted(
        (track["begin_s"], track["end_s"])
        for track in tracks
        if track["species"] not in NON_BIRD
    )

    bird_seconds = [0.0] * bins
    limit = bins * period_length
    # Everything before this time has already been counted. Starting at 0
    # also clips tracks that begin before the start of the recording.
    covered_until = 0
    for begin, end in spans:
        begin = max(begin, covered_until)
        # Clip tracks that run past the last period instead of spilling over.
        end = min(end, limit)
        if end <= begin:
            continue
        first = int(begin // period_length)
        last = min(bins - 1, math.ceil(end / period_length) - 1)
        for period in range(first, last + 1):
            period_begin = period * period_length
            period_end = period_begin + period_length
            overlap = min(end, period_end) - max(begin, period_begin)
            if overlap > 0:
                bird_seconds[period] += overlap
        covered_until = end

    for entry, seconds in zip(percents, bird_seconds):
        entry["index_percent"] = round(100 * seconds / period_length, 1)

    return percents, version


def filter_trcks(tracks, filtered_labels=("noise",)):
    """Return the tracks whose species is not one of the filtered labels.

    Args:
        tracks: iterable of track dicts, each with a "species" key.
        filtered_labels: species labels to drop. Defaults to only "noise".

    Returns:
        A new list with the remaining tracks in their original order; the
        input is not modified.
    """
    excluded = tuple(filtered_labels)
    return [t for t in tracks if t["species"] not in excluded]


def species_identify(file_name, metadata_name, models, bird_model):
    """Run the species and bird classifiers and collect their results.

    Args:
        file_name: path of the recording to analyse.
        metadata_name: metadata passed through to identify_species.
        models: models passed through to identify_species.
        bird_model: path of the bird model passed to classify.

    Returns:
        A dict with the combined labels under "species_identify", the
        per-period bird index under "cacophony_index", and a version string
        for each of them.
    """
    labels = identify_species(file_name, metadata_name, models)

    # classify returns (tracks, recording length); call it once and unpack.
    other_labels, length = classify(file_name, bird_model)
    other_labels = filter_trcks(other_labels)
    cacophony_index, version = calc_cacophony_index(other_labels, length)

    labels.extend(other_labels)
    result = {}
    result["species_identify"] = labels
    result["species_identify_version"] = "2021-02-01"
    result["cacophony_index"] = cacophony_index
    result["cacophony_index_version"] = version
    return result


Expand Down
202 changes: 139 additions & 63 deletions Melt/identify_bird.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,53 +5,81 @@
import logging
import sys
import json

SEG_LENGTH = 3
SEG_STRIDE = 1
import audioread.ffdec # Use ffmpeg decoder
import math

fmt = "%(process)d %(thread)s:%(levelname)7s %(message)s"

logging.basicConfig(
stream=sys.stderr, level=logging.INFO, format=fmt, datefmt="%Y-%m-%d %H:%M:%S"
)


def load_samples(path):
frames, sr = librosa.load(path, sr=None)
PROB_THRESH = 0.8


def load_recording(file, resample=48000):
    """Decode an audio file with ffmpeg and optionally resample it.

    Args:
        file: path of the audio file to decode.
        resample: target sample rate in Hz, or None to keep the rate that
            librosa.load returns.

    Returns:
        A tuple ``(frames, sr)`` of the audio samples and their sample rate.
    """
    # librosa.load(file) giving strange results, so decode through ffmpeg
    aro = audioread.ffdec.FFmpegAudioFile(file)
    try:
        # NOTE(review): no sr is passed, so librosa.load resamples to its own
        # default rate before any resampling below - confirm this is intended.
        frames, sr = librosa.load(aro)
    finally:
        # Always release the ffmpeg process, even when decoding fails.
        aro.close()
    if resample is not None and resample != sr:
        frames = librosa.resample(frames, orig_sr=sr, target_sr=resample)
        sr = resample
    return frames, sr


def load_samples(path, segment_length, stride, hop_length=640, mean_sub=False):
logging.debug(
"Loading samples with length %s stride %s hop length %s and mean_sub %s",
segment_length,
stride,
hop_length,
mean_sub,
)
frames, sr = load_recording(path)
mels = []
i = 0
n_fft = sr // 10
hop_length = 640 # feature frame rate of 75

mel_all = librosa.feature.melspectrogram(
y=frames,
sr=sr,
n_fft=n_fft,
hop_length=hop_length,
fmin=50,
fmax=11000,
n_mels=80,
)
mel_all = librosa.power_to_db(mel_all, ref=np.max)
mel_sample_size = int(1 + SEG_LENGTH * sr / hop_length)
jumps_per_stride = int(mel_sample_size / 3.0)
# hop_length = 640 # feature frame rate of 75

length = mel_all.shape[1]
end = 0
sample_size = int(sr * segment_length)
jumps_per_stride = int(sr * stride)
length = len(frames) / sr
end = segment_length
mel_samples = []
i = 0
while end < length:
start = int(jumps_per_stride * (i * SEG_STRIDE))
end = start + mel_sample_size
mel = mel_all[:, start:end].copy()
mel_m = tf.reduce_mean(mel, axis=1)
mel_m = tf.expand_dims(mel_m, axis=1)
mel = mel - mel_m
if mel.shape[1] != 226:
# pad with zeros
empty = np.zeros(((80, 226)))
empty[:, : mel.shape[1]] = mel
mel = empty
while end < (length + stride):
if end > length:
# always use the end of the recording for the last sample
data = frames[-sample_size:]
else:
data = frames[i * jumps_per_stride : i * jumps_per_stride + sample_size]
if len(data) != sample_size:
sample = np.zeros((sample_size))
sample[: len(data)] = data
data = sample
end += stride
# /start = int(jumps_per_stride * (i * stride))
mel = librosa.feature.melspectrogram(
y=data,
sr=sr,
n_fft=n_fft,
hop_length=hop_length,
fmin=50,
fmax=11000,
n_mels=80,
)
half = mel[:, 75:]
if np.amax(half) == np.amin(half):
# nothing useful here, stop early
strides_per = math.ceil(segment_length / 2.0 / stride) + 1
mel_samples = mel_samples[:-strides_per]
break
mel = librosa.power_to_db(mel)
# end = start + sample_size
if mean_sub:
mel_m = tf.reduce_mean(mel, axis=1)
mel_m = tf.expand_dims(mel_m, axis=1)
mel = mel - mel_m

mel_samples.append(mel)
i += 1
Expand All @@ -61,61 +89,109 @@ def load_samples(path):
def load_model(model_path):
    """Load a saved Keras model and the metadata stored next to it.

    Args:
        model_path: directory containing the saved model and a
            "metadata.txt" file holding JSON.

    Returns:
        A tuple ``(model, meta)`` of the loaded model and the parsed metadata.
    """
    logging.debug("Loading %s", model_path)
    model_path = Path(model_path)
    # The model is only used for prediction, so it is loaded uncompiled.
    # No separate checkpoint weights are loaded on top of the saved model.
    model = tf.keras.models.load_model(
        str(model_path),
        compile=False,
    )
    meta_file = model_path / "metadata.txt"
    with open(meta_file, "r", encoding="utf-8") as f:
        meta = json.load(f)
    return model, meta


def classify(file, model_file):
    """Classify a recording and merge consecutive predictions into tracks.

    The model metadata supplies the labels, whether the model is multi label,
    and the segment length, stride, hop length and mean subtraction used to
    build the samples (with defaults when a key is missing).

    A track is opened for every label predicted with a probability of at
    least PROB_THRESH and is extended while that label keeps being predicted
    on consecutive samples. When a specific bird label is predicted, the
    generic "bird" label is dropped for that sample.

    Args:
        file: path of the recording to classify.
        model_file: directory of the model, as accepted by load_model.

    Returns:
        A tuple ``(tracks, length)``: the metadata dict of every track in the
        order the tracks were opened, and the recording length as returned by
        load_samples (presumably seconds - confirm against load_samples).
    """
    model, meta = load_model(model_file)
    labels = meta.get("labels")
    multi_label = meta.get("multi_label")
    segment_length = meta.get("segment_length", 3)
    segment_stride = meta.get("segment_stride", 1.5)
    hop_length = meta.get("hop_length", 640)
    mean_sub = meta.get("mean_sub", False)
    model_name = meta.get("name", False)
    # Labels that do not identify a particular bird species.
    generic_labels = ("human", "noise", "bird")

    samples, length = load_samples(
        file, segment_length, segment_stride, hop_length, mean_sub=mean_sub
    )
    if len(samples) == 0:
        # Nothing to classify (e.g. empty audio); skip the model entirely.
        return [], length
    predictions = model.predict(samples, verbose=0)

    tracks = []
    start = 0
    active_tracks = {}
    for prediction in predictions:
        # last sample always ends at length of audio rec
        if start + segment_length > length:
            # Never start before the recording when it is shorter than one
            # segment.
            start = max(0, length - segment_length)

        # (probability, label) pairs that passed the threshold for this sample
        results = []
        if multi_label:
            for i, p in enumerate(prediction):
                if p >= PROB_THRESH:
                    results.append((p, labels[i]))
        else:
            best_i = np.argmax(prediction)
            best_p = prediction[best_i]
            if best_p >= PROB_THRESH:
                results.append((best_p, labels[best_i]))
        track_labels = [label for _, label in results]
        specific_bird = any(label not in generic_labels for label in track_labels)

        # remove tracks that have ended
        for label, track in list(active_tracks.items()):
            ended = label not in track_labels
            superseded = label == "bird" and specific_bird
            if not (ended or superseded):
                continue
            if specific_bird:
                track.end = start
            else:
                track.end = min(length, track.end - segment_length / 2)
            del active_tracks[label]

        for probability, label in results:
            if specific_bird and label == "bird":
                # prefer the specific bird label over the generic one
                continue
            track = active_tracks.get(label)
            if track is None:
                track = Track(
                    label,
                    start,
                    min(length, start + segment_length),
                    probability,
                    model_name,
                )
                tracks.append(track)
                active_tracks[label] = track
            else:
                track.end = min(length, start + segment_length)
                track.confidences.append(probability)

        start += segment_stride
    return [t.get_meta() for t in tracks], length


class Track:
def __init__(self, label, start, end, confidence):
def __init__(self, label, start, end, confidence, model_name):
self.start = start
self.label = label
self.end = end
self.confidences = [confidence]
self.model = model_name

def get_meta(self):
meta = {}
meta["model"] = self.model
meta["begin_s"] = self.start
meta["end_s"] = self.end
meta["species"] = self.label
Expand Down
2 changes: 1 addition & 1 deletion Melt/identify_species.py
Original file line number Diff line number Diff line change
Expand Up @@ -126,11 +126,11 @@ def build_entry(begin, end, species, activation):
entry["end_s"] = end
entry["species"] = species
entry["likelihood"] = round(activation * 0.01, 2)
entry["model"] = "morepork"
return entry


def identify_species(recording, metadata, models):

# get spectrogram to be checked
sr, npspec = _load_sample(recording)

Expand Down

0 comments on commit aa7e92e

Please sign in to comment.