Filter tracks (#13)

* allow for multi label model * added support for multi label model results * new cacophony index * update to use specific bird tag rather than bird when available and filter out noise * use ffmpeg instead of librosa * stop classifying on empty audio data * add modelname --------- Co-authored-by: gferraro <[email protected]>
TheCacophonyProject · Apr 12, 2023 · aa7e92e · aa7e92e
1 parent 2eca64e
commit aa7e92e
Show file tree

Hide file tree

Showing 5 changed files with 211 additions and 69 deletions.
diff --git a/.travis.yml b/.travis.yml
@@ -1,4 +1,5 @@
 sudo: required
+dist: bionic
 
 language: python
 python: 3.9

diff --git a/Melt/cacophony_index.py b/Melt/cacophony_index.py
@@ -114,8 +114,8 @@ def calculate(source_file_name):
         table.append(entry)
 
     result = {}
-    result["cacophony_index"] = table
-    result["cacophony_index_version"] = "2020-01-20_A"
+    result["cacophony_index_old"] = table
+    result["cacophony_index_old_version"] = "2020-01-20_A"
     if table == []:
         p = source_data.shape[0] / sample_rate
         result["ci_warning"] = (

diff --git a/Melt/chain.py b/Melt/chain.py
@@ -11,17 +11,82 @@
 import common
 from identify_species import identify_species
 from identify_bird import classify
+import math
+
+
+NON_BIRD = ["human", "noise"]
+
+
+def calc_cacophony_index(tracks, length):
+    version = "1.0"
+    other_labels = [other for other in tracks if other["species"] != "human"]
+    bird_percent = 0
+    bird_until = -1
+    period_length = 20
+    bins = math.ceil(length / 20)
+    percents = []
+    for i in range(bins):
+        percents.append(
+            {
+                "begin_s": i * period_length,
+                "end_s": (i + 1) * period_length,
+                "index_percent": 0,
+            }
+        )
+    period_end = period_length
+    period = 0
+    for track in other_labels:
+        if track["species"] not in NON_BIRD:
+            # bird started in existing span
+            if bird_until >= track["begin_s"] and bird_until < track["end_s"]:
+                new_span = (bird_until, track["end_s"])
+            # bird started after current span
+            elif bird_until < track["end_s"]:
+                new_span = (track["begin_s"], track["end_s"])
+            else:
+                continue
+            if new_span[1] > period_end:
+                while new_span[1] > period_end:
+                    if new_span[0] < period_end:
+                        bird_percent += period_end - new_span[0]
+                        new_span = (period_end, new_span[1])
+                        # bird_percent = min(period_length, new_span[1] - period_end)
+                    percents[period]["index_percent"] = round(
+                        100 * bird_percent / period_length, 1
+                    )
+
+                    bird_percent = 0
+                    period_end += period_length
+                    period += 1
+            # else:
+            bird_percent += new_span[1] - new_span[0]
+            # bird_until = new_span[1]
+            bird_until = new_span[1]
+            period = min(len(percents) - 1, int(bird_until / period_length))
+    if period < len(percents):
+        percents[period]["index_percent"] = round(100 * bird_percent / period_length, 1)
+
+    return percents, version
+
+
+def filter_trcks(tracks):
+    filtered_labels = ["noise"]
+    filtered = [t for t in tracks if t["species"] not in filtered_labels]
+    return filtered
 
 
 def species_identify(file_name, metadata_name, models, bird_model):
-
     labels = identify_species(file_name, metadata_name, models)
-    other_labels = classify(file_name, bird_model)
-    other_labels = [other for other in other_labels if other["species"] != "human"]
+    other_labels, length = classify(file_name, bird_model)
+    other_labels = filter_trcks(other_labels)
+    cacophony_index, version = calc_cacophony_index(other_labels, length)
+
     labels.extend(other_labels)
     result = {}
     result["species_identify"] = labels
     result["species_identify_version"] = "2021-02-01"
+    result["cacophony_index"] = cacophony_index
+    result["cacophony_index_version"] = version
     return result
 
 

diff --git a/Melt/identify_bird.py b/Melt/identify_bird.py
@@ -5,53 +5,81 @@
 import logging
 import sys
 import json
-
-SEG_LENGTH = 3
-SEG_STRIDE = 1
+import audioread.ffdec  # Use ffmpeg decoder
+import math
 
 fmt = "%(process)d %(thread)s:%(levelname)7s %(message)s"
 
 logging.basicConfig(
     stream=sys.stderr, level=logging.INFO, format=fmt, datefmt="%Y-%m-%d %H:%M:%S"
 )
-
-
-def load_samples(path):
-    frames, sr = librosa.load(path, sr=None)
+PROB_THRESH = 0.8
+
+
+def load_recording(file, resample=48000):
+    # librosa.load(file) gives strange results, so decode via ffmpeg instead
+    aro = audioread.ffdec.FFmpegAudioFile(file)
+    frames, sr = librosa.load(aro)
+    aro.close()
+    if resample is not None and resample != sr:
+        frames = librosa.resample(frames, orig_sr=sr, target_sr=resample)
+        sr = resample
+    return frames, sr
+
+
+def load_samples(path, segment_length, stride, hop_length=640, mean_sub=False):
+    logging.debug(
+        "Loading samples with length %s stride %s hop length %s and mean_sub %s",
+        segment_length,
+        stride,
+        hop_length,
+        mean_sub,
+    )
+    frames, sr = load_recording(path)
     mels = []
     i = 0
     n_fft = sr // 10
-    hop_length = 640  # feature frame rate of 75
-
-    mel_all = librosa.feature.melspectrogram(
-        y=frames,
-        sr=sr,
-        n_fft=n_fft,
-        hop_length=hop_length,
-        fmin=50,
-        fmax=11000,
-        n_mels=80,
-    )
-    mel_all = librosa.power_to_db(mel_all, ref=np.max)
-    mel_sample_size = int(1 + SEG_LENGTH * sr / hop_length)
-    jumps_per_stride = int(mel_sample_size / 3.0)
+    # hop_length = 640  # feature frame rate of 75
 
-    length = mel_all.shape[1]
-    end = 0
+    sample_size = int(sr * segment_length)
+    jumps_per_stride = int(sr * stride)
+    length = len(frames) / sr
+    end = segment_length
     mel_samples = []
     i = 0
-    while end < length:
-        start = int(jumps_per_stride * (i * SEG_STRIDE))
-        end = start + mel_sample_size
-        mel = mel_all[:, start:end].copy()
-        mel_m = tf.reduce_mean(mel, axis=1)
-        mel_m = tf.expand_dims(mel_m, axis=1)
-        mel = mel - mel_m
-        if mel.shape[1] != 226:
-            # pad with zeros
-            empty = np.zeros(((80, 226)))
-            empty[:, : mel.shape[1]] = mel
-            mel = empty
+    while end < (length + stride):
+        if end > length:
+            # always use the end of the recording for the last sample
+            data = frames[-sample_size:]
+        else:
+            data = frames[i * jumps_per_stride : i * jumps_per_stride + sample_size]
+        if len(data) != sample_size:
+            sample = np.zeros((sample_size))
+            sample[: len(data)] = data
+            data = sample
+        end += stride
+        # /start = int(jumps_per_stride * (i * stride))
+        mel = librosa.feature.melspectrogram(
+            y=data,
+            sr=sr,
+            n_fft=n_fft,
+            hop_length=hop_length,
+            fmin=50,
+            fmax=11000,
+            n_mels=80,
+        )
+        half = mel[:, 75:]
+        if np.amax(half) == np.amin(half):
+            # nothing useful here, stop early
+            strides_per = math.ceil(segment_length / 2.0 / stride) + 1
+            mel_samples = mel_samples[:-strides_per]
+            break
+        mel = librosa.power_to_db(mel)
+        # end = start + sample_size
+        if mean_sub:
+            mel_m = tf.reduce_mean(mel, axis=1)
+            mel_m = tf.expand_dims(mel_m, axis=1)
+            mel = mel - mel_m
 
         mel_samples.append(mel)
         i += 1
@@ -61,61 +89,109 @@ def load_samples(path):
 def load_model(model_path):
     logging.debug("Loading %s", model_path)
     model_path = Path(model_path)
-    model = tf.keras.models.load_model(model_path)
-    model.load_weights(model_path / "val_accuracy").expect_partial()
+    model = tf.keras.models.load_model(
+        str(model_path),
+        compile=False,
+    )
+    # model.load_weights(model_path / "val_binary_accuracy").expect_partial()
     meta_file = model_path / "metadata.txt"
     with open(meta_file, "r") as f:
         meta = json.load(f)
     return model, meta
 
 
 def classify(file, model_file):
-    global SEG_LENGTH, SEG_STRIDE
-    samples, length = load_samples(file)
     model, meta = load_model(model_file)
     labels = meta.get("labels")
+    multi_label = meta.get("multi_label")
+    segment_length = meta.get("segment_length", 3)
+    segment_stride = meta.get("segment_stride", 1.5)
+    hop_length = meta.get("hop_length", 640)
+    mean_sub = meta.get("mean_sub", False)
+    model_name = meta.get("name", False)
+
+    samples, length = load_samples(
+        file, segment_length, segment_stride, hop_length, mean_sub=mean_sub
+    )
     predictions = model.predict(samples, verbose=0)
-
-    track = None
     tracks = []
     start = 0
+    active_tracks = {}
     for prediction in predictions:
-        best_i = np.argmax(prediction)
-        best_p = prediction[best_i]
-        label = labels[best_i]
-        if best_p > 0.7:
+        # last sample always ends at length of audio rec
+        if start + segment_length > length:
+            start = length - segment_length
+        specific_bird = False
+        results = []
+        track_labels = []
+        if multi_label:
+            for i, p in enumerate(prediction):
+                if p >= PROB_THRESH:
+                    label = labels[i]
+                    results.append((p, label))
+                    track_labels.append(label)
+                    specific_bird = specific_bird or label not in [
+                        "human",
+                        "noise",
+                        "bird",
+                    ]
+
+        else:
+            best_i = np.argmax(prediction)
+            best_p = prediction[best_i]
+            if best_p >= PROB_THRESH:
+                label = labels[best_i]
+                results.append((best_p, label))
+                track_labels.append(label)
+                specific_bird = label not in ["human", "noise", "bird"]
+
+        # remove tracks that have ended
+        existing_tracks = list(active_tracks.keys())
+        for existing in existing_tracks:
+            track = active_tracks[existing]
+            if track.label not in track_labels or (
+                track.label == "bird" and specific_bird
+            ):
+                if specific_bird:
+                    track.end = start
+                else:
+                    track.end = min(length, track.end - segment_length / 2)
+                del active_tracks[track.label]
+
+        for r in results:
+            label = r[1]
+            if specific_bird and label == "bird":
+                continue
+            track = active_tracks.get(label, None)
             if track is None:
-                track = Track(label, start, start + SEG_LENGTH, best_p)
-            elif track.label != label:
-                track.end = start
-                tracks.append((track))
-                track = Track(label, start, start + SEG_LENGTH, best_p)
+                track = Track(label, start, start + segment_length, r[0], model_name)
+                tracks.append(track)
+                active_tracks[label] = track
             else:
-                track.confidences.append(best_p)
-        elif track is not None:
-            track.end = start + (SEG_LENGTH / 2 - SEG_STRIDE)
-            tracks.append((track))
-            track = None
-
-        start += SEG_STRIDE
+                track.end = min(length, start + segment_length)
+                track.confidences.append(r[0])
+            # else:
 
-    if track is not None:
-        track.end = length
-        track.confidences.append(best_p)
-        tracks.append((track))
+        # elif track is not None:
+        #     track.end = start + (segment_length / 2 - segment_stride)
+        #     tracks.append((track))
+        #     track = None
 
-    return [t.get_meta() for t in tracks]
+        start += segment_stride
+    return [t.get_meta() for t in tracks], length
 
 
 class Track:
-    def __init__(self, label, start, end, confidence):
+    def __init__(self, label, start, end, confidence, model_name):
         self.start = start
         self.label = label
         self.end = end
         self.confidences = [confidence]
+        self.model = model_name
 
     def get_meta(self):
         meta = {}
+        meta["model"] = self.model
         meta["begin_s"] = self.start
         meta["end_s"] = self.end
         meta["species"] = self.label

diff --git a/Melt/identify_species.py b/Melt/identify_species.py
@@ -126,11 +126,11 @@ def build_entry(begin, end, species, activation):
     entry["end_s"] = end
     entry["species"] = species
     entry["likelihood"] = round(activation * 0.01, 2)
+    entry["model"] = "morepork"
     return entry
 
 
 def identify_species(recording, metadata, models):
-
     # get spectrogram to be checked
     sr, npspec = _load_sample(recording)