diff --git a/.travis.yml b/.travis.yml
index 76a5eda..76489da 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -1,4 +1,5 @@
 sudo: required
+dist: bionic
 language: python
 python: 3.9
diff --git a/Melt/cacophony_index.py b/Melt/cacophony_index.py
index 92c2e0e..ef60431 100755
--- a/Melt/cacophony_index.py
+++ b/Melt/cacophony_index.py
@@ -114,8 +114,8 @@ def calculate(source_file_name):
         table.append(entry)

     result = {}
-    result["cacophony_index"] = table
-    result["cacophony_index_version"] = "2020-01-20_A"
+    result["cacophony_index_old"] = table
+    result["cacophony_index_old_version"] = "2020-01-20_A"
     if table == []:
         p = source_data.shape[0] / sample_rate
         result["ci_warning"] = (
diff --git a/Melt/chain.py b/Melt/chain.py
index ea44796..3090684 100755
--- a/Melt/chain.py
+++ b/Melt/chain.py
@@ -11,17 +11,82 @@ import common
 from identify_species import identify_species
 from identify_bird import classify
+import math
+
+
+NON_BIRD = ["human", "noise"]
+
+
+def calc_cacophony_index(tracks, length):
+    version = "1.0"
+    other_labels = [other for other in tracks if other["species"] != "human"]
+    bird_percent = 0
+    bird_until = -1
+    period_length = 20
+    bins = math.ceil(length / period_length)
+    percents = []
+    for i in range(bins):
+        percents.append(
+            {
+                "begin_s": i * period_length,
+                "end_s": (i + 1) * period_length,
+                "index_percent": 0,
+            }
+        )
+    period_end = period_length
+    period = 0
+    for track in other_labels:
+        if track["species"] not in NON_BIRD:
+            # bird started in existing span
+            if bird_until >= track["begin_s"] and bird_until < track["end_s"]:
+                new_span = (bird_until, track["end_s"])
+            # bird started after current span
+            elif bird_until < track["end_s"]:
+                new_span = (track["begin_s"], track["end_s"])
+            else:
+                continue
+            if new_span[1] > period_end:
+                while new_span[1] > period_end:
+                    if new_span[0] < period_end:
+                        bird_percent += period_end - new_span[0]
+                        new_span = (period_end, new_span[1])
+                    # bird_percent = min(period_length, new_span[1] - period_end)
+                    percents[period]["index_percent"] = round(
+                        100 * bird_percent / period_length, 1
+                    )
+
+                    bird_percent = 0
+                    period_end += period_length
+                    period += 1
+            # else:
+            bird_percent += new_span[1] - new_span[0]
+            # bird_until = new_span[1]
+            bird_until = new_span[1]
+            period = min(len(percents) - 1, int(bird_until / period_length))
+    if period < len(percents):
+        percents[period]["index_percent"] = round(100 * bird_percent / period_length, 1)
+
+    return percents, version
+
+
+def filter_tracks(tracks):
+    filtered_labels = ["noise"]
+    filtered = [t for t in tracks if t["species"] not in filtered_labels]
+    return filtered


 def species_identify(file_name, metadata_name, models, bird_model):
-    labels = identify_species(file_name, metadata_name, models)
-    other_labels = classify(file_name, bird_model)
-    other_labels = [other for other in other_labels if other["species"] != "human"]
+    other_labels, length = classify(file_name, bird_model)
+    other_labels = filter_tracks(other_labels)
+    cacophony_index, version = calc_cacophony_index(other_labels, length)
+    labels = list(other_labels)

     result = {}
     result["species_identify"] = labels
     result["species_identify_version"] = "2021-02-01"
+    result["cacophony_index"] = cacophony_index
+    result["cacophony_index_version"] = version

     return result
diff --git a/Melt/identify_bird.py b/Melt/identify_bird.py
index 1f60051..1592632 100644
--- a/Melt/identify_bird.py
+++ b/Melt/identify_bird.py
@@ -5,53 +5,81 @@ import logging
 import sys
 import json

-
-SEG_LENGTH = 3
-SEG_STRIDE = 1
+import audioread.ffdec  # Use ffmpeg decoder
+import math

 fmt = "%(process)d %(thread)s:%(levelname)7s %(message)s"
 logging.basicConfig(
     stream=sys.stderr, level=logging.INFO, format=fmt, datefmt="%Y-%m-%d %H:%M:%S"
 )
-
-
-def load_samples(path):
-    frames, sr = librosa.load(path, sr=None)
+PROB_THRESH = 0.8
+
+
+def load_recording(file, resample=48000):
+    # librosa.load(file) was giving strange results
+    aro = audioread.ffdec.FFmpegAudioFile(file)
+    frames, sr = librosa.load(aro)
+    aro.close()
+    if resample is not None and resample != sr:
+        frames = librosa.resample(frames, orig_sr=sr, target_sr=resample)
+        sr = resample
+    return frames, sr
+
+
+def load_samples(path, segment_length, stride, hop_length=640, mean_sub=False):
+    logging.debug(
+        "Loading samples with length %s stride %s hop length %s and mean_sub %s",
+        segment_length,
+        stride,
+        hop_length,
+        mean_sub,
+    )
+    frames, sr = load_recording(path)
     mels = []
     i = 0
     n_fft = sr // 10
-    hop_length = 640  # feature frame rate of 75
-
-    mel_all = librosa.feature.melspectrogram(
-        y=frames,
-        sr=sr,
-        n_fft=n_fft,
-        hop_length=hop_length,
-        fmin=50,
-        fmax=11000,
-        n_mels=80,
-    )
-    mel_all = librosa.power_to_db(mel_all, ref=np.max)
-    mel_sample_size = int(1 + SEG_LENGTH * sr / hop_length)
-    jumps_per_stride = int(mel_sample_size / 3.0)
+    # hop_length = 640  # feature frame rate of 75

-    length = mel_all.shape[1]
-    end = 0
+    sample_size = int(sr * segment_length)
+    jumps_per_stride = int(sr * stride)
+    length = len(frames) / sr
+    end = segment_length
     mel_samples = []
     i = 0
-    while end < length:
-        start = int(jumps_per_stride * (i * SEG_STRIDE))
-        end = start + mel_sample_size
-        mel = mel_all[:, start:end].copy()
-        mel_m = tf.reduce_mean(mel, axis=1)
-        mel_m = tf.expand_dims(mel_m, axis=1)
-        mel = mel - mel_m
-        if mel.shape[1] != 226:
-            # pad with zeros
-            empty = np.zeros(((80, 226)))
-            empty[:, : mel.shape[1]] = mel
-            mel = empty
+    while end < (length + stride):
+        if end > length:
+            # always use the end of the recording for the last sample
+            data = frames[-sample_size:]
+        else:
+            data = frames[i * jumps_per_stride : i * jumps_per_stride + sample_size]
+        if len(data) != sample_size:
+            sample = np.zeros(sample_size)
+            sample[: len(data)] = data
+            data = sample
+        end += stride
+        # start = int(jumps_per_stride * (i * stride))
+        mel = librosa.feature.melspectrogram(
+            y=data,
+            sr=sr,
+            n_fft=n_fft,
+            hop_length=hop_length,
+            fmin=50,
+            fmax=11000,
+            n_mels=80,
+        )
+        half = mel[:, 75:]
+        if np.amax(half) == np.amin(half):
+            # nothing useful here, stop early
+            strides_per = math.ceil(segment_length / 2.0 / stride) + 1
+            mel_samples = mel_samples[:-strides_per]
+            break
+        mel = librosa.power_to_db(mel)
+        # end = start + sample_size
+        if mean_sub:
+            mel_m = tf.reduce_mean(mel, axis=1)
+            mel_m = tf.expand_dims(mel_m, axis=1)
+            mel = mel - mel_m
         mel_samples.append(mel)
         i += 1
@@ -61,8 +89,11 @@ def load_samples(path):
 def load_model(model_path):
     logging.debug("Loading %s", model_path)
     model_path = Path(model_path)
-    model = tf.keras.models.load_model(model_path)
-    model.load_weights(model_path / "val_accuracy").expect_partial()
+    model = tf.keras.models.load_model(
+        str(model_path),
+        compile=False,
+    )
+    # model.load_weights(model_path / "val_binary_accuracy").expect_partial()
     meta_file = model_path / "metadata.txt"
     with open(meta_file, "r") as f:
         meta = json.load(f)
@@ -70,52 +101,97 @@ def load_model(model_path):


 def classify(file, model_file):
-    global SEG_LENGTH, SEG_STRIDE
-    samples, length = load_samples(file)
     model, meta = load_model(model_file)
     labels = meta.get("labels")
+    multi_label = meta.get("multi_label")
+    segment_length = meta.get("segment_length", 3)
+    segment_stride = meta.get("segment_stride", 1.5)
+    hop_length = meta.get("hop_length", 640)
+    mean_sub = meta.get("mean_sub", False)
+    model_name = meta.get("name", False)
+
+    samples, length = load_samples(
+        file, segment_length, segment_stride, hop_length, mean_sub=mean_sub
+    )
     predictions = model.predict(samples, verbose=0)
-
-    track = None
     tracks = []
     start = 0
+    active_tracks = {}
     for prediction in predictions:
-        best_i = np.argmax(prediction)
-        best_p = prediction[best_i]
-        label = labels[best_i]
-        if best_p > 0.7:
+        # last sample always ends at length of audio rec
+        if start + segment_length > length:
+            start = length - segment_length
+        specific_bird = False
+        results = []
+        track_labels = []
+        if multi_label:
+            for i, p in enumerate(prediction):
+                if p >= PROB_THRESH:
+                    label = labels[i]
+                    results.append((p, label))
+                    track_labels.append(label)
+                    specific_bird = specific_bird or label not in [
+                        "human",
+                        "noise",
+                        "bird",
+                    ]
+
+        else:
+            best_i = np.argmax(prediction)
+            best_p = prediction[best_i]
+            if best_p >= PROB_THRESH:
+                label = labels[best_i]
+                results.append((best_p, label))
+                track_labels.append(label)
+                specific_bird = label not in ["human", "noise", "bird"]
+
+        # remove tracks that have ended
+        existing_tracks = list(active_tracks.keys())
+        for existing in existing_tracks:
+            track = active_tracks[existing]
+            if track.label not in track_labels or (
+                track.label == "bird" and specific_bird
+            ):
+                if specific_bird:
+                    track.end = start
+                else:
+                    track.end = min(length, track.end - segment_length / 2)
+                del active_tracks[track.label]
+
+        for r in results:
+            label = r[1]
+            if specific_bird and label == "bird":
+                continue
+            track = active_tracks.get(label, None)
             if track is None:
-                track = Track(label, start, start + SEG_LENGTH, best_p)
-            elif track.label != label:
-                track.end = start
-                tracks.append((track))
-                track = Track(label, start, start + SEG_LENGTH, best_p)
+                track = Track(label, start, start + segment_length, r[0], model_name)
+                tracks.append(track)
+                active_tracks[label] = track
             else:
-                track.confidences.append(best_p)
-        elif track is not None:
-            track.end = start + (SEG_LENGTH / 2 - SEG_STRIDE)
-            tracks.append((track))
-            track = None
-
-        start += SEG_STRIDE
+                track.end = min(length, start + segment_length)
+            track.confidences.append(r[0])
+        # else:

-    if track is not None:
-        track.end = length
-        track.confidences.append(best_p)
-        tracks.append((track))
+        # elif track is not None:
+        #     track.end = start + (segment_length / 2 - segment_stride)
+        #     tracks.append((track))
+        #     track = None

-    return [t.get_meta() for t in tracks]
+        start += segment_stride
+    return [t.get_meta() for t in tracks], length


 class Track:
-    def __init__(self, label, start, end, confidence):
+    def __init__(self, label, start, end, confidence, model_name):
         self.start = start
         self.label = label
         self.end = end
         self.confidences = [confidence]
+        self.model = model_name

     def get_meta(self):
         meta = {}
+        meta["model"] = self.model
         meta["begin_s"] = self.start
         meta["end_s"] = self.end
         meta["species"] = self.label
diff --git a/Melt/identify_species.py b/Melt/identify_species.py
index 4ccea4f..0a7dd60 100644
--- a/Melt/identify_species.py
+++ b/Melt/identify_species.py
@@ -126,11 +126,11 @@ def build_entry(begin, end, species, activation):
     entry["end_s"] = end
     entry["species"] = species
     entry["likelihood"] = round(activation * 0.01, 2)
+    entry["model"] = "morepork"
     return entry


 def identify_species(recording, metadata, models):
-    # get spectrogram to be checked
     sr, npspec = _load_sample(recording)
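
Note for reviewers (not part of the diff): a minimal sketch of how the new calc_cacophony_index in Melt/chain.py bins detections into 20-second windows, assuming Melt/ is on the Python path. The track values and the 60-second recording length below are invented for illustration; the expected output follows from the binning logic added above.

    from chain import calc_cacophony_index

    # Hypothetical tracks, in the shape emitted by classify() in identify_bird.py.
    tracks = [
        {"species": "bird", "begin_s": 2.0, "end_s": 14.0},
        {"species": "morepork", "begin_s": 25.0, "end_s": 33.0},
        {"species": "noise", "begin_s": 40.0, "end_s": 50.0},  # in NON_BIRD, so ignored
    ]

    percents, version = calc_cacophony_index(tracks, length=60)
    # version == "1.0"
    # percents == [
    #     {"begin_s": 0, "end_s": 20, "index_percent": 60.0},   # 12 s of bird in the 0-20 s window
    #     {"begin_s": 20, "end_s": 40, "index_percent": 40.0},  # 8 s of bird in the 20-40 s window
    #     {"begin_s": 40, "end_s": 60, "index_percent": 0},     # only noise, which does not count
    # ]

Each window reports the percentage of its 20 seconds covered by non-human, non-noise tracks; species_identify in chain.py now returns this list under result["cacophony_index"] alongside the per-track species labels.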