Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add zipformer recipe for audio tagging #1421

Merged
merged 33 commits into from
Apr 9, 2024
Merged
Show file tree
Hide file tree
Changes from 11 commits
Commits
Show all changes
33 commits
Select commit Hold shift + click to select a range
baa03c7
initial commit
marcoyang1998 Dec 19, 2023
a1aca34
add datamodule for audioset
marcoyang1998 Dec 19, 2023
bf58b63
minor fix
marcoyang1998 Dec 19, 2023
57ff00d
add softlink
marcoyang1998 Dec 19, 2023
bd01c21
add evaluation script
marcoyang1998 Dec 19, 2023
3e22108
update the manifest
marcoyang1998 Mar 20, 2024
1279355
Merge branch 'master' of github.com:marcoyang1998/icefall into audio_…
marcoyang1998 Mar 20, 2024
4e14800
add export.py
marcoyang1998 Mar 20, 2024
219d55d
support exporting the pretrained model
marcoyang1998 Mar 20, 2024
1921692
add file
marcoyang1998 Mar 20, 2024
9c4db1b
add inference script with a pretrained model
marcoyang1998 Mar 20, 2024
4bce81b
fix style
marcoyang1998 Mar 26, 2024
18479fc
Merge remote-tracking branch 'origin' into audio_tagging
marcoyang1998 Mar 26, 2024
7a8c9b7
fix style
marcoyang1998 Mar 26, 2024
f4c1872
enhance documentation
marcoyang1998 Mar 26, 2024
64dbcd0
minor changes
marcoyang1998 Mar 26, 2024
8b234b3
fix doc
marcoyang1998 Mar 26, 2024
a8ca029
fix the comments; wrap the classifier for jit script
marcoyang1998 Mar 29, 2024
2d1072f
add a file to test jit script model
marcoyang1998 Mar 29, 2024
6a7ac68
minor updates
marcoyang1998 Mar 29, 2024
5a4b712
update comments in evaluate.py
marcoyang1998 Mar 29, 2024
9e9bc75
minor updates
marcoyang1998 Mar 29, 2024
39e7de4
add readme and results
marcoyang1998 Mar 29, 2024
ff2975d
support export onnx model
marcoyang1998 Mar 29, 2024
7bd679f
add onnx pretrained
marcoyang1998 Mar 29, 2024
686d2d9
minor updates
marcoyang1998 Mar 29, 2024
f3e8e42
fix style
marcoyang1998 Apr 7, 2024
01b744f
support onnx export with batch size 1; also works for batch processin…
marcoyang1998 Apr 7, 2024
25d22d9
update the script to generate audioset manfiest
marcoyang1998 Apr 8, 2024
ff484be
add prepare.sh
marcoyang1998 Apr 8, 2024
1ca4646
add missing files
marcoyang1998 Apr 8, 2024
864914f
update comments
marcoyang1998 Apr 8, 2024
b134889
add link to audioset
marcoyang1998 Apr 9, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
135 changes: 135 additions & 0 deletions egs/audioset/AT/local/generate_audioset_manifest.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,135 @@
import argparse
marcoyang1998 marked this conversation as resolved.
Show resolved Hide resolved
import csv

import torch
import torchaudio
import logging
import glob
from lhotse import load_manifest, CutSet, Fbank, FbankConfig, LilcomChunkyWriter
from lhotse.cut import MonoCut
from lhotse.audio import Recording
from lhotse.supervision import SupervisionSegment
from argparse import ArgumentParser
csukuangfj marked this conversation as resolved.
Show resolved Hide resolved

from icefall.utils import get_executor, str2bool

# Restrict PyTorch to one intra-op thread and one inter-op thread for this
# process.
# NOTE(review): presumably this avoids CPU oversubscription when feature
# extraction is fanned out over several parallel jobs -- confirm.
torch.set_num_threads(1)
torch.set_num_interop_threads(1)

marcoyang1998 marked this conversation as resolved.
Show resolved Hide resolved
def parse_csv(csv_file="downloads/audioset/full_train_asedata_with_duration.csv"):
    """Read a tab-separated label file and map each clip to its label field.

    The first row is treated as a header and skipped. For every other
    non-empty row, the key is built from the last two ``/``-separated
    components of the first column (e.g. ``wav_all/clip.wav``) and the value
    is the second column, stored verbatim as a string.

    Args:
      csv_file:
        Path to the tab-separated file to read.

    Returns:
      A dict mapping ``"<parent_dir>/<file_name>"`` to the raw content of the
      second column. If the same key occurs more than once, the last row wins.
      An empty file yields an empty dict.

    Raises:
      IndexError: If a non-empty row has fewer than two columns.
    """
    mapping = {}
    # The csv module expects files opened with newline="" so that it can do
    # its own newline handling (e.g. for quoted fields and "\r\n" endings).
    with open(csv_file, "r", newline="") as fin:
        reader = csv.reader(fin, delimiter="\t")
        # Skip the header row; the default makes this a no-op on an empty file.
        next(reader, None)
        for row in reader:
            if not row:
                # csv.reader yields an empty list for blank lines; skipping
                # them avoids an IndexError on row[0].
                continue
            key = "/".join(row[0].split("/")[-2:])
            mapping[key] = row[1]
    return mapping


def get_parser():
    """Build the command-line parser for this script.

    Returns:
      An ``argparse.ArgumentParser`` that accepts ``--dataset-dir``,
      ``--split`` and ``--feat-output-dir``.
    """
    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter
    )

    # ArgumentDefaultsHelpFormatter only appends "(default: ...)" to arguments
    # that have a help string, so every option gets one.
    parser.add_argument(
        "--dataset-dir",
        type=str,
        default="downloads/audioset",
        help="Directory under which the AudioSet wav files are searched for.",
    )

    parser.add_argument(
        "--split",
        type=str,
        default="balanced",
        choices=["balanced", "unbalanced", "eval", "eval_all"],
        help="Which split to generate the manifest for.",
    )

    parser.add_argument(
        "--feat-output-dir",
        type=str,
        default="data/fbank_audioset",
        help="Directory where the fbank features and the cut manifest are stored.",
    )

    return parser

def _build_cut(audio, labels):
    """Create a single-channel cut with one supervision for one wav file.

    Args:
      audio:
        Path to the wav file.
      labels:
        Dict mapping cut ids (``"<parent_dir>/<file_name>"``) to label strings.

    Returns:
      A ``MonoCut`` spanning the whole recording. Its only supervision carries
      the label string in ``audio_event`` (an empty string if no label exists).
    """
    # The id uses the same "last two path components" scheme as the label keys.
    cut_id = "/".join(audio.split("/")[-2:])
    recording = Recording.from_file(audio, cut_id)
    cut = MonoCut(
        id=cut_id,
        start=0.0,
        duration=recording.duration,
        channel=0,
        recording=recording,
    )
    supervision = SupervisionSegment(
        id=cut_id,
        recording_id=cut.recording.id,
        start=0.0,
        channel=0,
        duration=cut.duration,
    )
    try:
        supervision.audio_event = labels[cut_id]
    except KeyError:
        # Keep unlabeled clips, but with an empty label string.
        logging.info(f"No labels found for {cut_id}.")
        supervision.audio_event = ""
    cut.supervisions = [supervision]
    return cut


def main():
    """Generate the cut manifest and fbank features for one AudioSet split.

    Steps:
      1. Parse the command-line arguments.
      2. Load the labels from the csv file belonging to the chosen split.
      3. Build one cut (with one supervision) per wav file found.
      4. Compute 80-dim fbank features and store them under
         ``<feat-output-dir>/<split>_feats``.
      5. Write the manifest to
         ``<feat-output-dir>/cuts_audioset_<split>.jsonl.gz``.

    Raises:
      ValueError: If the split is not one of the supported values.
    """
    parser = get_parser()
    args = parser.parse_args()

    dataset_dir = args.dataset_dir
    split = args.split
    feat_output_dir = args.feat_output_dir

    num_jobs = 15
    num_mel_bins = 80

    # The label files are looked up under --dataset-dir instead of a
    # hard-coded "downloads/audioset"; the default value keeps the old paths.
    if split in ["balanced", "unbalanced"]:
        csv_file = f"{dataset_dir}/full_train_asedata_with_duration.csv"
    elif split == "eval":
        csv_file = f"{dataset_dir}/eval.csv"
    elif split == "eval_all":
        csv_file = f"{dataset_dir}/eval_all.csv"
    else:
        raise ValueError(f"Unsupported split: {split}")

    labels = parse_csv(csv_file)

    # NOTE(review): this pattern always scans eval/wav_all, whatever --split
    # is. The directory layout of the other splits is not visible here --
    # confirm the intended location before using this for training splits.
    wav_pattern = f"{dataset_dir}/eval/wav_all/*.wav"
    # Sort so that the order of the cuts is reproducible across runs.
    audio_files = sorted(glob.glob(wav_pattern))
    if not audio_files:
        logging.warning(f"No wav files found matching {wav_pattern}")

    new_cuts = []
    for i, audio in enumerate(audio_files):
        new_cuts.append(_build_cut(audio, labels))

        if i % 100 == 0 and i:
            logging.info(f"Processed {i} cuts until now.")

    cuts = CutSet.from_cuts(new_cuts)

    extractor = Fbank(FbankConfig(num_mel_bins=num_mel_bins))

    logging.info(f"Computing fbank features for {split}")
    with get_executor() as ex:
        cuts = cuts.compute_and_store_features(
            extractor=extractor,
            storage_path=f"{feat_output_dir}/{split}_feats",
            # Use the local job count unless an executor is provided.
            num_jobs=num_jobs if ex is None else 80,
            executor=ex,
            storage_type=LilcomChunkyWriter,
        )

    manifest_path = f"{feat_output_dir}/cuts_audioset_{split}.jsonl.gz"

    logging.info(f"Storing the manifest to {manifest_path}")
    cuts.to_jsonl(manifest_path)

if __name__ == "__main__":
    # Configure root logging (timestamp, level, source location) before any
    # work starts, then run the script.
    formatter = "%(asctime)s %(levelname)s [%(filename)s:%(lineno)d] %(message)s"
    logging.basicConfig(
        level=logging.INFO,
        format=formatter,
    )
    main()
Loading
Loading