Commit

single config file for each experiment

AndreasPlt committed Jan 7, 2025
1 parent bb46dc7 commit 3551d74
Showing 7 changed files with 648 additions and 0 deletions.
@@ -0,0 +1,63 @@
from sisyphus import tk
import os

from i6_experiments.users.vieting.experiments.librispeech.\
librispeech_100_ctc.fairseq_finetuning.ctc_standalone.experiments.ctc_phon.baseline import eow_phon_ls100_ctc_base
from i6_experiments.users.vieting.experiments.librispeech.\
librispeech_960_pretraining.wav2vec2.config_02_fairseq_phoneme import \
get_fairseq_root, \
run_fairseq_pretraining


# pretraining
other_target_pretrain_job = run_fairseq_pretraining(
    exp_name="monophone_negatives_other_target_v1",
    commit="1397363c5c0e3c4e3ab620be562730399c852493",
    python_exe_hash_overwrite="itc_python_launcher_py310_torch",
    negative_sampling_strategy="other_target",
)


neg_hard_pretrain_job = run_fairseq_pretraining(
    exp_name="monophone_negatives_hard_v1",
    commit="be51394d876428ad531e0786d80de43d6a8818af",
    python_exe_hash_overwrite="itc_python_launcher_py310_torch",
    negative_sampling_strategy="hard_negatives",
)

# hard-negative pretraining continued from other_target checkpoints after 50, 100, 150, 200 epochs
neg_hard_pretrain_jobs = dict()
neg_hard_pretrain_jobs[0] = neg_hard_pretrain_job
for start_cp in [50, 100, 150, 200]:
    neg_hard_pretrain_jobs[start_cp] = run_fairseq_pretraining(
        exp_name=f"monophone_negatives_hard_after_{start_cp}ep_other_v1",
        commit="be51394d876428ad531e0786d80de43d6a8818af",
        python_exe_hash_overwrite="itc_python_launcher_py310_torch",
        checkpoint=other_target_pretrain_job.out_models[start_cp].model,
        negative_sampling_strategy="hard_negatives",
    )

# fairseq root
fairseq_root = get_fairseq_root(fairseq_exe=tk.Path("/usr/bin/python3"))

# Finetuning
base_model_conf = {
    "_name": "wav2vec_ctc",
    "apply_mask": True,
    "mask_prob": 0.65,
    "mask_channel_prob": 0.5,
    "mask_channel_length": 64,
    "layerdrop": 0.1,
    "activation_dropout": 0.1,
    "feature_grad_mult": 0.0,
    "freeze_finetune_updates": 10000,  # was 0 in fairseq config
}

for start_cp in [50, 100, 150, 200]:
    for additional_cp in range(50, 600+1-start_cp, 50):
        model_conf_w2v = base_model_conf.copy()
        model_conf_w2v["w2v_path"] = neg_hard_pretrain_jobs[start_cp].out_models[start_cp + additional_cp].model
        eow_phon_ls100_ctc_base(
            model_conf_w2v=model_conf_w2v,
            train_name_suffix=os.path.join("w2v_negatives_hard", f"other_{start_cp}_hard_{additional_cp}"),
            fairseq_root=fairseq_root,
        )
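For reference, a minimal sketch (illustration only, not part of this diff) of which pretraining checkpoints the nested loop above fine-tunes for each start_cp:

# Assumption: purely illustrative enumeration of the loop above.
for start_cp in [50, 100, 150, 200]:
    checkpoints = [start_cp + add for add in range(50, 600 + 1 - start_cp, 50)]
    print(start_cp, checkpoints)
# start_cp=50  -> 100, 150, ..., 600
# start_cp=200 -> 250, 300, ..., 600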
@@ -0,0 +1,172 @@
from sisyphus import tk
import os

from i6_experiments.users.vieting.experiments.librispeech.\
librispeech_100_ctc.fairseq_finetuning.ctc_standalone.experiments.ctc_phon.baseline import eow_phon_ls100_ctc_base
from i6_experiments.users.vieting.experiments.librispeech.\
librispeech_960_pretraining.wav2vec2.config_02_fairseq_phoneme import \
get_fairseq_root, \
run_fairseq_pretraining


# pretraining
neg_other_pretrain_job = run_fairseq_pretraining(
    exp_name="monophone_negatives_other_target_v1",
    commit="1397363c5c0e3c4e3ab620be562730399c852493",
    python_exe_hash_overwrite="itc_python_launcher_py310_torch",
    negative_sampling_strategy="other_target",
)

# fairseq root
fairseq_root = get_fairseq_root(fairseq_exe=tk.Path("/usr/bin/python3"))

# Finetuning

base_model_conf = {
    "_name": "wav2vec_ctc",
    "apply_mask": True,
    "mask_prob": 0.65,
    "mask_channel_prob": 0.5,
    "mask_channel_length": 64,
    "layerdrop": 0.1,
    "activation_dropout": 0.1,
    "feature_grad_mult": 0.0,
    "freeze_finetune_updates": 10000,  # was 0 in fairseq config
}

checkpoints = [100, 200, 300, 400, 500, 600]
for checkpoint in checkpoints:
    # negative sampling
    model_conf_w2v = base_model_conf.copy()
    model_conf_w2v["w2v_path"] = neg_other_pretrain_job.out_models[checkpoint].model
    eow_phon_ls100_ctc_base(
        model_conf_w2v=model_conf_w2v,
        train_name_suffix=os.path.join("w2v_neg_sampling_other_target", f"checkpoint_{checkpoint}"),
        fairseq_root=fairseq_root,
    )


# finetuning experiments only for the last checkpoint
CHECKPOINT = 600
# random vs phoneme mask in finetuning
model_conf_w2v = base_model_conf.copy()  # base model, no need to set `mask_strategy` and `mask_length`
model_conf_w2v["w2v_path"] = neg_other_pretrain_job.out_models[CHECKPOINT].model
eow_phon_ls100_ctc_base(
    model_conf_w2v=model_conf_w2v,
    train_name_suffix=os.path.join(
        "w2v_neg_sampling_other_target",
        "random_spec",
        f"checkpoint_{CHECKPOINT}"
    ),
    fairseq_root=fairseq_root,
)
model_conf_w2v = base_model_conf.copy()
model_conf_w2v["w2v_path"] = neg_other_pretrain_job.out_models[CHECKPOINT].model
model_conf_w2v["mask_strategy"] = "phoneme"
model_conf_w2v["mask_length"] = 1
eow_phon_ls100_ctc_base(
    model_conf_w2v=model_conf_w2v,
    train_name_suffix=os.path.join(
        "w2v_neg_sampling_other_target",
        "phoneme_spec",
        f"checkpoint_{CHECKPOINT}"
    ),
    fairseq_root=fairseq_root,
)

# phoneme mask lengths in finetuning
model_conf_w2v = base_model_conf.copy()
model_conf_w2v["w2v_path"] = neg_other_pretrain_job.out_models[CHECKPOINT].model
model_conf_w2v["mask_strategy"] = "phoneme"
model_conf_w2v["mask_length"] = 1
eow_phon_ls100_ctc_base(
    model_conf_w2v=model_conf_w2v,
    train_name_suffix=os.path.join(
        "w2v_neg_sampling_other_target",
        "1_phoneme_spec",
        f"checkpoint_{CHECKPOINT}"
    ),
    fairseq_root=fairseq_root,
)
model_conf_w2v = base_model_conf.copy()
model_conf_w2v["w2v_path"] = neg_other_pretrain_job.out_models[CHECKPOINT].model
model_conf_w2v["mask_strategy"] = "phoneme"
model_conf_w2v["mask_length"] = 2
eow_phon_ls100_ctc_base(
    model_conf_w2v=model_conf_w2v,
    train_name_suffix=os.path.join(
        "w2v_neg_sampling_other_target",
        "2_phoneme_spec",
        f"checkpoint_{CHECKPOINT}"
    ),
    fairseq_root=fairseq_root,
)

model_conf_w2v = base_model_conf.copy()
model_conf_w2v["w2v_path"] = neg_other_pretrain_job.out_models[CHECKPOINT].model
model_conf_w2v["mask_strategy"] = "phoneme"
model_conf_w2v["mask_length"] = 1
model_conf_w2v["mask_other"] = 1
model_conf_w2v["mask_selection"] = "uniform"
eow_phon_ls100_ctc_base(
    model_conf_w2v=model_conf_w2v,
    train_name_suffix=os.path.join(
        "w2v_neg_sampling_other_target",
        "1_2_phoneme_spec",
        f"checkpoint_{CHECKPOINT}"
    ),
    fairseq_root=fairseq_root,
)

# mask probability in finetuning
model_conf_w2v = base_model_conf.copy()
model_conf_w2v["w2v_path"] = neg_other_pretrain_job.out_models[CHECKPOINT].model
model_conf_w2v["mask_prob"] = 0.35
eow_phon_ls100_ctc_base(
    model_conf_w2v=model_conf_w2v,
    train_name_suffix=os.path.join(
        "w2v_neg_sampling_other_target",
        "0_35_phoneme_mask_prob",
        f"checkpoint_{CHECKPOINT}"
    ),
    fairseq_root=fairseq_root,
)

model_conf_w2v = base_model_conf.copy()
model_conf_w2v["w2v_path"] = neg_other_pretrain_job.out_models[CHECKPOINT].model
model_conf_w2v["mask_prob"] = 0.5
eow_phon_ls100_ctc_base(
    model_conf_w2v=model_conf_w2v,
    train_name_suffix=os.path.join(
        "w2v_neg_sampling_other_target",
        "0_5_phoneme_mask_prob",
        f"checkpoint_{CHECKPOINT}"
    ),
    fairseq_root=fairseq_root,
)

model_conf_w2v = base_model_conf.copy()
model_conf_w2v["w2v_path"] = neg_other_pretrain_job.out_models[CHECKPOINT].model
model_conf_w2v["mask_prob"] = 0.65  # base model
eow_phon_ls100_ctc_base(
    model_conf_w2v=model_conf_w2v,
    train_name_suffix=os.path.join(
        "w2v_neg_sampling_other_target",
        "0_65_phoneme_mask_prob",
        f"checkpoint_{CHECKPOINT}"
    ),
    fairseq_root=fairseq_root,
)

model_conf_w2v = base_model_conf.copy()
model_conf_w2v["w2v_path"] = neg_other_pretrain_job.out_models[CHECKPOINT].model
model_conf_w2v["mask_prob"] = 0.8
eow_phon_ls100_ctc_base(
    model_conf_w2v=model_conf_w2v,
    train_name_suffix=os.path.join(
        "w2v_neg_sampling_other_target",
        "0_8_phoneme_mask_prob",
        f"checkpoint_{CHECKPOINT}"
    ),
    fairseq_root=fairseq_root,
)
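The four mask-probability experiments above differ only in the mask_prob override applied to base_model_conf; a minimal sketch (assumption: an equivalent compact formulation for illustration, not what this commit contains) of the same sweep:

# Assumption: illustrative rewrite of the repeated blocks above as one loop.
for mask_prob in [0.35, 0.5, 0.65, 0.8]:
    model_conf_w2v = base_model_conf.copy()
    model_conf_w2v["w2v_path"] = neg_other_pretrain_job.out_models[CHECKPOINT].model
    model_conf_w2v["mask_prob"] = mask_prob
    eow_phon_ls100_ctc_base(
        model_conf_w2v=model_conf_w2v,
        train_name_suffix=os.path.join(
            "w2v_neg_sampling_other_target",
            f"{str(mask_prob).replace('.', '_')}_phoneme_mask_prob",
            f"checkpoint_{CHECKPOINT}",
        ),
        fairseq_root=fairseq_root,
    )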