From 9549082c5f8ff5c226dd6e67652b176bb02f799c Mon Sep 17 00:00:00 2001 From: Aidan Pine Date: Thu, 31 Oct 2024 22:21:08 +0000 Subject: [PATCH] fix: the vocoder expects [B, K, T] tensors and this applies during training too --- fs2/model.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs2/model.py b/fs2/model.py index 5a41c38..23660ee 100644 --- a/fs2/model.py +++ b/fs2/model.py @@ -353,7 +353,7 @@ def _validation_global_step_0(self, batch, batch_idx) -> None: self.config.preprocessing.audio.output_sampling_rate, ) if self.config.training.vocoder_path: - input_ = batch["mel"] + input_ = batch["mel"].transpose(1, 2) vocoder_ckpt = torch.load( self.config.training.vocoder_path, map_location=input_.device ) @@ -431,7 +431,7 @@ def _validation_batch_idx_0(self, batch, batch_idx, output) -> None: ) if self.config.training.vocoder_path: - input_ = output[self.output_key] + input_ = output[self.output_key].transpose(1, 2) vocoder_ckpt = torch.load( self.config.training.vocoder_path, map_location=input_.device )