diff --git a/README.md b/README.md index 42d0916..f501896 100644 --- a/README.md +++ b/README.md @@ -1,7 +1,12 @@ # GAN-Voice-Conversion Implementation of GAN architectures for Voice Conversion +## Requirements + +Install Python 3.5, then install the packages listed in `requirements.txt` with `pip install -r requirements.txt`. + ## How to run + 1) Download the data by running `download_data.py` 2) Choose the source and target speakers in `preprocess.py` and run it 3) Run the corresponding training script diff --git a/models/cyclegan_vc.py b/models/cyclegan_vc.py index cfd0ad0..ab87213 100644 --- a/models/cyclegan_vc.py +++ b/models/cyclegan_vc.py @@ -137,7 +137,7 @@ def train(self, input_A, input_B, lambda_cycle, lambda_identity, generator_learn generation_A, generation_B, generator_loss, _, generator_summaries = self.sess.run( [self.generation_A, self.generation_B, self.generator_loss, self.generator_optimizer, - self.generator_summaries], \ + self.generator_summaries], feed_dict={self.lambda_cycle: lambda_cycle, self.lambda_identity: lambda_identity, self.input_A_real: input_A, self.input_B_real: input_B, self.generator_learning_rate: generator_learning_rate}) @@ -145,7 +145,7 @@ def train(self, input_A, input_B, lambda_cycle, lambda_identity, generator_learn self.writer.add_summary(generator_summaries, self.train_step) discriminator_loss, _, discriminator_summaries = self.sess.run( - [self.discriminator_loss, self.discriminator_optimizer, self.discriminator_summaries], \ + [self.discriminator_loss, self.discriminator_optimizer, self.discriminator_summaries], feed_dict={self.input_A_real: input_A, self.input_B_real: input_B, self.discriminator_learning_rate: discriminator_learning_rate, self.input_A_fake: generation_A, self.input_B_fake: generation_B}) diff --git a/models/cyclegan_vc2.py b/models/cyclegan_vc2.py index fee0184..f35caab 100644 --- a/models/cyclegan_vc2.py +++ b/models/cyclegan_vc2.py @@ -40,6 +40,9 @@ def build_model(self): # Placeholders for fake generated samples 
self.input_A_fake = tf.placeholder(tf.float32, shape=self.input_shape, name='input_A_fake') self.input_B_fake = tf.placeholder(tf.float32, shape=self.input_shape, name='input_B_fake') + # Placeholders for cycle generated samples + self.input_A_cycle = tf.placeholder(tf.float32, shape=self.input_shape, name='input_A_cycle') + self.input_B_cycle = tf.placeholder(tf.float32, shape=self.input_shape, name='input_B_cycle') # Placeholder for test samples self.input_A_test = tf.placeholder(tf.float32, shape=self.input_shape, name='input_A_test') self.input_B_test = tf.placeholder(tf.float32, shape=self.input_shape, name='input_B_test') @@ -62,11 +65,18 @@ def build_model(self): batch_size=self.batch_size, reuse=True, scope_name='generator_A2B') + # One-step discriminator self.discrimination_A_fake = self.discriminator(inputs=self.generation_A, reuse=False, scope_name='discriminator_A') self.discrimination_B_fake = self.discriminator(inputs=self.generation_B, reuse=False, scope_name='discriminator_B') + # Two-step discriminator + self.discrimination_A_dot_fake = self.discriminator(inputs=self.cycle_A, reuse=False, + scope_name='discriminator_A_dot') + self.discrimination_B_dot_fake = self.discriminator(inputs=self.cycle_B, reuse=False, + scope_name='discriminator_B_dot') + # Cycle loss self.cycle_loss = l1_loss(y=self.input_A_real, y_hat=self.cycle_A) + l1_loss(y=self.input_B_real, y_hat=self.cycle_B) @@ -79,15 +89,24 @@ def build_model(self): self.lambda_cycle = tf.placeholder(tf.float32, None, name='lambda_cycle') self.lambda_identity = tf.placeholder(tf.float32, None, name='lambda_identity') - # Generator loss + # ------------------------------- Generator loss # Generator wants to fool discriminator self.generator_loss_A2B = l2_loss(y=tf.ones_like(self.discrimination_B_fake), y_hat=self.discrimination_B_fake) self.generator_loss_B2A = l2_loss(y=tf.ones_like(self.discrimination_A_fake), y_hat=self.discrimination_A_fake) + # Two-step generator loss + 
self.two_step_generator_loss_A = l2_loss(y=tf.ones_like(self.discrimination_A_dot_fake), + y_hat=self.discrimination_A_dot_fake) + self.two_step_generator_loss_B = l2_loss(y=tf.ones_like(self.discrimination_B_dot_fake), + y_hat=self.discrimination_B_dot_fake) + # Merge the two generators and the cycle loss - self.generator_loss = self.generator_loss_A2B + self.generator_loss_B2A + self.lambda_cycle * self.cycle_loss + self.lambda_identity * self.identity_loss + self.generator_loss = self.generator_loss_A2B + self.generator_loss_B2A + \ + self.two_step_generator_loss_A + self.two_step_generator_loss_B + \ + self.lambda_cycle * self.cycle_loss + self.lambda_identity * self.identity_loss - # Discriminator loss + # ------------------------------- Discriminator loss + # One-step self.discrimination_input_A_real = self.discriminator(inputs=self.input_A_real, reuse=True, scope_name='discriminator_A') self.discrimination_input_B_real = self.discriminator(inputs=self.input_B_real, reuse=True, @@ -97,6 +116,16 @@ def build_model(self): self.discrimination_input_B_fake = self.discriminator(inputs=self.input_B_fake, reuse=True, scope_name='discriminator_B') + # Two-step + self.discrimination_input_A_dot_real = self.discriminator(inputs=self.input_A_real, reuse=True, + scope_name='discriminator_A_dot') + self.discrimination_input_B_dot_real = self.discriminator(inputs=self.input_B_real, reuse=True, + scope_name='discriminator_B_dot') + self.discrimination_input_A_dot_fake = self.discriminator(inputs=self.input_A_cycle, reuse=True, + scope_name='discriminator_A_dot') + self.discrimination_input_B_dot_fake = self.discriminator(inputs=self.input_B_cycle, reuse=True, + scope_name='discriminator_B_dot') + # Discriminator wants to classify real and fake correctly self.discriminator_loss_input_A_real = l2_loss(y=tf.ones_like(self.discrimination_input_A_real), y_hat=self.discrimination_input_A_real) @@ -110,8 +139,23 @@ def build_model(self): y_hat=self.discrimination_input_B_fake) 
self.discriminator_loss_B = (self.discriminator_loss_input_B_real + self.discriminator_loss_input_B_fake) / 2 + # Two-step discriminator loss + self.two_step_discriminator_loss_input_A_real = l2_loss(y=tf.ones_like(self.discrimination_input_A_dot_real), + y_hat=self.discrimination_input_A_dot_real) + self.two_step_discriminator_loss_input_A_fake = l2_loss(y=tf.zeros_like(self.discrimination_input_A_dot_fake), + y_hat=self.discrimination_input_A_dot_fake) + self.two_step_discriminator_loss_A = (self.two_step_discriminator_loss_input_A_real + + self.two_step_discriminator_loss_input_A_fake) / 2 + self.two_step_discriminator_loss_input_B_real = l2_loss(y=tf.ones_like(self.discrimination_input_B_dot_real), + y_hat=self.discrimination_input_B_dot_real) + self.two_step_discriminator_loss_input_B_fake = l2_loss(y=tf.zeros_like(self.discrimination_input_B_dot_fake), + y_hat=self.discrimination_input_B_dot_fake) + self.two_step_discriminator_loss_B = (self.two_step_discriminator_loss_input_B_real + + self.two_step_discriminator_loss_input_B_fake) / 2 + # Merge the two discriminators into one - self.discriminator_loss = self.discriminator_loss_A + self.discriminator_loss_B + self.discriminator_loss = self.discriminator_loss_A + self.discriminator_loss_B + \ + self.two_step_discriminator_loss_A + self.two_step_discriminator_loss_B # Categorize variables because we have to optimize the two sets of the variables separately trainable_variables = tf.trainable_variables() @@ -139,9 +183,9 @@ def optimizer_initializer(self): def train(self, input_A, input_B, lambda_cycle, lambda_identity, generator_learning_rate, discriminator_learning_rate): - generation_A, generation_B, generator_loss, _, generator_summaries = self.sess.run( - [self.generation_A, self.generation_B, self.generator_loss, self.generator_optimizer, - self.generator_summaries], \ + generation_A, generation_B, cycle_A, cycle_B, generator_loss, _, generator_summaries = self.sess.run( + [self.generation_A, 
self.generation_B, self.cycle_A, self.cycle_B, self.generator_loss, + self.generator_optimizer, self.generator_summaries], feed_dict={self.lambda_cycle: lambda_cycle, self.lambda_identity: lambda_identity, self.input_A_real: input_A, self.input_B_real: input_B, self.generator_learning_rate: generator_learning_rate}) @@ -149,10 +193,11 @@ def train(self, input_A, input_B, lambda_cycle, lambda_identity, generator_learn self.writer.add_summary(generator_summaries, self.train_step) discriminator_loss, _, discriminator_summaries = self.sess.run( - [self.discriminator_loss, self.discriminator_optimizer, self.discriminator_summaries], \ + [self.discriminator_loss, self.discriminator_optimizer, self.discriminator_summaries], feed_dict={self.input_A_real: input_A, self.input_B_real: input_B, - self.discriminator_learning_rate: discriminator_learning_rate, self.input_A_fake: generation_A, - self.input_B_fake: generation_B}) + self.discriminator_learning_rate: discriminator_learning_rate, + self.input_A_fake: generation_A, self.input_B_fake: generation_B, + self.input_A_cycle: cycle_A, self.input_B_cycle: cycle_B}) self.writer.add_summary(discriminator_summaries, self.train_step) diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..dcac099 --- /dev/null +++ b/requirements.txt @@ -0,0 +1,8 @@ +numpy==1.16.0 +tensorflow-gpu==1.8 +progressbar2==3.37.1 +librosa==0.6.0 +ffmpeg==1.4 +pyworld==0.2.8 +wget==3.2 +tqdm==4.31.1 \ No newline at end of file diff --git a/train_cyclegan_vc2.py b/train_cyclegan_vc2.py index 2d7143f..d5504a4 100644 --- a/train_cyclegan_vc2.py +++ b/train_cyclegan_vc2.py @@ -9,7 +9,7 @@ dataset = 'vcc2018' src_speaker = 'VCC2SF3' trg_speaker = 'VCC2TM1' -model_name = 'cyclegan_vc2' +model_name = 'cyclegan_vc2_two_step' os.makedirs(os.path.join('experiments', dataset, model_name, 'checkpoints'), exist_ok=True) log_dir = os.path.join('logs', '{}_{}'.format(dataset, model_name)) os.makedirs(log_dir, exist_ok=True)