-
Notifications
You must be signed in to change notification settings - Fork 125
/
Copy pathdiscriminator.py
96 lines (79 loc) · 3.18 KB
/
discriminator.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
from keras.models import load_model
import numpy as np
from keras.optimizers import Adam
from keras.models import Model
from keras.layers import Dense, Conv2D, Conv3D, BatchNormalization, Activation, \
Concatenate, AvgPool2D, Input, MaxPool2D, UpSampling2D, Add, \
ZeroPadding2D, ZeroPadding3D, Lambda, Reshape, Flatten, LeakyReLU
from keras_contrib.layers import InstanceNormalization
from keras.callbacks import ModelCheckpoint
from keras import backend as K
import keras
import cv2
import os
import librosa
import scipy
from keras.utils import plot_model
import tensorflow as tf
from keras.utils import multi_gpu_model
import tensorflow as tf
from keras import backend as K
class ModelMGPU(Model):
    """Multi-GPU model whose load/save attributes come from the serial model.

    The instance adopts the attribute dictionary of the model returned by
    ``multi_gpu_model`` and keeps the original single-device model in
    ``_smodel``.
    """

    def __init__(self, ser_model, gpus):
        """Wrap ``ser_model`` using ``multi_gpu_model(ser_model, gpus)``.

        ``Model.__init__`` is intentionally not invoked: the state of the
        parallel model is copied into this instance instead.
        """
        parallel = multi_gpu_model(ser_model, gpus)
        self.__dict__.update(parallel.__dict__)
        self._smodel = ser_model

    def __getattribute__(self, attrname):
        """Resolve load/save attributes on the serial model.

        Any attribute whose name contains ``'load'`` or ``'save'`` is looked
        up on ``_smodel``; everything else uses the normal lookup. Per the
        original author's note, the serial model holds references to the
        weights in the multi-GPU model.
        """
        wants_serial = 'load' in attrname or 'save' in attrname
        if not wants_serial:
            return super(ModelMGPU, self).__getattribute__(attrname)
        return getattr(self._smodel, attrname)
def contrastive_loss(y_true, y_pred):
    """Return the mean contrastive loss with a fixed margin of 1.

    Where ``y_true`` is 0 the squared prediction is penalised; where
    ``y_true`` is 1 the squared shortfall of the prediction below the margin
    is penalised. The two terms are summed and averaged with ``K.mean``.
    """
    margin = 1.
    # Term active for y_true == 0: pulls the predicted value toward zero.
    pull_term = (1. - y_true) * K.square(y_pred)
    # Term active for y_true == 1: pushes the predicted value past the margin.
    push_term = y_true * K.square(K.maximum(0., margin - y_pred))
    return K.mean(pull_term + push_term)
def conv_block(x, num_filters, kernel_size=3, strides=2, padding='same'):
    """Apply Conv2D, InstanceNormalization and LeakyReLU(alpha=0.2) to ``x``.

    ``num_filters``, ``kernel_size``, ``strides`` and ``padding`` are passed
    straight through to ``Conv2D``. Returns the activated tensor.
    """
    convolution = Conv2D(
        filters=num_filters,
        kernel_size=kernel_size,
        strides=strides,
        padding=padding,
    )
    features = convolution(x)
    features = InstanceNormalization()(features)
    return LeakyReLU(alpha=.2)(features)
def create_model(args):
    """Build and compile the face/audio discriminator.

    Two convolutional encoders map a face image and an audio feature map to
    flat embeddings. Both embeddings are L2-normalised and the model outputs
    their Euclidean distance, trained with ``contrastive_loss``.

    Args:
        args: object exposing
            ``img_size`` - side length of the square RGB face input,
            ``n_gpu``    - when greater than 1 the model is wrapped in
                           ``ModelMGPU``,
            ``lr``       - learning rate passed to ``Adam``.

    Returns:
        The compiled model taking ``[input_face_disc, input_audio_disc]`` and
        producing one distance value per sample.

    Side effects:
        Prints ``model.summary()``.
    """
    # ---------------- encoder for face/identity ----------------
    # NOTE(review): the two embeddings must have equal length for the
    # subtraction below. Assuming Keras 'same' padding (output =
    # ceil(input / stride)), that appears to require 65 <= img_size <= 96
    # (e.g. 96) - confirm against the training configuration.
    input_face = Input(shape=(args.img_size, args.img_size, 3),
                       name="input_face_disc")
    x = conv_block(input_face, 64, 7)
    x = conv_block(x, 128, 5)
    x = conv_block(x, 256, 3)
    x = conv_block(x, 512, 3)
    x = conv_block(x, 512, 3)
    x = Conv2D(filters=512, kernel_size=3, strides=1, padding="valid")(x)
    face_embedding = Flatten()(x)

    # ---------------- encoder for audio ----------------
    input_audio = Input(shape=(12, 35, 1), name="input_audio_disc")
    x = conv_block(input_audio, 64, 3, 1)
    x = conv_block(x, 128, 3, 1)
    # Pad one row at the top of the first spatial axis only.
    x = ZeroPadding2D(((1, 0), (0, 0)))(x)
    x = conv_block(x, 256, 3, (1, 2))
    x = conv_block(x, 256, 3, 1)
    x = conv_block(x, 256, 3, 2)
    x = conv_block(x, 512, 3, 2)
    x = Conv2D(filters=512, kernel_size=(4, 5), strides=(1, 1),
               padding="valid")(x)
    audio_embedding = Flatten()(x)

    # L2-normalize before taking L2 distance (one shared Lambda layer).
    l2_normalize = Lambda(lambda t: K.l2_normalize(t, axis=1))
    face_embedding = l2_normalize(face_embedding)
    audio_embedding = l2_normalize(audio_embedding)

    # Euclidean distance between the embeddings. The squared distance is
    # floored at K.epsilon() before the square root: the derivative of sqrt
    # is infinite at 0, so identical embeddings would otherwise produce NaN
    # gradients during training.
    d = Lambda(
        lambda pair: K.sqrt(
            K.maximum(
                K.sum(K.square(pair[0] - pair[1]), axis=1, keepdims=True),
                K.epsilon(),
            )
        )
    )([face_embedding, audio_embedding])

    model = Model(inputs=[input_face, input_audio], outputs=[d])
    model.summary()

    if args.n_gpu > 1:
        model = ModelMGPU(model, args.n_gpu)

    model.compile(loss=contrastive_loss, optimizer=Adam(lr=args.lr))
    return model
if __name__ == '__main__':
    # create_model() requires an ``args`` object exposing img_size, lr and
    # n_gpu; calling it with no argument raised TypeError. Build that object
    # from the command line so the script can be run directly.
    import argparse

    parser = argparse.ArgumentParser(
        description='Build the discriminator model and print its summary.')
    # NOTE(review): defaults below are placeholders chosen so the script
    # runs out of the box - confirm against the project's training settings.
    parser.add_argument('--img_size', type=int, default=96,
                        help='side length of the square face input')
    parser.add_argument('--lr', type=float, default=1e-3,
                        help='learning rate for the Adam optimizer')
    parser.add_argument('--n_gpu', type=int, default=1,
                        help='number of GPUs; values > 1 use ModelMGPU')
    model = create_model(parser.parse_args())
    #plot_model(model, to_file='model.png', show_shapes=True)