Skip to content

Commit

Permalink
minor changes
Browse files Browse the repository at this point in the history
  • Loading branch information
NumericalMax committed Feb 4, 2024
1 parent 9dadcd6 commit 0d2e5be
Show file tree
Hide file tree
Showing 10 changed files with 1,164 additions and 606 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -166,3 +166,4 @@ cython_debug/
/mlruns/
/results/
/src/mlruns/
/test/
1,580 changes: 1,001 additions & 579 deletions analysis/article.ipynb

Large diffs are not rendered by default.

19 changes: 16 additions & 3 deletions src/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
import datetime
import os

import numpy as np
import tensorflow as tf
from keras.src.callbacks import ReduceLROnPlateau, TerminateOnNaN, CSVLogger, EarlyStopping, ModelCheckpoint
from keras.src.optimizers import RMSprop
Expand Down Expand Up @@ -34,7 +35,6 @@ def main(parameters):
######################################################
# DATA LOADING
######################################################
#train, size_train = Helper.load_dataset(parameters['train_dataset'])
train, size_train = Helper.load_multiple_datasets(parameters['train_dataset'])
val, size_val = Helper.load_multiple_datasets(parameters['val_dataset'])

Expand All @@ -55,7 +55,6 @@ def main(parameters):

encoder = Encoder(parameters['latent_dimension'])
decoder = Decoder(parameters['latent_dimension'])

vae = TCVAE(encoder, decoder, parameters['coefficients'], size_train)
vae.compile(optimizer=RMSprop(learning_rate=parameters['learning_rate']))
vae.fit(
Expand All @@ -79,7 +78,7 @@ def main(parameters):

if __name__ == '__main__':
parser = argparse.ArgumentParser(
prog='VECG', description='Representational Learning of ECG using TC-VAE',
prog='VECG', description='Representational Learning of ECG using disentangling VAE',
)
parser.add_argument(
'-p', '--path_config', type=str, default='./params.yml',
Expand All @@ -88,4 +87,18 @@ def main(parameters):

args = parser.parse_args()
parameters = Helper.load_yaml_file(args.path_config)

print(type(parameters['coefficients']['alpha']))

main(parameters)

#for latent_dim in [4, 8, 16, 24]:
# for alpha in [0.1, 0.5]:
# for beta in [0.5, 1.0, 4.0]:
# for gamma in [0.1, 0.5, 1.0]:
# parameters['latent_dimension'] = latent_dim
# parameters['coefficients']['alpha'] = float(alpha)
# parameters['coefficients']['beta'] = float(beta)
# parameters['coefficients']['gamma'] = float(gamma)
# print(parameters)
# main(parameters)
48 changes: 48 additions & 0 deletions src/metrics/disentanglement.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
from sklearn.preprocessing import KBinsDiscretizer
from sklearn.metrics import mutual_info_score
import numpy as np
from sklearn import svm


class Disentanglement:
Expand Down Expand Up @@ -30,6 +31,36 @@ def _make_discretizer(target, num_bins):
dis = KBinsDiscretizer(num_bins, encode='ordinal').fit(target)
return dis.transform(target)

@staticmethod
def compute_score_matrix(mus, ys, mus_test, ys_test, continuous_factors):
    """Build the latent-versus-factor score matrix used by the SAP metric.

    Entry ``[i, j]`` scores how well latent ``i`` alone predicts factor ``j``.

    Args:
        mus: 2-D array indexed as ``mus[i, :]``; one row per latent.
        ys: 2-D array indexed as ``ys[j, :]``; one row per factor.
        mus_test: Held-out latents, same layout as ``mus``. Only read when
            ``continuous_factors`` is falsy.
        ys_test: Held-out factors, same layout as ``ys``. Only read when
            ``continuous_factors`` is falsy.
        continuous_factors: If truthy, the score is the squared covariance
            divided by the product of the two variances. Otherwise it is the
            held-out accuracy of a linear SVM fitted on the single latent.

    Returns:
        ``np.ndarray`` of shape ``(mus.shape[0], ys.shape[0])``.
    """
    num_latents = mus.shape[0]
    num_factors = ys.shape[0]
    score_matrix = np.zeros([num_latents, num_factors])
    for i in range(num_latents):
        for j in range(num_factors):
            mu_i = mus[i, :]
            y_j = ys[j, :]
            if continuous_factors:
                # Attribute is considered continuous.
                cov_mu_i_y_j = np.cov(mu_i, y_j, ddof=1)
                cov_mu_y = cov_mu_i_y_j[0, 1] ** 2
                var_mu = cov_mu_i_y_j[0, 0]
                var_y = cov_mu_i_y_j[1, 1]
                # Both variances sit in the denominator. A (near-)constant
                # latent OR factor would otherwise yield 0/0 = NaN, which
                # silently corrupts any sort/mean over the matrix.
                if var_mu > 1e-12 and var_y > 1e-12:
                    score_matrix[i, j] = cov_mu_y * 1. / (var_mu * var_y)
                else:
                    score_matrix[i, j] = 0.
            else:
                # Attribute is considered discrete.
                mu_i_test = mus_test[i, :]
                y_j_test = ys_test[j, :]
                classifier = svm.LinearSVC(C=0.01, class_weight="balanced")
                # The classifier expects a 2-D feature matrix, hence the
                # added trailing axis on the single latent.
                classifier.fit(mu_i[:, np.newaxis], y_j)
                pred = classifier.predict(mu_i_test[:, np.newaxis])
                score_matrix[i, j] = np.mean(pred == y_j_test)
    return score_matrix

@staticmethod
def compute_mig(mus_train, ys_train, num_bins=10):
"""Computes score based on both training and testing codes and factors."""
Expand All @@ -44,3 +75,20 @@ def compute_mig(mus_train, ys_train, num_bins=10):
sorted_m = np.sort(m, axis=0)[::-1]
score_dict["discrete_mig"] = np.mean(np.divide(sorted_m[0, :] - sorted_m[1, :], entropy[:]))
return score_dict

@staticmethod
def compute_avg_diff_top_two(matrix):
    """Return the mean, over columns, of (largest - second largest) entry.

    Each column of ``matrix`` is sorted independently along axis 0; the gap
    between its two largest values is taken and the gaps are averaged.
    """
    ascending = np.sort(matrix, axis=0)
    best = ascending[-1, :]
    runner_up = ascending[-2, :]
    gap_per_column = best - runner_up
    return np.mean(gap_per_column)

@staticmethod
def _compute_sap(mus, ys, mus_test, ys_test, continuous_factors):
    """Compute the SAP score from latent codes and factors.

    Delegates to ``Disentanglement.compute_score_matrix`` for the
    latent-by-factor scores and to
    ``Disentanglement.compute_avg_diff_top_two`` for the final reduction.

    Returns:
        A dict with the single key ``"SAP_score"``.
    """
    scores = Disentanglement.compute_score_matrix(
        mus, ys, mus_test, ys_test, continuous_factors,
    )
    # Sanity check: rows follow the latents, columns follow the factors.
    assert scores.shape[0] == mus.shape[0]
    assert scores.shape[1] == ys.shape[0]
    sap = Disentanglement.compute_avg_diff_top_two(scores)
    return {"SAP_score": sap}
10 changes: 5 additions & 5 deletions src/model/decoder.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,15 +10,15 @@ def __init__(self, latent_dim):
self.latent_inputs = keras.Input(shape=(latent_dim,))
self.x = tf.keras.layers.Dense(20)(self.latent_inputs)
self.x = tf.keras.layers.Reshape((5, 4))(self.x)
self.x = tf.keras.layers.Conv1DTranspose(filters=128, kernel_size=2, strides=1, padding='same')(self.x)
self.x = tf.keras.layers.Conv1DTranspose(filters=256, kernel_size=5, strides=1, padding='same')(self.x)
self.x = tf.keras.layers.LeakyReLU()(self.x)
self.x = tf.keras.layers.Conv1DTranspose(filters=64, kernel_size=5, strides=2, padding='same')(self.x)
self.x = tf.keras.layers.Conv1DTranspose(filters=128, kernel_size=5, strides=2, padding='same')(self.x)
self.x = tf.keras.layers.LeakyReLU()(self.x)
self.x = tf.keras.layers.Conv1DTranspose(filters=32, kernel_size=10, strides=2, padding='same')(self.x)
self.x = tf.keras.layers.Conv1DTranspose(filters=64, kernel_size=5, strides=2, padding='same')(self.x)
self.x = tf.keras.layers.LeakyReLU()(self.x)
self.x = tf.keras.layers.Conv1DTranspose(filters=16, kernel_size=20, strides=5, padding='same')(self.x)
self.x = tf.keras.layers.Conv1DTranspose(filters=32, kernel_size=5, strides=5, padding='same')(self.x)
self.x = tf.keras.layers.LeakyReLU()(self.x)
self.x = tf.keras.layers.Conv1DTranspose(filters=1, kernel_size=50, strides=5, padding='same')(self.x)
self.x = tf.keras.layers.Conv1DTranspose(filters=1, kernel_size=5, strides=5, padding='same')(self.x)
self.x = tf.keras.layers.LeakyReLU()(self.x)
self.x = tf.keras.layers.Flatten()(self.x)
self.decoder_outputs = tf.keras.layers.Reshape((500,))(self.x)
Expand Down
3 changes: 2 additions & 1 deletion src/model/encoder.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,9 +10,10 @@ def __init__(self, latent_dim):

self.encoder_inputs = keras.Input(shape=(500,))
self.x = keras.layers.Reshape((500, 1))(self.encoder_inputs)
self.x = self.conv_block_enc(self.x, 32, 5, 1)
self.x = self.conv_block_enc(self.x, 64, 5, 1)
self.x = self.conv_block_enc(self.x, 32, 5, 16)
self.x = self.conv_block_enc(self.x, 16, 5, 32)
self.x = self.conv_block_enc(self.x, 8, 5, 64)
self.x = tf.keras.layers.MaxPooling1D()(self.x)
self.x = tf.keras.layers.Flatten()(self.x)
self.x = tf.keras.layers.Dense(64)(self.x)
Expand Down
4 changes: 3 additions & 1 deletion src/model/tcvae.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,8 @@
from model.vae import VAE
import tensorflow as tf
import tensorflow_probability as tfp

tfd = tfp.distributions


class TCVAE(VAE):
Expand Down Expand Up @@ -34,7 +37,6 @@ def gamma(self):
def gamma(self, value):
self._gamma.assign(value)


def _loss(self, reconstruction, x, mu, log_var, z):
size_batch = tf.shape(x)[0]
logiw_mat = self.log_importance_weight_matrix(size_batch)
Expand Down
16 changes: 8 additions & 8 deletions src/params.yml
Original file line number Diff line number Diff line change
Expand Up @@ -9,21 +9,21 @@ train_dataset:
batch_size: 1024
val_dataset:
name:
- zheng
split: 'train'
- medalcare
split: 'validation'
shuffle_size: 1024
batch_size: 1024
save_results_path: ../results/
seed: 42
epochs: 200
latent_dimension: 8
learning_rate: 0.001
learning_rate: 0.002
coefficients:
alpha: 1.0
beta: 64.0
gamma: 1.0
alpha: 0.1
beta: 1.0
gamma: 0.1
coefficients_raise: 20
early_stopping: 50000
early_stopping: 50
period_reconstruction_plot: 20
index_tracked_sample: 5
encode_data:
Expand All @@ -37,7 +37,7 @@ encode_data:
fine_tune: False
medalcare:
name: 'medalcare'
splits: [ 'train' ]
splits: [ 'train', 'test', 'validation']
fine_tune: False
ptb:
name: 'ptb'
Expand Down
82 changes: 73 additions & 9 deletions src/utils/visualizations.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,9 @@
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
import seaborn as sns
from neurokit2.signal import signal_smooth
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay


class Visualizations:
Expand Down Expand Up @@ -58,18 +61,79 @@ def eval_reconstruction(X, reconstruction, indices, path_eval, titles=None, xlab
plt.close()

@staticmethod
def eval_dimensions(res, path_eval):
mean = np.mean(res, axis=0)
std = np.std(res, axis=0)
plt.figure(figsize=(15, 5))
def eval_dimensions(df, ld, model, dimension, path, l_bound=-10.0, u_bound=10.0, num_samples=1000):

mean_values, std_values = np.mean(df.iloc[:, :ld], axis=0), np.std(df.iloc[:, :ld], axis=0)
result_matrix = np.tile(mean_values, (num_samples, 1))
result_matrix[:, dimension] = np.linspace(l_bound, u_bound, num_samples)
X = model.decode(result_matrix)

M = np.zeros((X.shape))
for k, _ in enumerate(X):
M[k] = signal_smooth(X[k].numpy())

mean = np.mean(M, axis=0)
std = np.std(M, axis=0)
fig = plt.figure(figsize=(15, 5))
fig.tight_layout()
plt.plot(range(0, len(mean)), mean, 'k-')
plt.fill_between(range(0, len(mean)), mean - std, mean + std)
plt.savefig(path_eval + '.png')
plt.close()
plt.title("ECG reconstruction by toggling dimension " + str(dimension) + ".")
fig.savefig(path, dpi=300)

@staticmethod
def plot_trainings_process(train_progress, metrics):
plt.figure(figsize=(10, 5))
fig = plt.figure(figsize=(10, 5))
fig.tight_layout()
for k in metrics:
sns.lineplot(train_progress, x='epoch', y=k)
# ax.set_yscale("log")
ax = sns.lineplot(train_progress, x='epoch', y=k)
ax.set_yscale("log")

@staticmethod
def plot_variations(df, ld, model, dimension=0, num_rows=1000):
    """Plot decoded signals while sweeping a single latent dimension.

    The first ``ld`` columns of ``df`` are averaged column-wise to form a
    base latent vector. That vector is repeated ``num_rows`` times and the
    entry at ``dimension`` is replaced by an even sweep from -10 to 10. The
    batch is decoded with ``model.decode`` and the flattened output is drawn
    with seaborn's ``lineplot`` against a repeating 0..499 timestamp.

    Args:
        df: DataFrame whose first ``ld`` columns are read via ``iloc``.
        ld: Number of leading columns of ``df`` to average.
        model: Object exposing ``decode(batch)``; the result must offer
            ``.numpy()``.
        dimension: Index of the latent entry to sweep.
        num_rows: Number of sweep steps (rows of the decoded batch).
    """
    mean_values = np.mean(df.iloc[:, :ld], axis=0)
    result_matrix = np.tile(mean_values, (num_rows, 1))
    result_matrix[:, dimension] = np.linspace(-10.0, 10.0, num_rows)
    X = model.decode(result_matrix)

    flat_values = X.numpy().flatten()
    # NOTE(review): the period of 500 is hard-coded and presumably equals the
    # decoded signal length -- confirm against the decoder's output shape.
    reconstruct = pd.DataFrame({
        'values': flat_values,
        # Vectorized replacement for a per-sample Python loop.
        'timestamp': np.arange(len(flat_values)) % 500,
    })
    plt.figure(figsize=(15, 5))
    sns.lineplot(data=reconstruct, x="timestamp", y="values")

@staticmethod
def plot_embedding_slice(df, dim_x, dim_y, hue, title_legend, path):
    """Scatter two columns of ``df`` against each other and save the figure.

    Args:
        df: Data passed to seaborn's ``scatterplot``.
        dim_x: Column used for the x axis; also shown in the x label.
        dim_y: Column used for the y axis; also shown in the y label.
        hue: Column used to colour the points.
        title_legend: Title of the legend.
        path: Destination handed to ``Figure.savefig`` (written at 300 dpi).
    """
    figure = plt.figure(figsize=(10, 10))
    figure.tight_layout()

    axis_text = {
        'xlabel': 'Dimension ' + str(dim_x),
        'ylabel': 'Dimension ' + str(dim_y),
        'title': "Slice through the embedding space.",
    }
    scatter_axes = sns.scatterplot(data=df, x=dim_x, y=dim_y, hue=hue)
    scatter_axes.set(**axis_text)

    plt.legend(title=title_legend)
    plt.tight_layout()
    plt.show()
    figure.savefig(path, dpi=300)

@staticmethod
def plot_confustion_matrix(X_train, X_test, y_train, y_test, predictor, path):
    """Fit ``predictor``, then plot and save its confusion matrix on the test set.

    Args:
        X_train: Training features passed to ``predictor.fit``.
        X_test: Test features; must offer ``fillna`` (missing values are
            replaced by 0 before prediction).
        y_train: Training labels.
        y_test: Test labels compared against the predictions.
        predictor: Estimator exposing ``fit``, ``predict`` and ``classes_``.
        path: Destination handed to ``Figure.savefig`` (written at 300 dpi).

    Returns:
        The confusion matrix computed by ``sklearn.metrics.confusion_matrix``
        with labels ordered as ``predictor.classes_``.
    """
    # NOTE(review): only X_test gets fillna(0); X_train is passed as-is.
    # Confirm whether the training features can contain missing values.
    predictor.fit(X_train, y_train)
    predictions = predictor.predict(X_test.fillna(0))
    cm = confusion_matrix(y_test, predictions, labels=predictor.classes_)

    # Draw on axes that belong to the figure we save. Without ``ax=``,
    # ``disp.plot()`` opens its own new figure and the saved one stays empty.
    fig, ax = plt.subplots(figsize=(15, 15))
    disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=predictor.classes_)
    disp.plot(ax=ax)
    fig.tight_layout()
    # Save before showing so the file is written regardless of how the
    # active backend treats the figure once the window is closed.
    fig.savefig(path, dpi=300)
    plt.show()
    return cm
7 changes: 7 additions & 0 deletions tests/tcvae.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
# content of test_sample.py
def inc(x):
    """Return ``x`` increased by one."""
    incremented = x + 1
    return incremented


def test_answer():
    """Check that ``inc`` maps 3 to 4."""
    result = inc(3)
    assert result == 4

0 comments on commit 0d2e5be

Please sign in to comment.