making VAE model in train and generate modules

2019-05-30 20:50:13 +02:00
3 changed files with 93 additions and 19 deletions
--- a/project/data/samples.npz
+++ b/project/data/samples.npz
--- a/project/generate.py
+++ b/project/generate.py
@ -3,6 +3,7 @@
 import numpy as np
 import midi
 import tensorflow as tf
+import pypianoroll as roll
 from keras.layers import Input, Dense, Conv2D
 from keras.models import Model
 from tensorflow.keras import layers
@ -18,7 +19,8 @@ output_path = sys.argv[2]
 treshold = float(sys.argv[3])

 #random seed
-generate_seed = np.random.rand(12288).reshape(1,96,128)
+# generate_seed = np.random.rand(12288).reshape(1,96,128)
+generate_seed = np.random.rand(2).reshape(1,-1)

 # load and predict
 model = pickle.load(open(trained_model_path, 'rb'))
@ -29,8 +31,7 @@ generated_sample = generated_sample.reshape(96,128)
 generated_sample = generated_sample > treshold * generated_sample.max()

 #save to midi
-midi.to_midi(generated_sample, output_path='{}.mid'.format(output_path) )
+# NOTE(review): this assignment rebinds `midi`, so the imported `midi` module
+# is no longer reachable under that name afterwards; the returned object is
+# what gets passed to roll.plot below.
+midi = midi.to_midi(generated_sample, output_path='{}.mid'.format(output_path) )

-#save piano roll to png
-plt.imshow(generated_sample, cmap = plt.get_cmap('gray'))
-plt.savefig('{}.png'.format(output_path))
+#save plot for preview
+roll.plot(midi, filename='{}.png'.format(output_path))
--- a/project/train.py
+++ b/project/train.py
@ -13,27 +13,100 @@ train_data_path = sys.argv[1]
 save_model_path = sys.argv[2]
 epochs = int(sys.argv[3])

-model = Sequential()
-model.add(LSTM(128,input_shape=(96, 128),return_sequences=True))
-model.add(Dropout(0.3))
-model.add(LSTM(512, return_sequences=True))
-model.add(Dropout(0.3))
-model.add(LSTM(128))
-model.add(Dense(128))
-model.add(Dropout(0.3))
-model.add(Dense(128*96))
-model.add(Activation('softmax'))
-model.add(Reshape((96, 128)))
-model.compile(loss='categorical_crossentropy', optimizer='rmsprop')
+# best model yet - working autoencoder
+# model = Sequential()
+# model.add(LSTM(128,input_shape=(96, 128),return_sequences=True))
+# model.add(Dropout(0.3))
+# model.add(LSTM(512, return_sequences=True))
+# model.add(Dropout(0.3))
+# model.add(LSTM(128))
+# model.add(Dense(96))
+# model.add(Dropout(0.3))
+# model.add(Dense(128*96))
+# model.add(Activation('softmax'))
+# model.add(Reshape((96, 128)))
+# model.compile(loss='binary_crossentropy', optimizer='rmsprop')
+
+# # working model #2
+# model = Sequential()
+# model.add(LSTM(128, input_shape=(96, 128), return_sequences=True))
+# model.add(LSTM(512, return_sequences=True))
+# model.add(TimeDistributed(Dense(128)))
+# model.add(Activation('softmax'))
+# model.add(Reshape((96, 128)))
+# model.compile(loss='binary_crossentropy', optimizer='adadelta')
+
+# VAE model - LSTM
+from keras.layers import Lambda, Input, Dense
+from keras.models import Model
+from keras.datasets import mnist
+from keras.losses import mse, binary_crossentropy
+from keras.utils import plot_model
+from keras import backend as K
+import numpy as np
+import matplotlib.pyplot as plt
+import argparse
+import os
+
+def sampling(args):
+    """Draw a latent vector z from the encoder's Gaussian parameters.
+
+    Computes ``z_mean + exp(0.5 * z_log_var) * epsilon`` with ``epsilon``
+    drawn from ``K.random_normal``, so the random draw is kept separate
+    from the ``z_mean`` / ``z_log_var`` tensors (reparameterization trick).
+
+    Args:
+        args: pair ``(z_mean, z_log_var)``; both are indexed here as
+            ``(batch, dim)`` tensors.
+
+    Returns:
+        Tensor holding one sampled latent vector per batch row.
+    """
+    z_mean, z_log_var = args
+    # batch size is read from the dynamic shape, latent width from the static one
+    batch = K.shape(z_mean)[0]
+    dim = K.int_shape(z_mean)[1]
+    epsilon = K.random_normal(shape=(batch, dim))
+    # exp(0.5 * log_var) turns the log-variance into a standard deviation
+    return z_mean + K.exp(0.5 * z_log_var) * epsilon
+
+# network parameters
+original_dim = 96 * 128
+input_shape = (96,128)
+intermediate_dim = 128
+# NOTE(review): batch_size is not referenced anywhere in the visible diff;
+# vae.fit further down passes batch_size=32 explicitly - confirm which is intended.
+batch_size = 128
+latent_dim = 2
+
+# Encoder: one LSTM over the (96, 128) input, then two Dense heads producing
+# z_mean and z_log_var, and a Lambda layer that samples z from them.
+inputs = Input(shape=input_shape, name='encoder_input')
+x = LSTM(intermediate_dim, activation='relu', name='first_lstm')(inputs)
+
+z_mean = Dense(latent_dim, name='z_mean')(x)
+z_log_var = Dense(latent_dim, name='z_log_var')(x)
+
+z = Lambda(sampling, output_shape=(latent_dim,), name='z')([z_mean, z_log_var])
+
+# The encoder model exposes all three tensors; index 2 is the sampled z.
+encoder = Model(inputs, [z_mean, z_log_var, z], name='encoder')
+
+# build decoder model
+# NOTE(review): latent_inputs is declared with shape (latent_dim,), i.e. with no
+# time axis, and is fed straight into an LSTM. Recurrent layers normally expect
+# a (timesteps, features) input - confirm this builds, or whether a
+# RepeatVector/Reshape step is missing.
+latent_inputs = Input(shape=(latent_dim,), name='z_sampling')
+x = LSTM(intermediate_dim, return_sequences=True, activation='relu')(latent_inputs)
+# NOTE(review): with return_sequences=True the Dense below presumably applies per
+# time step, so its output may not be a flat original_dim vector - TODO confirm.
+outputs = Dense(original_dim, activation='sigmoid')(x)
+# NOTE(review): `reshaped` is not used afterwards; the decoder Model below is
+# built from `outputs`, not from this reshaped tensor.
+reshaped = Reshape((96,128))(outputs)
+
+# instantiate decoder model
+decoder = Model(latent_inputs, outputs, name='decoder')
+# plot_model(decoder, to_file='vae_mlp_decoder.png', show_shapes=True)
+
+# instantiate VAE model
+# `outputs` is rebound here: the decoder is applied to the encoder's third
+# output (the sampled z) to form the end-to-end model.
+outputs = decoder(encoder(inputs)[2])
+vae = Model(inputs, outputs, name='vae_mlp')

 # load training data
 print('Traing Samples: {}'.format(train_data_path))
 train_X = np.load(train_data_path)['arr_0']
+# train_X = train_X.reshape((train_X.shape[0], 96*128))
+
+# compiling model
+
+def vae_loss(inputs, outputs):
+    """Loss handed to ``vae.compile``: reconstruction term plus latent penalty.
+
+    Args:
+        inputs: target tensor supplied by Keras (the training call passes
+            ``train_X`` as both input and target).
+        outputs: the model's prediction for the same batch.
+
+    Returns:
+        ``binary_crossentropy(inputs, outputs)`` plus a KL-style term computed
+        from the encoder's ``z_mean`` and ``z_log_var``.
+    """
+    xent_loss = binary_crossentropy(inputs, outputs)
+    # z_mean / z_log_var are the module-level encoder tensors, not arguments.
+    # The term is averaged (K.mean) over the last axis rather than summed.
+    kl_loss = - 0.5 * K.mean(1 + z_log_var - K.square(z_mean) - K.exp(z_log_var), axis=-1)
+    # NOTE(review): the two terms may have different ranks depending on the
+    # shape of inputs/outputs - confirm they broadcast as intended.
+    return xent_loss + kl_loss
+
+vae.compile(optimizer='rmsprop', loss=vae_loss)
+# vae.summary()
+# plot_model(vae, to_file='vae_mlp.png', show_shapes=True)

 # model training
-model.fit(train_X, train_X, epochs=epochs, batch_size=32)
+vae.fit(train_X,train_X, epochs=epochs, batch_size=32)

 # save trained model
 pickle_path = '{}.pickle'.format(save_model_path)
-pickle.dump(model, open(pickle_path,'wb'))
+pickle.dump(decoder, open(pickle_path,'wb'))
 print("Model save to {}".format(pickle_path))