diff --git a/project/data/samples.npz b/project/data/samples.npz
deleted file mode 100644
index 0b82e27..0000000
Binary files a/project/data/samples.npz and /dev/null differ
diff --git a/project/generate.py b/project/generate.py
index 333712a..f95d9d9 100644
--- a/project/generate.py
+++ b/project/generate.py
@@ -3,6 +3,7 @@ import numpy as np
 import midi
 import tensorflow as tf
+import pypianoroll as roll
 from keras.layers import Input, Dense, Conv2D
 from keras.models import Model
 from tensorflow.keras import layers
 
@@ -18,7 +19,8 @@ output_path = sys.argv[2]
 treshold = float(sys.argv[3])
 
 #random seed
-generate_seed = np.random.rand(12288).reshape(1,96,128)
+# generate_seed = np.random.rand(12288).reshape(1,96,128)
+generate_seed = np.random.rand(2).reshape(1,-1)
 
 # load and predict
 model = pickle.load(open(trained_model_path, 'rb'))
@@ -29,8 +31,7 @@ generated_sample = generated_sample.reshape(96,128)
 generated_sample = generated_sample > treshold * generated_sample.max()
 
 #save to midi
-midi.to_midi(generated_sample, output_path='{}.mid'.format(output_path) )
+midi_data = midi.to_midi(generated_sample, output_path='{}.mid'.format(output_path) )
 
-#save piano roll to png
-plt.imshow(generated_sample, cmap = plt.get_cmap('gray'))
-plt.savefig('{}.png'.format(output_path))
+#save plot for preview
+roll.plot(midi_data, filename='{}.png'.format(output_path))
diff --git a/project/train.py b/project/train.py
index a5a8ac3..eeda2a7 100644
--- a/project/train.py
+++ b/project/train.py
@@ -13,27 +13,99 @@ train_data_path = sys.argv[1]
 save_model_path = sys.argv[2]
 epochs = int(sys.argv[3])
 
-model = Sequential()
-model.add(LSTM(128,input_shape=(96, 128),return_sequences=True))
-model.add(Dropout(0.3))
-model.add(LSTM(512, return_sequences=True))
-model.add(Dropout(0.3))
-model.add(LSTM(128))
-model.add(Dense(128))
-model.add(Dropout(0.3))
-model.add(Dense(128*96))
-model.add(Activation('softmax'))
-model.add(Reshape((96, 128)))
-model.compile(loss='categorical_crossentropy', optimizer='rmsprop')
+# best model yet - working autoencoder
+# model = Sequential()
+# model.add(LSTM(128,input_shape=(96, 128),return_sequences=True))
+# model.add(Dropout(0.3))
+# model.add(LSTM(512, return_sequences=True))
+# model.add(Dropout(0.3))
+# model.add(LSTM(128))
+# model.add(Dense(96))
+# model.add(Dropout(0.3))
+# model.add(Dense(128*96))
+# model.add(Activation('softmax'))
+# model.add(Reshape((96, 128)))
+# model.compile(loss='binary_crossentropy', optimizer='rmsprop')
+
+# # working model #2
+# model = Sequential()
+# model.add(LSTM(128, input_shape=(96, 128), return_sequences=True))
+# model.add(LSTM(512, return_sequences=True))
+# model.add(TimeDistributed(Dense(128)))
+# model.add(Activation('softmax'))
+# model.add(Reshape((96, 128)))
+# model.compile(loss='binary_crossentropy', optimizer='adadelta')
+
+# VAE model - LSTM
+from keras.layers import Lambda, Input, Dense, RepeatVector, TimeDistributed
+from keras.models import Model
+from keras.losses import mse, binary_crossentropy
+from keras.utils import plot_model
+from keras import backend as K
+import numpy as np
+import matplotlib.pyplot as plt
+import argparse
+import os
+
+# reparameterization trick: z = z_mean + stddev * eps, with eps ~ N(0, I)
+def sampling(args):
+    z_mean, z_log_var = args
+    batch = K.shape(z_mean)[0]
+    dim = K.int_shape(z_mean)[1]
+    epsilon = K.random_normal(shape=(batch, dim))
+    return z_mean + K.exp(0.5 * z_log_var) * epsilon
+
+# network parameters
+original_dim = 96 * 128
+input_shape = (96, 128)
+intermediate_dim = 128
+batch_size = 128
+latent_dim = 2
+
+# encoder: summarize the piano roll with an LSTM, then map to (z_mean, z_log_var)
+inputs = Input(shape=input_shape, name='encoder_input')
+x = LSTM(intermediate_dim, activation='relu', name='first_lstm')(inputs)
+
+z_mean = Dense(latent_dim, name='z_mean')(x)
+z_log_var = Dense(latent_dim, name='z_log_var')(x)
+
+z = Lambda(sampling, output_shape=(latent_dim,), name='z')([z_mean, z_log_var])
+
+encoder = Model(inputs, [z_mean, z_log_var, z], name='encoder')
+
+# build decoder model: an LSTM needs a 3D input, so tile the latent
+# vector across the 96 time steps before decoding
+latent_inputs = Input(shape=(latent_dim,), name='z_sampling')
+x = RepeatVector(96)(latent_inputs)
+x = LSTM(intermediate_dim, return_sequences=True, activation='relu')(x)
+outputs = TimeDistributed(Dense(128, activation='sigmoid'))(x)
+
+# instantiate decoder model
+decoder = Model(latent_inputs, outputs, name='decoder')
+# plot_model(decoder, to_file='vae_mlp_decoder.png', show_shapes=True)
+
+# instantiate VAE model
+outputs = decoder(encoder(inputs)[2])
+vae = Model(inputs, outputs, name='vae_lstm')
 
 # load training data
 print('Traing Samples: {}'.format(train_data_path))
 train_X = np.load(train_data_path)['arr_0']
+
+# compile with the VAE loss: reconstruction term plus KL divergence to N(0, I)
+def vae_loss(inputs, outputs):
+    xent_loss = K.mean(binary_crossentropy(inputs, outputs), axis=-1)
+    kl_loss = -0.5 * K.mean(1 + z_log_var - K.square(z_mean) - K.exp(z_log_var), axis=-1)
+    return xent_loss + kl_loss
+
+vae.compile(optimizer='rmsprop', loss=vae_loss)
+# vae.summary()
+# plot_model(vae, to_file='vae_mlp.png', show_shapes=True)
 
 # model training
-model.fit(train_X, train_X, epochs=epochs, batch_size=32)
+vae.fit(train_X, train_X, epochs=epochs, batch_size=32)
 
 # save trained model
 pickle_path = '{}.pickle'.format(save_model_path)
-pickle.dump(model, open(pickle_path,'wb'))
+pickle.dump(decoder, open(pickle_path,'wb'))
 print("Model save to {}".format(pickle_path))
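
Two pieces of machinery in the new train.py are worth spelling out. The sampling layer is the standard reparameterization trick, and the kl_loss term inside vae_loss is the closed-form KL divergence between the diagonal Gaussian posterior q(z|x) = N(z_mean, exp(z_log_var)) and the unit Gaussian prior. A minimal numpy sketch of both; the shapes are illustrative, not part of the repo:

    import numpy as np

    rng = np.random.default_rng(0)
    batch, latent_dim = 4, 2                   # latent_dim matches train.py

    z_mean = rng.normal(size=(batch, latent_dim))
    z_log_var = rng.normal(size=(batch, latent_dim))

    # reparameterization trick: keep the randomness in eps so that z stays
    # differentiable with respect to z_mean and z_log_var
    eps = rng.normal(size=(batch, latent_dim))
    z = z_mean + np.exp(0.5 * z_log_var) * eps

    # closed-form KL(q(z|x) || N(0, I)) for a diagonal Gaussian; the same
    # expression appears in vae_loss above
    kl = -0.5 * np.mean(1 + z_log_var - z_mean**2 - np.exp(z_log_var), axis=-1)
    print(z.shape, kl.shape)                   # (4, 2) (4,)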
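
Since the commit pickles only the decoder, generation reduces to drawing a 2-dimensional z and decoding it, which is why generate.py's seed shrinks from a full 96x128 roll to two random numbers. A hypothetical smoke test, assuming the encoder and decoder defined in train.py are in scope (fake_batch is made-up data, not a repo fixture):

    import numpy as np

    # random sparse piano rolls standing in for real training samples
    fake_batch = (np.random.rand(4, 96, 128) > 0.9).astype('float32')

    z_mean_v, z_log_var_v, z_v = encoder.predict(fake_batch)
    recon = decoder.predict(z_v)
    print(z_v.shape, recon.shape)              # expected: (4, 2) (4, 96, 128)

    # the same binarization generate.py applies before writing MIDI
    threshold = 0.7
    binary = recon[0] > threshold * recon[0].max()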