lstm autoencoder, for now with the best results

This commit is contained in:
Cezary Pukownik 2019-05-30 12:36:59 +02:00
parent 35c19e1e80
commit 56e7d72a64
8 changed files with 43 additions and 129 deletions

Binary file not shown.

Binary file not shown.

Binary file not shown.

Before

Width:  |  Height:  |  Size: 8.4 KiB

View File

@ -15,31 +15,18 @@ import sys
trained_model_path = sys.argv[1]
output_path = sys.argv[2]
# treshold = float(sys.argv[3])
treshold = float(sys.argv[3])
#random seed
generate_seed = np.random.rand(12288).reshape(1,96,128)
# load and predict
model = pickle.load(open(trained_model_path, 'rb'))
generated_sample = model.predict(generate_seed)
generated_sample = generated_sample.reshape(96,128)
generated_music = np.empty((0,128))
for note in range(100):
generated_vector = model.predict(generate_seed).reshape(1,4,128)
generated_notes = np.zeros((4,128))
for i, col in enumerate(generated_vector[0]):
best_note = np.argmax(col)
generated_notes[i][best_note] = 1
generate_seed = np.concatenate([generated_notes, generate_seed[0][:-4]]).reshape(1,96,128)
generated_music = np.concatenate([generated_music, generated_notes])
# generated_sample = generated_sample.reshape(96,128)
generated_sample = generated_music
# print(generated_music)
# binarize generated music
# generated_sample = generated_sample > 0 * generated_sample.max()
generated_sample = generated_sample > treshold * generated_sample.max()
#save to midi
midi.to_midi(generated_sample, output_path='{}.mid'.format(output_path) )

View File

@ -18,67 +18,22 @@ import pickle
def to_samples(midi_file_path, midi_res=settings.midi_resolution):
    """Cut every non-drum track of a MIDI file into sliding-window pairs.

    Each pair is made of ``m_timesteps`` consecutive piano-roll rows (the
    input window ``X``) and the ``n_next_notes`` rows that immediately
    follow it (the target ``y``).  The window advances one row at a time.

    Args:
        midi_file_path: Path passed to ``roll.Multitrack``.
        midi_res: Not used by this function; kept so the signature stays
            compatible with existing callers.

    Returns:
        A tuple ``(all_X_samples, all_y_samples)`` of two equally long
        lists.  Every ``X`` entry has shape ``(96, 128)`` and every ``y``
        entry has shape ``(4, 128)``.  Both lists are empty when no
        non-drum track is at least 100 rows long.
    """
    # TODO: add transpositions of every sample to every possible key transposition
    #   np.roll(sample, pitch_interval, axis=1) for transposition
    #   np.roll(sample, time_steps, axis=0) for time shifting

    # The idea is to predict the next N timesteps from the previous M timesteps.
    m_timesteps = 96   # rows in each input window
    n_next_notes = 4   # rows to predict after each window
    n_pitches = 128    # width of one piano-roll row
    window = m_timesteps + n_next_notes

    all_X_samples = []
    all_y_samples = []
    for track in roll.Multitrack(midi_file_path).tracks:
        if track.is_drum:
            # TODO: add code for drums samples
            continue

        # The last valid start index is ``rows - window``, so there are
        # ``rows - window + 1`` windows.  The previous ``rows - window``
        # silently dropped the final one.  A track shorter than one window
        # yields a non-positive count and therefore an empty range.
        n_windows = track.pianoroll.shape[0] - window + 1
        for start in range(n_windows):
            split = start + m_timesteps
            # The reshape calls act as shape assertions, tied to the named
            # sizes above rather than to repeated literals.
            all_X_samples.append(
                track.pianoroll[start:split].reshape(m_timesteps, n_pitches)
            )
            all_y_samples.append(
                track.pianoroll[split:split + n_next_notes].reshape(n_next_notes, n_pitches)
            )
    return all_X_samples, all_y_samples
def to_samples_by_instrument(midi_file_path, midi_res=settings.midi_resolution):
# add transpositions of every sample to every possible key transposition
# np.roll(sample, pitch_interval, axis=1) for transposition
# np.roll(sample, time_steps, axis=0) for time shifting
# TODO: make rollable samples with train_Y set
fill_empty_array = lambda : [ np.empty((0, 96, 128)) , np.empty((0, 1, 128)) ]
fill_empty_array = lambda : np.empty((0, 96, 128))
samples_by_instrument = defaultdict(fill_empty_array)
all_beats = np.empty((0, 96, 128))
for track in roll.Multitrack(midi_file_path).tracks:
if not track.is_drum:
key = track.program + 1
# TODO: this makes pack of samples of N x 96 x 128 shape
# number_of_beats = floor(track.pianoroll.shape[0] / midi_res)
# track_pianoroll = track.pianoroll[: number_of_beats * midi_res]
# track_beats = track_pianoroll.reshape(number_of_beats, midi_res, 128)
# TODO: this makes rollable samples and dataset of y_train for prediction
# the idea is to predict next n notes from previous m timesteps
m_timesteps = 96
n_next_notes = 4
for i, value in tqdm(enumerate(track.pianoroll[:-(m_timesteps + n_next_notes)])):
X = track.pianoroll[i : i + m_timesteps].reshape(1,96,128)
y = track.pianoroll[i + m_timesteps : i + m_timesteps + n_next_notes].reshape(1,1,128)
samples_by_instrument[key][0] = np.concatenate([X, samples_by_instrument[ key ][0]], axis=0)
samples_by_instrument[key][1] = np.concatenate([y, samples_by_instrument[ key ][1]], axis=0)
# samples_by_instrument[track.program + 1][0] = np.concatenate([track_beats, samples_by_instrument[ track.program + 1]], axis=0)
# this makes pack of samples of N x 96 x 128 shape
number_of_beats = floor(track.pianoroll.shape[0] / midi_res)
track_pianoroll = track.pianoroll[: number_of_beats * midi_res]
track_beats = track_pianoroll.reshape(number_of_beats, midi_res, 128)
samples_by_instrument[track.program + 1] = np.concatenate([track_beats, samples_by_instrument[ track.program + 1]], axis=0)
else:
# TODO: add code for drums samples
pass
@ -89,7 +44,7 @@ def to_midi(samples, output_path=settings.generated_midi_path, program=0, tempo=
return_midi = roll.Multitrack(tracks=tracks, tempo=tempo, downbeat=[0, 96, 192, 288], beat_resolution=beat_resolution)
roll.write(return_midi, output_path)
# todo: this function is running too slow.
# TODO: this function is running too slow.
def delete_empty_samples(sample_pack):
print('Deleting empty samples...')
temp_sample_pack = sample_pack
@ -104,28 +59,24 @@ def delete_empty_samples(sample_pack):
def main():
print('Exporting...')
# from collections import defaultdict
# fill_empty_array = lambda : [ np.empty((0, 96, 128)) , np.empty((0, 1, 128)) ]
# samples_pack_by_instrument = defaultdict(fill_empty_array)
from collections import defaultdict
fill_empty_array = lambda : np.empty((0, 96, 128))
samples_pack_by_instrument = defaultdict(fill_empty_array)
# sample_pack = np.empty((0,settings.midi_resolution,128))
X_train = []
y_train = []
sample_pack = np.empty((0,settings.midi_resolution,128))
for midi_file in tqdm(os.listdir(settings.midi_dir)):
print(midi_file)
midi_file_path = '{}/{}'.format(settings.midi_dir, midi_file)
X, y = to_samples(midi_file_path)
# if midi_samples is None:
# continue
X_train.extend(X)
y_train.extend(y)
# this is for instrument separation
# for key, value in midi_samples.items():
# samples_pack_by_instrument[key][0] = np.concatenate((samples_pack_by_instrument[key][0], value[0]), axis=0)
# samples_pack_by_instrument[key][1] = np.concatenate((samples_pack_by_instrument[key][1], value[1]), axis=0)
midi_samples = to_samples(midi_file_path)
if midi_samples is None:
continue
# TODO: Delete empty samples
# this is for instrument separation
for key, value in midi_samples.items():
samples_pack_by_instrument[key] = np.concatenate((samples_pack_by_instrument[key], value), axis=0)
# TODO: Delete empty samples - optimize
# sample_pack = delete_empty_samples(sample_pack)
# save as compressed pickle (sample-dictionary)
@ -133,29 +84,18 @@ def main():
# pickle.dump(dict(samples_pack_by_instrument), sfile)
# this is for instrument separation
# print('Saving...')
# for key, value in tqdm(samples_pack_by_instrument.items()):
# np.savez_compressed('data/samples/X_{}.npz'.format(settings.midi_program[key][0]), value)
# np.savez_compressed('data/samples/y_{}.npz'.format(settings.midi_program[key][1]), value)
# this is for one big list
print('Saving...')
for key, value in tqdm(samples_pack_by_instrument.items()):
np.savez_compressed('data/samples/{}.npz'.format(settings.midi_program[key]), value)
np_X_train = np.array(X_train)
np_y_train = np.array(y_train)
print(np_X_train.shape, np_y_train.shape)
np.savez_compressed('data/samples/X_{}.npz'.format(1), np_X_train)
np.savez_compressed('data/samples/y_{}.npz'.format(1), np_y_train)
# Give a preview of what the samples look like
fig, axes = plt.subplots(nrows=10, ncols=10, figsize=(20, 20))
for idx, ax in enumerate(axes.ravel()):
n = np.random.randint(0, value.shape[0])
sample = value[n]
ax.imshow(sample, cmap = plt.get_cmap('gray'))
plt.savefig('data/samples/{}.png'.format(settings.midi_program[key]))
# Give a preview of what the samples look like
# fig, axes = plt.subplots(nrows=10, ncols=10, figsize=(20, 20))
# for idx, ax in enumerate(axes.ravel()):
# n = np.random.randint(0, value[0].shape[0])
# sample = value[n]
# ax.imshow(sample, cmap = plt.get_cmap('gray'))
# plt.savefig('data/samples/{}.png'.format(settings.midi_program[key]))
print('Exported {} samples'.format(np_X_train.shape[0]))
print('Done!')
if __name__ == '__main__':

View File

@ -9,42 +9,29 @@ import numpy as np
import sys
import pickle
train_data_path_X = sys.argv[1]
train_data_path_y = sys.argv[2]
save_model_path = sys.argv[3]
epochs = int(sys.argv[4])
# model architecture
# model = Sequential()
# model.add(LSTM(128, activation='relu', input_shape=(96,128)))
# model.add(RepeatVector(96))
# model.add(LSTM(128, activation='softmax', return_sequences=True))
# model.add(TimeDistributed(Dense(128)))
#
# model.compile(optimizer='adam',
# loss='categorical_crossentropy',
# metrics=['accuracy'])
train_data_path = sys.argv[1]
save_model_path = sys.argv[2]
epochs = int(sys.argv[3])
model = Sequential()
model.add(LSTM(128,input_shape=(96, 128),return_sequences=True))
model.add(Dropout(0.3))
model.add(LSTM(512, return_sequences=True))
model.add(Dropout(0.3))
model.add(LSTM(512))
model.add(Dense(512))
# model.add(Dropout(0.3))
# model.add(Dense(128))
model.add(LSTM(128))
model.add(Dense(128))
model.add(Dropout(0.3))
model.add(Dense(128*96))
model.add(Activation('softmax'))
model.add(Reshape((4, 128)))
model.add(Reshape((96, 128)))
model.compile(loss='categorical_crossentropy', optimizer='rmsprop')
# load training data
print('Reading samples from: {}'.format(train_data_path_X))
train_X = np.load(train_data_path_X)['arr_0']
train_y = np.load(train_data_path_y)['arr_0']
print('Traing Samples: {}'.format(train_data_path))
train_X = np.load(train_data_path)['arr_0']
# model training
model.fit(train_X, train_y, epochs=epochs, batch_size=32)
model.fit(train_X, train_X, epochs=epochs, batch_size=32)
# save trained model
pickle_path = '{}.pickle'.format(save_model_path)