LSTM autoencoder, with the best results so far
parent 35c19e1e80
commit 56e7d72a64
project/data/models/piano_best.pickle (new binary file, contents not shown)
(several other binary files changed, including an image previously 8.4 KiB; contents not shown)
@@ -15,31 +15,18 @@ import sys
trained_model_path = sys.argv[1]
output_path = sys.argv[2]
# treshold = float(sys.argv[3])
treshold = float(sys.argv[3])

# random seed
generate_seed = np.random.rand(12288).reshape(1,96,128)

# load and predict
model = pickle.load(open(trained_model_path, 'rb'))
generated_sample = model.predict(generate_seed)
generated_sample = generated_sample.reshape(96,128)

generated_music = np.empty((0,128))

for note in range(100):
    generated_vector = model.predict(generate_seed).reshape(1,4,128)
    generated_notes = np.zeros((4,128))
    for i, col in enumerate(generated_vector[0]):
        best_note = np.argmax(col)
        generated_notes[i][best_note] = 1

    generate_seed = np.concatenate([generated_notes, generate_seed[0][:-4]]).reshape(1,96,128)
    generated_music = np.concatenate([generated_music, generated_notes])

# generated_sample = generated_sample.reshape(96,128)
generated_sample = generated_music
# print(generated_music)
# binarize generated music
# generated_sample = generated_sample > 0 * generated_sample.max()
generated_sample = generated_sample > treshold * generated_sample.max()

# save to midi
midi.to_midi(generated_sample, output_path='{}.mid'.format(output_path))

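(Aside, not part of the commit: a minimal sketch of how the seed window advances in the generation loop above, assuming the same shapes it uses, a (1, 96, 128) seed and 4 predicted timesteps per step. The note indices are made up for illustration.)

import numpy as np

seed = np.random.rand(1, 96, 128)               # same shape as generate_seed above
new_notes = np.zeros((4, 128))
new_notes[np.arange(4), [60, 62, 64, 65]] = 1   # hypothetical argmax picks

# prepend the 4 new timesteps and drop the 4 oldest, so the seed keeps its shape
seed = np.concatenate([new_notes, seed[0][:-4]]).reshape(1, 96, 128)
assert seed.shape == (1, 96, 128)
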
project/midi.py (116 lines changed)
@@ -18,67 +18,22 @@ import pickle
def to_samples(midi_file_path, midi_res=settings.midi_resolution):

    # TODO: add transpositions of every sample to every possible key transposition
    # np.roll(sample, pitch_interval, axis=1) for transposition
    # np.roll(sample, time_steps, axis=0) for time shifting
    all_X_samples = []
    all_y_samples = []
    for track in roll.Multitrack(midi_file_path).tracks:
        if not track.is_drum:
            # TODO: this makes rollable samples and a y_train dataset for prediction
            # the idea is to predict the next N timesteps from the previous M timesteps
            m_timesteps = 96
            n_next_notes = 4

            track_timesteps = track.pianoroll.shape[0] - (m_timesteps + n_next_notes)

            X_track_samples = []
            y_track_samples = []
            for i in range(track_timesteps):
                X = track.pianoroll[i : i + m_timesteps].reshape(96,128)
                y = track.pianoroll[i + m_timesteps : i + m_timesteps + n_next_notes].reshape(4,128)
                X_track_samples.append(X)
                y_track_samples.append(y)

            all_X_samples.extend(X_track_samples)
            all_y_samples.extend(y_track_samples)
        else:
            # TODO: add code for drum samples
            pass
    return all_X_samples, all_y_samples

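(Aside, not part of the commit: a minimal sketch of the transposition and time-shift augmentation the TODO at the top of to_samples describes, using np.roll as its comments suggest. The interval and step counts here are made up; note that np.roll wraps around at the edges, which real augmentation code would need to handle.)

import numpy as np

sample = np.zeros((96, 128))
sample[0, 60] = 1                           # one note at timestep 0, pitch 60

transposed = np.roll(sample, 2, axis=1)     # every pitch moves up 2 semitones
shifted = np.roll(sample, 4, axis=0)        # the whole sample moves 4 timesteps later

assert transposed[0, 62] == 1
assert shifted[4, 60] == 1
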
def to_samples_by_instrument(midi_file_path, midi_res=settings.midi_resolution):

    # add transpositions of every sample to every possible key transposition
    # np.roll(sample, pitch_interval, axis=1) for transposition
    # np.roll(sample, time_steps, axis=0) for time shifting

    # TODO: make rollable samples with train_Y set

    fill_empty_array = lambda : [ np.empty((0, 96, 128)) , np.empty((0, 1, 128)) ]
    fill_empty_array = lambda : np.empty((0, 96, 128))
    samples_by_instrument = defaultdict(fill_empty_array)
    all_beats = np.empty((0, 96, 128))

    for track in roll.Multitrack(midi_file_path).tracks:
        if not track.is_drum:
            key = track.program + 1
            # TODO: this makes a pack of samples of N x 96 x 128 shape
            # number_of_beats = floor(track.pianoroll.shape[0] / midi_res)
            # track_pianoroll = track.pianoroll[: number_of_beats * midi_res]
            # track_beats = track_pianoroll.reshape(number_of_beats, midi_res, 128)

            # TODO: this makes rollable samples and a y_train dataset for prediction
            # the idea is to predict the next n notes from the previous m timesteps
            m_timesteps = 96
            n_next_notes = 4
            for i, value in tqdm(enumerate(track.pianoroll[:-(m_timesteps + n_next_notes)])):
                X = track.pianoroll[i : i + m_timesteps].reshape(1,96,128)
                y = track.pianoroll[i + m_timesteps : i + m_timesteps + n_next_notes].reshape(1,1,128)

                samples_by_instrument[key][0] = np.concatenate([X, samples_by_instrument[ key ][0]], axis=0)
                samples_by_instrument[key][1] = np.concatenate([y, samples_by_instrument[ key ][1]], axis=0)

            # samples_by_instrument[track.program + 1][0] = np.concatenate([track_beats, samples_by_instrument[ track.program + 1]], axis=0)
            # this makes a pack of samples of N x 96 x 128 shape
            number_of_beats = floor(track.pianoroll.shape[0] / midi_res)
            track_pianoroll = track.pianoroll[: number_of_beats * midi_res]
            track_beats = track_pianoroll.reshape(number_of_beats, midi_res, 128)
            samples_by_instrument[track.program + 1] = np.concatenate([track_beats, samples_by_instrument[ track.program + 1]], axis=0)
        else:
            # TODO: add code for drum samples
            pass

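(Aside, not part of the commit: the defaultdict factory above returns an empty (0, 96, 128) array, so a new instrument key can be concatenated to without special-casing its first sample. A minimal sketch with made-up data:)

import numpy as np
from collections import defaultdict

fill_empty_array = lambda: np.empty((0, 96, 128))
by_instrument = defaultdict(fill_empty_array)

batch = np.zeros((3, 96, 128))    # three hypothetical samples for program 1
by_instrument[1] = np.concatenate([by_instrument[1], batch], axis=0)
assert by_instrument[1].shape == (3, 96, 128)
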
@@ -89,7 +44,7 @@ def to_midi(samples, output_path=settings.generated_midi_path, program=0, tempo=
    return_midi = roll.Multitrack(tracks=tracks, tempo=tempo, downbeat=[0, 96, 192, 288], beat_resolution=beat_resolution)
    roll.write(return_midi, output_path)

# todo: this function is running too slow.
# TODO: this function is running too slow.
def delete_empty_samples(sample_pack):
    print('Deleting empty samples...')
    temp_sample_pack = sample_pack
@@ -104,28 +59,24 @@ def delete_empty_samples(sample_pack):
def main():
    print('Exporting...')

    # from collections import defaultdict
    # fill_empty_array = lambda : [ np.empty((0, 96, 128)) , np.empty((0, 1, 128)) ]
    # samples_pack_by_instrument = defaultdict(fill_empty_array)
    from collections import defaultdict
    fill_empty_array = lambda : np.empty((0, 96, 128))
    samples_pack_by_instrument = defaultdict(fill_empty_array)

    # sample_pack = np.empty((0,settings.midi_resolution,128))
    X_train = []
    y_train = []
    sample_pack = np.empty((0,settings.midi_resolution,128))

    for midi_file in tqdm(os.listdir(settings.midi_dir)):
        print(midi_file)
        midi_file_path = '{}/{}'.format(settings.midi_dir, midi_file)
        X, y = to_samples(midi_file_path)
        # if midi_samples is None:
        #     continue
        X_train.extend(X)
        y_train.extend(y)
        # this is for instrument separation
        # for key, value in midi_samples.items():
        #     samples_pack_by_instrument[key][0] = np.concatenate((samples_pack_by_instrument[key][0], value[0]), axis=0)
        #     samples_pack_by_instrument[key][1] = np.concatenate((samples_pack_by_instrument[key][1], value[1]), axis=0)
        midi_samples = to_samples(midi_file_path)
        if midi_samples is None:
            continue

        # TODO: Delete empty samples
        # this is for instrument separation
        for key, value in midi_samples.items():
            samples_pack_by_instrument[key] = np.concatenate((samples_pack_by_instrument[key], value), axis=0)

    # TODO: Delete empty samples - optimize
    # sample_pack = delete_empty_samples(sample_pack)

    # save as compressed pickle (sample-dictionary)
@@ -133,29 +84,18 @@ def main():
    # pickle.dump(dict(samples_pack_by_instrument), sfile)

    # this is for instrument separation
    # print('Saving...')
    # for key, value in tqdm(samples_pack_by_instrument.items()):
    #     np.savez_compressed('data/samples/X_{}.npz'.format(settings.midi_program[key][0]), value)
    #     np.savez_compressed('data/samples/y_{}.npz'.format(settings.midi_program[key][1]), value)

    # this is for one big list
    print('Saving...')
    for key, value in tqdm(samples_pack_by_instrument.items()):
        np.savez_compressed('data/samples/{}.npz'.format(settings.midi_program[key]), value)

    np_X_train = np.array(X_train)
    np_y_train = np.array(y_train)
    print(np_X_train.shape, np_y_train.shape)
    np.savez_compressed('data/samples/X_{}.npz'.format(1), np_X_train)
    np.savez_compressed('data/samples/y_{}.npz'.format(1), np_y_train)
    # Give a preview of what the samples look like
    fig, axes = plt.subplots(nrows=10, ncols=10, figsize=(20, 20))
    for idx, ax in enumerate(axes.ravel()):
        n = np.random.randint(0, value.shape[0])
        sample = value[n]
        ax.imshow(sample, cmap = plt.get_cmap('gray'))
    plt.savefig('data/samples/{}.png'.format(settings.midi_program[key]))

    # Give a preview of what the samples look like
    # fig, axes = plt.subplots(nrows=10, ncols=10, figsize=(20, 20))
    # for idx, ax in enumerate(axes.ravel()):
    #     n = np.random.randint(0, value[0].shape[0])
    #     sample = value[n]
    #     ax.imshow(sample, cmap = plt.get_cmap('gray'))
    # plt.savefig('data/samples/{}.png'.format(settings.midi_program[key]))

    print('Exported {} samples'.format(np_X_train.shape[0]))
    print('Done!')

if __name__ == '__main__':

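(Aside, not part of the commit: the training script below reads these files back with ['arr_0'] because np.savez_compressed stores a positional array under that default key. A quick round-trip with an illustrative path:)

import numpy as np

X = np.zeros((10, 96, 128))
np.savez_compressed('/tmp/X_demo.npz', X)    # stored under the default key 'arr_0'

loaded = np.load('/tmp/X_demo.npz')['arr_0']
assert loaded.shape == (10, 96, 128)
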
@@ -9,42 +9,29 @@ import numpy as np
import sys
import pickle

train_data_path_X = sys.argv[1]
train_data_path_y = sys.argv[2]
save_model_path = sys.argv[3]
epochs = int(sys.argv[4])

# model architecture
# model = Sequential()
# model.add(LSTM(128, activation='relu', input_shape=(96,128)))
# model.add(RepeatVector(96))
# model.add(LSTM(128, activation='softmax', return_sequences=True))
# model.add(TimeDistributed(Dense(128)))
#
# model.compile(optimizer='adam',
#               loss='categorical_crossentropy',
#               metrics=['accuracy'])
train_data_path = sys.argv[1]
save_model_path = sys.argv[2]
epochs = int(sys.argv[3])

model = Sequential()
model.add(LSTM(128,input_shape=(96, 128),return_sequences=True))
model.add(Dropout(0.3))
model.add(LSTM(512, return_sequences=True))
model.add(Dropout(0.3))
model.add(LSTM(512))
model.add(Dense(512))
# model.add(Dropout(0.3))
# model.add(Dense(128))
model.add(LSTM(128))
model.add(Dense(128))
model.add(Dropout(0.3))
model.add(Dense(128*96))
model.add(Activation('softmax'))
model.add(Reshape((4, 128)))
model.add(Reshape((96, 128)))
model.compile(loss='categorical_crossentropy', optimizer='rmsprop')

# load training data
print('Reading samples from: {}'.format(train_data_path_X))
train_X = np.load(train_data_path_X)['arr_0']
train_y = np.load(train_data_path_y)['arr_0']
print('Training samples: {}'.format(train_data_path))
train_X = np.load(train_data_path)['arr_0']

# model training
model.fit(train_X, train_y, epochs=epochs, batch_size=32)
model.fit(train_X, train_X, epochs=epochs, batch_size=32)

# save trained model
pickle_path = '{}.pickle'.format(save_model_path)

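(Aside, not part of the commit and a guess only: because old and new lines are interleaved in the diff above, the final architecture is ambiguous. One plausible reading, treating the 512-unit LSTM/Dense pair and the (4, 128) reshape as the removed side, is the autoencoder sketched below; the layer sizes, loss, and optimizer come from the lines above, everything else is assumption.)

from keras.models import Sequential
from keras.layers import LSTM, Dense, Dropout, Activation, Reshape

model = Sequential()
model.add(LSTM(128, input_shape=(96, 128), return_sequences=True))
model.add(Dropout(0.3))
model.add(LSTM(512, return_sequences=True))
model.add(Dropout(0.3))
model.add(LSTM(128))                 # collapse the sequence into a single 128-dim vector
model.add(Dense(128))
model.add(Dropout(0.3))
model.add(Dense(128 * 96))           # expand back to a full 96 x 128 pianoroll
model.add(Activation('softmax'))
model.add(Reshape((96, 128)))
model.compile(loss='categorical_crossentropy', optimizer='rmsprop')

# trained as an autoencoder: the input is also the target
# model.fit(train_X, train_X, epochs=epochs, batch_size=32)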