OPTIMIZED: collect samples in a Python list instead of an np.array; delete empty arrays on the fly

This commit is contained in:
Cezary Pukownik 2019-05-31 10:25:16 +02:00
parent 8b00b20657
commit 76f26837cd

View File

@ -2,45 +2,44 @@
import settings import settings
import pypianoroll as roll import pypianoroll as roll
import matplotlib.pyplot as plt
import numpy as np import numpy as np
import os import os
from tqdm import tqdm from tqdm import tqdm
from math import floor from math import floor
import sys import sys
import pickle
from tqdm import tqdm
from tqdm import trange
from collections import defaultdict from collections import defaultdict
import bz2
import pickle import pickle
midi_folder_path = sys.argv[1] midi_folder_path = sys.argv[1]
output_path = sys.argv[2] output_path = sys.argv[2]
def to_samples(midi_file_path, midi_res=settings.midi_resolution): def to_samples(multitrack, midi_res=settings.midi_resolution, how='by_group'):
# add transpositions of every sample to every possible key transposition #how = 'by_group', 'by_instrument', 'merged',
# TODO: add transpositions of every sample to every possible key transposition
# np.roll(sample, pitch_interval, axis=1) for transposition # np.roll(sample, pitch_interval, axis=1) for transposition
# np.roll(sample, time_steps, axis=0) for time shifting # np.roll(sample, time_steps, axis=0) for time shifting
fill_empty_array = lambda : np.empty((0, 96, 128)) samples_by_instrument = defaultdict( lambda : [] )
samples_by_instrument = defaultdict(fill_empty_array)
all_beats = np.empty((0, 96, 128))
for track in roll.Multitrack(midi_file_path).tracks: for track in multitrack.tracks:
if not track.is_drum:
key = settings.midi_group[track.program + 1] key = settings.midi_group[track.program + 1] if not track.is_drum else 'Drums'
else:
key = 'Drums'
# this makes pack of samples of N x 96 x 128 shape # this makes pack of samples of N x 96 x 128 shape
number_of_beats = floor(track.pianoroll.shape[0] / midi_res) number_of_beats = floor(track.pianoroll.shape[0] / midi_res)
track_pianoroll = track.pianoroll[: number_of_beats * midi_res] track_pianoroll = track.pianoroll[: number_of_beats * midi_res]
track_beats = track_pianoroll.reshape(number_of_beats, midi_res, 128) track_beats = track_pianoroll.reshape(number_of_beats, midi_res, 128)
# save collected pack of data to dictionary with samples packs for every instrument # save collected pack of data to dictionary with samples packs for groups of instruments
samples_by_instrument[key] = np.concatenate([track_beats, samples_by_instrument[key]], axis=0) for sample in track_beats:
if sample.sum() != 0:
samples_by_instrument[key].append(sample)
# TODO: add possibility of choosing between saving samples by groups of instruments, by every instrument separately, or with no distinction
# TODO: add option, for looking only for one instrument/group
# TODO: add option for collecting more than one beat per sample (min 4)
return samples_by_instrument return samples_by_instrument
@ -50,52 +49,34 @@ def to_midi(samples, output_path=settings.generated_midi_path, program=0, tempo=
roll.write(return_midi, output_path) roll.write(return_midi, output_path)
return return_midi return return_midi
def delete_empty_samples(sample_pack): # TODO: Make optional function to erase information of note length - ??
non_empty_arrays = [] def ignore_note_lenght():
for sample in sample_pack: pass
if sample.sum() != 0:
non_empty_arrays.append(sample)
return np.array(non_empty_arrays)
def main(): def main():
print('Exporting...') print('Exporting...')
from collections import defaultdict
fill_empty_array = lambda : np.empty((0, 96, 128)) samples_pack_by_instrument = defaultdict( lambda : list() )
samples_pack_by_instrument = defaultdict(fill_empty_array)
sample_pack = np.empty((0,settings.midi_resolution,128))
for directory, subdirectories, files in os.walk(midi_folder_path): for directory, subdirectories, files in os.walk(midi_folder_path):
for midi_file in tqdm(files): for midi_file in tqdm(files):
midi_file_path = os.path.join(directory, midi_file) midi_file_path = os.path.join(directory, midi_file)
# load midi to pypianoroll - Multitrack
try: try:
midi_samples = to_samples(midi_file_path) multitrack = roll.parse(midi_file_path)
except: except:
pass # IDEA: Log errors, and save to file?
if midi_samples is None:
continue continue
# this is for instrument separation for key, value in to_samples(multitrack).items():
for key, value in midi_samples.items(): samples_pack_by_instrument[key].extend(value)
value = delete_empty_samples(value)
samples_pack_by_instrument[key] = np.concatenate((samples_pack_by_instrument[key], value), axis=0)
# save as compressed pickle (sample-dictionary)
# sfile = bz2.BZ2File('data/samples.pickle', 'w')
# pickle.dump(dict(samples_pack_by_instrument), sfile)
# this is for instrument separation # this is for instrument separation
print('Saving...') print('Saving...')
if not os.path.exists(output_path):
os.makedirs(output_path)
for key, value in tqdm(samples_pack_by_instrument.items()): for key, value in tqdm(samples_pack_by_instrument.items()):
if not os.path.exists(output_path): np.savez_compressed('{}/{}.npz'.format(output_path, key), np.array(value))
os.makedirs(output_path)
np.savez_compressed('{}/{}.npz'.format(output_path, key), value)
# # Give a preview of what samples looks like
# fig, axes = plt.subplots(nrows=10, ncols=10, figsize=(20, 20))
# for idx, ax in enumerate(axes.ravel()):
# n = np.random.randint(0, value.shape[0])
# sample = value[n]
# ax.imshow(sample, cmap = plt.get_cmap('gray'))
# plt.savefig('data/samples/{}.png'.format(settings.midi_program[key]))
print('Done!') print('Done!')