From 76f26837cdd60992a3519aa035b45864272b9179 Mon Sep 17 00:00:00 2001 From: Cezary Pukownik Date: Fri, 31 May 2019 10:25:16 +0200 Subject: [PATCH] OPTIMIZED: collecting samples in python list instead of np.array, empty array deletion in fly --- project/midi.py | 79 +++++++++++++++++++------------------------------ 1 file changed, 30 insertions(+), 49 deletions(-) diff --git a/project/midi.py b/project/midi.py index 42f1627..447bbc2 100644 --- a/project/midi.py +++ b/project/midi.py @@ -2,45 +2,44 @@ import settings import pypianoroll as roll -import matplotlib.pyplot as plt import numpy as np import os from tqdm import tqdm from math import floor import sys -import pickle -from tqdm import tqdm -from tqdm import trange from collections import defaultdict -import bz2 import pickle midi_folder_path = sys.argv[1] output_path = sys.argv[2] -def to_samples(midi_file_path, midi_res=settings.midi_resolution): +def to_samples(multitrack, midi_res=settings.midi_resolution, how='by_group'): - # add transpositions of every sample to every possible key transposition + #how = 'by_group', 'by_instrument', 'merged', + + # TODO: add transpositions of every sample to every possible key transposition # np.roll(sample, pitch_interval, axis=1) for transposition # np.roll(sample, time_steps, axis=0) for time shifting - fill_empty_array = lambda : np.empty((0, 96, 128)) - samples_by_instrument = defaultdict(fill_empty_array) - all_beats = np.empty((0, 96, 128)) + samples_by_instrument = defaultdict( lambda : [] ) - for track in roll.Multitrack(midi_file_path).tracks: - if not track.is_drum: - key = settings.midi_group[track.program + 1] - else: - key = 'Drums' + for track in multitrack.tracks: + + key = settings.midi_group[track.program + 1] if not track.is_drum else 'Drums' # this makes pack of samples of N x 96 x 128 shape number_of_beats = floor(track.pianoroll.shape[0] / midi_res) track_pianoroll = track.pianoroll[: number_of_beats * midi_res] track_beats = 
track_pianoroll.reshape(number_of_beats, midi_res, 128) - # save collected pack of data to dictionary with samples packs for every instrument - samples_by_instrument[key] = np.concatenate([track_beats, samples_by_instrument[key]], axis=0) + # save collected pack of data to dictionary with samples packs for groups of instruments + for sample in track_beats: + if sample.sum() != 0: + samples_by_instrument[key].append(sample) + + # TODO: add possibility of choosing between saving samples to groups of instruments, or to every instrument separately or with no difference + # TODO: add option for looking only for one instrument/group + # TODO: add option for collecting more than one beat per sample (min 4) return samples_by_instrument @@ -50,52 +49,34 @@ def to_midi(samples, output_path=settings.generated_midi_path, program=0, tempo= roll.write(return_midi, output_path) return return_midi -def delete_empty_samples(sample_pack): - non_empty_arrays = [] - for sample in sample_pack: - if sample.sum() != 0: - non_empty_arrays.append(sample) - return np.array(non_empty_arrays) +# TODO: Make optional function to erase information of note length - ?? +def ignore_note_lenght(): + pass def main(): print('Exporting...') - from collections import defaultdict - fill_empty_array = lambda : np.empty((0, 96, 128)) - samples_pack_by_instrument = defaultdict(fill_empty_array) - sample_pack = np.empty((0,settings.midi_resolution,128)) + + samples_pack_by_instrument = defaultdict( lambda : list() ) + for directory, subdirectories, files in os.walk(midi_folder_path): for midi_file in tqdm(files): midi_file_path = os.path.join(directory, midi_file) + #load midi to pypianoroll - Multitrack try: - midi_samples = to_samples(midi_file_path) + multitrack = roll.parse(midi_file_path) except: - pass - if midi_samples is None: + # IDEA: Log errors, and save to file? 
continue - # this is for intrument separation - for key, value in midi_samples.items(): - value = delete_empty_samples(value) - samples_pack_by_instrument[key] = np.concatenate((samples_pack_by_instrument[key], value), axis=0) - - # save as compressed pickle (sample-dictionary) - # sfile = bz2.BZ2File('data/samples.pickle', 'w') - # pickle.dump(dict(samples_pack_by_instrument), sfile) + for key, value in to_samples(multitrack).items(): + samples_pack_by_instrument[key].extend(value) # this is for intrument separation print('Saving...') + if not os.path.exists(output_path): + os.makedirs(output_path) for key, value in tqdm(samples_pack_by_instrument.items()): - if not os.path.exists(output_path): - os.makedirs(output_path) - np.savez_compressed('{}/{}.npz'.format(output_path, key), value) - - # # Give a preview of what samples looks like - # fig, axes = plt.subplots(nrows=10, ncols=10, figsize=(20, 20)) - # for idx, ax in enumerate(axes.ravel()): - # n = np.random.randint(0, value.shape[0]) - # sample = value[n] - # ax.imshow(sample, cmap = plt.get_cmap('gray')) - # plt.savefig('data/samples/{}.png'.format(settings.midi_program[key])) + np.savez_compressed('{}/{}.npz'.format(output_path, key), np.array(value)) print('Done!')