praca-magisterska/project/midi.py

#!python3
#!/usr/bin/env python3

''' This module contains functions for encoding midi files into data samples
that are prepared for model training.

    midi_folder_path - the path to the directory containing midi files
    output_path - the output path where the data samples will be created
    seq_len - the length of each input sequence (number of notes/chords)

    Usage:
        >>> ./midi.py <midi_folder_path> <output_path> <seq_len>

'''

import settings
import pypianoroll as roll
import numpy as np
import os
from tqdm import tqdm
from math import floor
import sys
from collections import defaultdict
import pickle
from music21 import converter, instrument, note, chord, stream
import music21

# Command-line arguments. They are read when the module is loaded, so an
# IndexError is raised right here if fewer than three arguments are given
# (and a ValueError if the third one is not an integer).
# midi_folder_path: root directory that main() walks looking for input files.
midi_folder_path = sys.argv[1]
# output_path: directory where main() writes the .npz archives.
output_path = sys.argv[2]
# seq_len: length of each input window, passed by main() to to_sequence().
seq_len = int(sys.argv[3])

def to_sequence(midi_path, seq_len):
    ''' Encode one midi file into fixed-length training sequences.

    This function is supposed to be used on one midi file inside a directory
    loop. The file is parsed with music21 and partitioned by instrument. For
    every part that has a name, each Note is exported as the string form of
    its pitch (e.g. 'C4') and each Chord as the string forms of its pitches
    joined by single spaces; every other kind of event is ignored. A sliding
    window then yields `seq_len` consecutive events as one train_X sample and
    the event that directly follows them as the matching train_y sample.

    Use for LSTM neural network.

        Parameters:
            - midi_path: path to midi file
            - seq_len: length of the sequence before the predicted event

        Returns: Tuple of (train_X, train_y) dictionaries keyed by part name.
            Every value is a list of numpy arrays with one entry per window.
            A part with `seq_len` or fewer events gets no entry at all, and
            both dictionaries are empty when music21 cannot partition the
            file by instrument.

        Raises: whatever music21.converter.parse raises for an unreadable
            file (the caller in this module handles MidiException).'''

    X_train_by_instrument = defaultdict(list)
    y_train_by_instrument = defaultdict(list)

    midi_file = music21.converter.parse(midi_path)
    parts = music21.instrument.partitionByInstrument(midi_file)
    if parts is None:
        # Some music21 releases return None when no instrument can be found.
        # Iterating over None would raise a TypeError that the caller does
        # not catch, so such a file simply contributes no samples.
        return X_train_by_instrument, y_train_by_instrument

    seq_by_instrument = defaultdict(list)
    for part in parts:
        part_name = part.partName
        if part_name is None:
            # Unnamed parts cannot be assigned to an instrument group.
            continue
        for event in part:
            # TODO: add note length as parameter, i.e. export
            # (pitch string, event.quarterLength) instead of the pitch only.
            if isinstance(event, music21.note.Note):
                seq_by_instrument[part_name].append(str(event.pitch))
            elif isinstance(event, music21.chord.Chord):
                seq_by_instrument[part_name].append(
                    ' '.join(str(pitch) for pitch in event.pitches))

    for part_name, sequence in seq_by_instrument.items():
        # The range is empty when the part has seq_len or fewer events.
        for i in range(len(sequence) - seq_len):
            X_train_by_instrument[part_name].append(
                np.array(sequence[i:i + seq_len]))
            y_train_by_instrument[part_name].append(
                np.array(sequence[i + seq_len]))
            # TODO: Notes to integers

    return X_train_by_instrument, y_train_by_instrument

def main():
    ''' Encode every file under `midi_folder_path` and save the samples.

    Walks `midi_folder_path` recursively, runs to_sequence() with the
    module-level `seq_len` on every file found and collects the resulting
    samples per instrument (part name). Afterwards one compressed archive
    named '<part name>.npz' is written into `output_path` for each
    instrument. The two arrays are stored positionally, so numpy names them
    'arr_0' (train_X) and 'arr_1' (train_y) inside the archive.

    Files that raise music21.midi.MidiException are reported and skipped.'''
    print('Exporting...')

    train_X = defaultdict(list)
    train_y = defaultdict(list)

    for directory, _subdirectories, files in os.walk(midi_folder_path):
        for midi_file in tqdm(files):
            midi_file_path = os.path.join(directory, midi_file)

            # some midi files can be corrupted and cannot be parsed,
            # so we report them, omit them and go to the next file.
            try:
                _X_train, _y_train = to_sequence(midi_file_path, seq_len)
            except music21.midi.MidiException:
                # tqdm.write prints without breaking the progress bar.
                tqdm.write('Skipping unreadable midi file: {}'.format(midi_file_path))
                continue

            # to_sequence() fills both dictionaries under the same keys, so
            # the y samples are looked up by key instead of relying on both
            # dictionaries being iterated in the same order.
            for part_name, samples in _X_train.items():
                train_X[part_name].extend(samples)
                train_y[part_name].extend(_y_train[part_name])

    # this is for instrument separation: one archive per instrument
    print('Saving...')
    os.makedirs(output_path, exist_ok=True)
    for part_name, samples in tqdm(train_X.items()):
        # Part names come from the midi files themselves; a path separator
        # inside one would point into a sub-directory that does not exist.
        safe_name = str(part_name).replace(os.sep, '_')
        if os.altsep:
            safe_name = safe_name.replace(os.altsep, '_')
        archive_path = os.path.join(output_path, '{}.npz'.format(safe_name))
        np.savez_compressed(archive_path, np.array(samples), np.array(train_y[part_name]))

    print('Done!')

# Run the export only when the file is executed as a script.
if __name__ == '__main__':
    main()
add docstrings, fix choose_by_prob 2019-06-19 13:40:35 +02:00			`#!python3`
simplify a little midi.py module 2019-05-29 10:36:34 +02:00			`#!/usr/bin/env python3`

add docstrings, fix choose_by_prob 2019-06-19 13:40:35 +02:00			`''' This module contains functions to endocing midi files into data samples`
			`that is prepared for model training.`

			`midi_folder_path - the path to directiory containing midi files`
			`output_path - the output path where will be created samples of data`

			`Usage:`
			`>>> ./midi.py <midi_folder_path> <output_path>`

			`'''`

simplify a little midi.py module 2019-05-29 10:36:34 +02:00			`import settings`
			`import pypianoroll as roll`
			`import numpy as np`
			`import os`
			`from tqdm import tqdm`
			`from math import floor`
			`import sys`
lstm - drop this branch, looking for other way to generate music 2019-05-30 11:23:34 +02:00			`from collections import defaultdict`
			`import pickle`
get it working, on music21 and sequence style enoding 2019-06-01 17:05:38 +02:00			`from music21 import converter, instrument, note, chord, stream`
			`import music21`
simplify a little midi.py module 2019-05-29 10:36:34 +02:00
added argv, fix no-dir-exist bug, loop through all files, dirs in input 2019-05-30 20:47:47 +02:00			`midi_folder_path = sys.argv[1]`
			`output_path = sys.argv[2]`
add docstrings, fix choose_by_prob 2019-06-19 13:40:35 +02:00			`seq_len = int(sys.argv[3])`

			`def to_sequence(midi_path, seq_len):`
			`''' This function is supposed to be used on one midi file in directory loop.`
			`Its encoding midi files, into sequances of given lenth as a train_X,`
			`and the next note as a train_y. Also splitting midi samples into`
			`instrument group.`
added argv, fix no-dir-exist bug, loop through all files, dirs in input 2019-05-30 20:47:47 +02:00
add docstrings, fix choose_by_prob 2019-06-19 13:40:35 +02:00			`Use for LSTM neural network.`

			`Parameters:`
			`- midi_path: path to midi file`
			`- seq_len: lenght of sequance before prediction`

			`Returns: Tuple of train_X, train_y directories'''`

get it working, on music21 and sequence style enoding 2019-06-01 17:05:38 +02:00			`seq_by_instrument = defaultdict( lambda : [] )`
			`midi_file = music21.converter.parse(midi_path)`
			`stream = music21.instrument.partitionByInstrument(midi_file)`
			`for part in stream:`
			`for event in part:`
			`if part.partName != None:`
			`# TODO: add note lenght as parameter`
			`if isinstance(event, music21.note.Note):`
			`# to_export_event = (str(event.pitch), event.quarterLength)`
			`to_export_event = str(event.pitch)`
			`seq_by_instrument[part.partName].append(to_export_event)`
			`elif isinstance(event, music21.chord.Chord):`
			`to_export_event = ' '.join(str(note) for note in event.pitches)`
			`# to_export_event = (' '.join(str(note) for note in event.pitches), event.quarterLength)`
			`seq_by_instrument[part.partName].append(to_export_event)`

			`X_train_by_instrument = defaultdict( lambda : [] )`
			`y_train_by_instrument = defaultdict( lambda : [] )`

			`for instrument, sequence in seq_by_instrument.items():`
add docstrings, fix choose_by_prob 2019-06-19 13:40:35 +02:00			`for i in range(len(sequence)-(seq_len)) :`
			`X_train_by_instrument[instrument].append(np.array(sequence[i:i+seq_len])) # <seq lenth`
			`y_train_by_instrument[instrument].append(np.array(sequence[i+seq_len]))`
get it working, on music21 and sequence style enoding 2019-06-01 17:05:38 +02:00			`# TODO: Notes to integers`

			`return X_train_by_instrument, y_train_by_instrument`
delete empty samples - optimized 2019-05-30 13:36:15 +02:00
simplify a little midi.py module 2019-05-29 10:36:34 +02:00			`def main():`
lstm - drop this branch, looking for other way to generate music 2019-05-30 11:23:34 +02:00			`print('Exporting...')`
OPTIMIZED: collecting samples in python list instead of np.array, empty array deletion in fly 2019-05-31 10:25:16 +02:00
get it working, on music21 and sequence style enoding 2019-06-01 17:05:38 +02:00			`train_X = defaultdict( lambda : [] )`
			`train_y = defaultdict( lambda : [] )`
OPTIMIZED: collecting samples in python list instead of np.array, empty array deletion in fly 2019-05-31 10:25:16 +02:00
added argv, fix no-dir-exist bug, loop through all files, dirs in input 2019-05-30 20:47:47 +02:00			`for directory, subdirectories, files in os.walk(midi_folder_path):`
			`for midi_file in tqdm(files):`
			`midi_file_path = os.path.join(directory, midi_file)`
add docstrings, fix choose_by_prob 2019-06-19 13:40:35 +02:00
			`# some midi files can be corupted, and cannot be parsed`
			`# so we just omit corupted files, and go to the next file.`
			`try:`
			`_X_train, _y_train = to_sequence(midi_file_path, seq_len)`
			`except music21.midi.MidiException:`
			`continue`
data extraction to groups of instruments; drums is now avaible 2019-05-30 23:11:25 +02:00
get it working, on music21 and sequence style enoding 2019-06-01 17:05:38 +02:00			`for (X_key, X_value), (y_key, y_value) in zip(_X_train.items(), _y_train.items()):`
			`train_X[X_key].extend(np.array(X_value))`
			`train_y[y_key].extend(np.array(y_value))`
lstm - drop this branch, looking for other way to generate music 2019-05-30 11:23:34 +02:00
			`# this is for intrument separation`
			`print('Saving...')`
OPTIMIZED: collecting samples in python list instead of np.array, empty array deletion in fly 2019-05-31 10:25:16 +02:00			`if not os.path.exists(output_path):`
			`os.makedirs(output_path)`
get it working, on music21 and sequence style enoding 2019-06-01 17:05:38 +02:00			`for (X_key, X_value), (y_key, y_value) in tqdm(zip(train_X.items(), train_y.items())):`
			`if X_key == y_key:`
			`np.savez_compressed('{}/{}.npz'.format(output_path, X_key), np.array(X_value), np.array(y_value))`
lstm - drop this branch, looking for other way to generate music 2019-05-30 11:23:34 +02:00
			`print('Done!')`
simplify a little midi.py module 2019-05-29 10:36:34 +02:00
			`if __name__ == '__main__':`
			`main()`