#!/usr/bin/env python
# coding: utf-8
"""Turn MIDI files into bar-aligned note sequences for seq2seq training.

A track is encoded as a list of "notes": 2-tuples ``(pitches, length)`` where
``pitches`` is a tuple of MIDI pitch numbers (``(-1,)`` marks a rest) and
``length`` is a duration expressed as ticks divided by the file resolution,
rounded to a multiple of 0.25.
"""

import logging
import os
import pickle
from collections import defaultdict

import numpy as np

# pretty_midi is only needed to read/write MIDI files. Guarding the import
# keeps the pure bar/sequence helpers importable (and testable) without it;
# every entry point that really needs it goes through _pretty_midi().
try:
    import pretty_midi as pm
except ImportError:
    pm = None

# tqdm only draws a progress bar, so degrade to plain iteration without it.
try:
    from tqdm import tqdm
except ImportError:
    def tqdm(iterable, *args, **kwargs):
        """Fallback for a missing tqdm: return the iterable unchanged."""
        return iterable

logger = logging.getLogger(__name__)

# Kept as a module attribute for backward compatibility with the notebook export.
TODO = '''
TODO: put methods of data extraction for seq2seq arangment model to multitrack class [DONE]
TODO: make functions for data extraction for seq2seq model for riff/melody generation [DONE]
'''

# Pitch tuple used to encode a rest (silence) in a note stream.
_REST = (-1,)


def _pretty_midi():
    """Return the pretty_midi module or raise ImportError if it is missing."""
    if pm is None:
        raise ImportError('pretty_midi is required to read or write MIDI files')
    return pm


def get_bar_indexes_with_content(bars):
    """Return the set of indexes of the bars that contain at least one non-rest note.

    parameters:
    -----------
    bars: list
        list of bars, each bar being a list of notes

    return:
    -------
    set of int:
        indexes into ``bars``; a set is unordered, sort it when order matters
    """
    return {index for index, bar in enumerate(bars) if bar_has_content(bar)}


def get_bars_with_content(bars):
    """Return, in their original order, the bars that contain a non-rest note.

    parameters:
    -----------
    bars: list
        list of bars, each bar being a list of notes

    return:
    -------
    list:
        the bars of ``bars`` for which ``bar_has_content`` is True
    """
    return [bar for bar in bars if bar_has_content(bar)]


def get_common_bars(track_x, track_y):
    """Return the bars at positions where both tracks have musical content.

    parameters:
    -----------
    track_x, track_y:
        objects exposing ``stream_to_bars()`` (e.g. SingleTrack)

    return:
    -------
    common_bars_x, common_bars_y: list, list
        bars of each track, aligned index by index and in chronological order
    """
    bars_x = track_x.stream_to_bars()
    bars_y = track_y.stream_to_bars()
    shared = get_bar_indexes_with_content(bars_x) & get_bar_indexes_with_content(bars_y)
    # Set iteration order is arbitrary; sort so that consecutive output bars
    # follow the order of the song (sliding windows are built on top of this).
    ordered = sorted(shared)
    return [bars_x[i] for i in ordered], [bars_y[i] for i in ordered]


def get_bar_len(bar):
    """Return the total length of a bar.

    parameters:
    -----------
    bar: list
        list of notes, i.e. tuples like (pitches, len)

    return:
    -------
    sum of the second element of every note (0 for an empty bar)
    """
    return sum(note[1] for note in bar)


def bar_has_content(bar):
    """Tell whether a bar holds any non-rest event (a note or a chord).

    parameters:
    -----------
    bar: list
        list of notes

    return:
    -------
    bool:
        True if at least one note has pitches other than ``(-1,)``;
        False for an all-rest bar and for an empty bar
    """
    return any(note[0] != _REST for note in bar)


def round_to_sixteenth_note(x, base=0.25):
    """Round ``x`` to the closest multiple of ``base``.

    The default base of 0.25 is the sixteenth-note grid used across this module.
    Rounding uses the built-in ``round``.
    """
    return base * round(x / base)


def parse_pretty_midi_instrument(instrument, resolution, time_to_tick, key_offset):
    """Encode a pretty_midi instrument as a SingleTrack.

    parameters:
    -----------
    instrument:
        object with ``notes`` (each having ``start``, ``end``, ``pitch``),
        ``is_drum`` and ``program`` attributes (a pretty_midi Instrument)
    resolution:
        divisor applied to tick values before rounding to the 0.25 grid
    time_to_tick: callable
        converts a time value of the notes into ticks
    key_offset: int
        value added to every pitch of non-drum instruments

    return:
    -------
    SingleTrack:
        track whose stream holds (pitches, length) tuples; notes that round to
        the same position are merged into one event with all their pitches and
        the longest of their lengths
    """
    first_tick = None
    prev_tick = 0
    prev_note_len = 0
    max_rest_len = 4.0

    # position -> [set of pitches, set of lengths]; insertion order is kept
    events = defaultdict(lambda: [set(), set()])
    for note in instrument.notes:
        if first_tick is None:
            # The stream always starts at position 0; a leading gap is
            # encoded as rests by the code below.
            first_tick = 0

        tick = round_to_sixteenth_note(time_to_tick(note.start) / resolution)

        # Fill the gap since the end of the previous event with rests,
        # each rest being at most max_rest_len long.
        gap_start = prev_tick + prev_note_len
        if gap_start < tick:
            rest_len = tick - gap_start
            while rest_len > max_rest_len:
                events[gap_start] = [{-1}, {max_rest_len}]
                gap_start += max_rest_len
                rest_len -= max_rest_len
            events[gap_start] = [{-1}, {rest_len}]

        note_len = round_to_sixteenth_note(time_to_tick(note.end - note.start) / resolution)

        # A real note replaces a rest previously stored at the same position.
        if -1 in events[tick][0]:
            events[tick] = [set(), set()]

        # Drum pitches select a drum sound, so they are never transposed.
        pitch = note.pitch if instrument.is_drum else note.pitch + key_offset
        events[tick][0].add(pitch)
        events[tick][1].add(note_len)

        prev_tick = tick
        prev_note_len = note_len

    notes = [(tuple(pitches), max(lengths)) for pitches, lengths in events.values()]

    if instrument.is_drum:
        name = 'Drums'
    else:
        name = _pretty_midi().program_to_instrument_class(instrument.program)
    return SingleTrack(name, instrument.program, instrument.is_drum, Stream(first_tick, notes))


def remove_duplicated_sequences(xy_tuple):
    """Drop repeated (x, y) pairs, keeping the first occurrence of each.

    parameters:
    -----------
    xy_tuple: tuple (x, y)
        two parallel lists of sequences; the elements of every sequence
        must be hashable

    return:
    -------
    x_unique, y_unique: list, list
        parallel lists of the unique pairs in first-seen order; every
        sequence is returned as a tuple
    """
    x, y = xy_tuple[0], xy_tuple[1]
    frozen_pairs = zip((tuple(seq) for seq in x), (tuple(seq) for seq in y))
    # dict.fromkeys de-duplicates while keeping insertion order, so the result
    # is reproducible between runs (a plain set gives an arbitrary order).
    unique_pairs = list(dict.fromkeys(frozen_pairs))
    return [pair[0] for pair in unique_pairs], [pair[1] for pair in unique_pairs]


class Stream():
    """Container for an encoded note sequence.

    atributes:
    ----------
    notes: list
        list of (pitches, length) tuples
    first_tick:
        position at which the sequence starts
    """

    def __init__(self, first_tick, notes):
        self.notes = notes
        self.first_tick = first_tick

    def __repr__(self):
        return '<Stream: {} events>'.format(len(self.notes))


class SingleTrack():
    """Single track of a midi file, encoded from a pretty_midi instrument.

    atributes:
    ----------
    name: str
        name of instrument class
    program: int
        midi instrument program
    is_drum: bool
        True if this track is drums track, False otherwise
    stream:
        Stream object of encoded music events (chords or notes)
    """

    def __init__(self, name=None, program=None, is_drum=None, stream=None):
        self.name = name
        self.program = program
        self.is_drum = is_drum
        self.stream = stream

    def __repr__(self):
        return '<SingleTrack name:{}, program:{}, is_drum:{}>'.format(
            self.name, self.program, self.is_drum)

    def to_pretty_midi_instrument(self, tempo=100):
        """Create a pretty_midi Instrument from ``self.stream.notes``.

        parameters:
        -----------
        tempo: number
            note lengths and the start position are scaled by ``100 / tempo``

        return:
        -------
        track: pretty_midi Instrument object with one Note (velocity 100)
            per pitch of every non-rest event
        """
        midi = _pretty_midi()
        tempo_strech = 100 / tempo
        track = midi.Instrument(program=self.program, is_drum=self.is_drum, name=self.name)
        # first_tick is None for a track parsed from an instrument without notes.
        time = (self.stream.first_tick or 0) * tempo_strech
        for note in self.stream.notes:
            note_len = note[1] * tempo_strech
            for pitch in note[0]:
                # a rest produces no Note, it only advances the clock
                if pitch == -1:
                    break
                track.notes.append(
                    midi.Note(velocity=100, pitch=pitch, start=time, end=time + note_len))
            time = time + note_len

        return track

    def stream_to_bars(self, beat_per_bar=4):
        """Split the note stream into bars of ``beat_per_bar`` total length.

        A note that crosses a barline is cut into pieces carrying the same
        pitches, whose lengths add up to the original length; a note longer
        than a bar is spread over as many bars as needed.

        parameters:
        -----------
        beat_per_bar: number
            total note length of a full bar, must be positive

        return:
        -------
        bars: list of lists of notes
            every bar sums to ``beat_per_bar`` except possibly the last one,
            which is left shorter when the stream ends inside a bar

        raises:
        -------
        ValueError: if ``beat_per_bar`` is not positive
        """
        # TODO: optionally fill a shorter last bar with rests
        if beat_per_bar <= 0:
            raise ValueError('beat_per_bar must be positive')

        bars = []
        current_bar = []
        filled = 0
        for note in self.stream.notes:
            pitches, remaining = note[0], note[1]
            was_split = False
            # Cut off the part that fits and close the bar until the
            # remainder fits into the bar being filled.
            while filled + remaining > beat_per_bar:
                head_len = beat_per_bar - filled
                current_bar.append((pitches, head_len))
                bars.append(current_bar)
                current_bar, filled = [], 0
                remaining -= head_len
                was_split = True

            # Unsplit notes are passed through as the very same object.
            current_bar.append((pitches, remaining) if was_split else note)
            filled += remaining
            if filled == beat_per_bar:
                bars.append(current_bar)
                current_bar, filled = [], 0

        # Keep the trailing partial bar, including the tail of a final
        # note that was split across a barline.
        if current_bar:
            bars.append(current_bar)

        return bars


class MultiTrack():
    """Class that represents one midi file.

    atributes:
        tempo: tempo passed to pretty_midi as initial tempo and used by save()
        pm_obj : PrettyMIDI class object of this midi file
        res: resolution of midi
        time_to_tick: ``pm_obj.time_to_tick``, converts a time into ticks
        name: path to midi file
        tracks: a list of SingleTrack objects
        tracks_by_instrument: dict mapping a track name to the indexes in
            ``tracks`` of the tracks having that name
    """

    def __init__(self, path=None, tempo=100):
        self.tempo = tempo
        self.pm_obj = _pretty_midi().PrettyMIDI(path, initial_tempo=self.tempo)
        self.res = self.pm_obj.resolution
        self.time_to_tick = self.pm_obj.time_to_tick
        self.name = path
        # The offset depends on the whole file, so compute it once rather
        # than once per instrument.
        key_offset = self.get_pitch_offset_to_C()
        self.tracks = [
            parse_pretty_midi_instrument(instrument, self.res, self.time_to_tick, key_offset)
            for instrument in self.pm_obj.instruments
        ]
        self.tracks_by_instrument = self.get_track_by_instrument()

    def get_multiseq(self):
        """Group the ``seq`` entries of all tracks by sequence id.

        return:
        -------
        dict: sequence id -> list of (track name, sequence) tuples
        """
        # NOTE(review): SingleTrack as defined in this file has no ``seq``
        # attribute, so this raises AttributeError for tracks built by
        # parse_pretty_midi_instrument - presumably a leftover of an older
        # track format; confirm before relying on it.
        multiseq = {key: [] for music_track in self.tracks for key in music_track.seq}

        for single_track in self.tracks:
            for key, value in single_track.seq.items():
                multiseq[key].append((single_track.name, value))

        return multiseq

    def get_pitch_offset_to_C(self):
        """Return the number of semitones to ADD to pitches to move the song to C.

        The most frequent pitch class of the file is assumed to be the root
        note of the song; this avoids augmenting the midis to every key.

        return:
        -------
        int: offset between -6 and 5, transposing by the shorter direction
        """
        hist = self.pm_obj.get_pitch_class_histogram()
        # int() keeps numpy integer types out of the encoded note tuples.
        root = int(np.argmax(hist))
        return 12 - root if root > 6 else -root

    def save(self, path):
        """Write all tracks to a midi file at ``path`` and return the PrettyMIDI object."""
        midi_file = _pretty_midi().PrettyMIDI()
        for track in self.tracks:
            midi_file.instruments.append(track.to_pretty_midi_instrument(self.tempo))
        midi_file.write(path)
        return midi_file

    def get_track_by_instrument(self):
        """Return a dictionary with track indexes grouped by track name (instrument class)."""
        grouped = defaultdict(list)
        for index, track in enumerate(self.tracks):
            grouped[track.name].append(index)
        # Hand out a plain dict so that looking up a missing name never
        # silently creates an entry.
        return dict(grouped)

    def get_common_bars_for_every_possible_pair(self, x_instrument, y_instrument):
        """Collect the common bars of every (x track, y track) pair of the given classes.

        return:
        -------
        x_bars, y_bars: list, list
            parallel lists of bars, concatenated over all pairs
        """
        x_bars = []
        y_bars = []
        for x_index, y_index in self.get_posible_pairs(x_instrument, y_instrument):
            pair_x, pair_y = get_common_bars(self.tracks[x_index], self.tracks[y_index])
            x_bars.extend(pair_x)
            y_bars.extend(pair_y)

        return x_bars, y_bars

    def get_data_seq2seq_arrangment(self, x_instrument, y_instrument, bars_in_seq=4):
        """Build aligned x/y sequences for arrangement generation.

        A window of ``bars_in_seq`` consecutive common bars is rolled over the
        bars of both instruments; every window is flattened to a list of notes.

        return:
        -------
        x_seq, y_seq: list, list
            parallel lists of note sequences (empty when there are fewer
            than ``bars_in_seq`` common bars)
        """
        x_bars, y_bars = self.get_common_bars_for_every_possible_pair(x_instrument, y_instrument)

        x_seq = []
        y_seq = []
        for start in range(len(x_bars) - bars_in_seq + 1):
            stop = start + bars_in_seq
            x_seq.append([note for bar in x_bars[start:stop] for note in bar])
            y_seq.append([note for bar in y_bars[start:stop] for note in bar])

        return x_seq, y_seq

    def get_data_seq2seq_melody(self, instrument_class, x_seq_len=4):
        """Build (previous bars -> next bar) samples for melody generation.

        Samples are gathered from every track of ``instrument_class``, using
        only the bars with content of each track.

        return:
        -------
        x_seq, y_seq: list, list
            x_seq[k] is the flattened list of notes of ``x_seq_len``
            consecutive bars, y_seq[k] is the bar that follows them;
            both are empty when the file has no track of that class
        """
        x_seq = []
        y_seq = []
        for track_index in self.tracks_by_instrument.get(instrument_class, []):
            bars = get_bars_with_content(self.tracks[track_index].stream_to_bars())
            # NOTE(review): the extra "- 1" never uses the last bar as a
            # target - presumably because it may be shorter than a full bar;
            # kept as in the original, confirm the intent.
            for start in range(len(bars) - x_seq_len - 1):
                x_seq.append([note for bar in bars[start:start + x_seq_len] for note in bar])
                y_seq.append(bars[start + x_seq_len])

        return x_seq, y_seq

    def get_posible_pairs(self, instrument_x, instrument_y):
        """Return every (x track index, y track index) combination.

        parameters:
        -----------
        instrument_x, instrument_y : string {'Guitar','Bass','Drums'}
            a string that represent a instrument class you want to look for in midi file.

        returns:
        ----------
        pairs: list of tuples
            all 2-element combinations of the track indexes of both classes;
            empty when either class is absent from the file
        """
        x_indexes = self.tracks_by_instrument.get(instrument_x, [])
        y_indexes = self.tracks_by_instrument.get(instrument_y, [])
        return [(x, y) for x in x_indexes for y in y_indexes]

    def show_map(self):
        """Print the file name and, per track, one character per bar ('█' content, '_' empty)."""
        print(self.name)
        print()
        for track in self.tracks:
            track_str = ''.join(
                '█' if bar_has_content(bar) else '_' for bar in track.stream_to_bars(4))
            print(track.name[:4], ':', track_str)


def extract_data(midi_folder_path=None, how=None, instrument=None, remove_duplicates=True):
    """Extract x_train / y_train sequences from the midi files of a folder.

    parameters:
    -----------
    midi_folder_path : string
        a path to directory where midi files are stored (walked recursively)
    how : string {'melody','arrangment'}
        - if melody: function extract data of one instrument,
                    and return lists of x and y that x is actual sequance of 4 bars
                    and y is next bar
        - if arrangment: function extract data of two instruments and
                    returns a lists of x and y that x is one instrument sequence,
                    and y is coresponing sequance to x, played by second instrument
    instrument: string or tuple of two strings
        instrument class, or classes, to extract from the midi files.

        if how='melody': string
        if how='arrangment' : (string_x, string_y)
    remove_duplicates: bool
        when True, repeated (x, y) pairs are dropped and sequences are
        returned as tuples

    return:
    -------
    x_train, y_train - tuple of coresponding lists of x_train and y_train data for training set

    raises:
    -------
    ValueError: if ``how`` is not 'melody' or 'arrangment'
    ImportError: if pretty_midi is not installed

    notes:
    ------
    extracted data is transposed to the key of C
    files that cannot be processed are skipped with a logged warning
    """
    if how not in {'melody', 'arrangment'}:
        raise ValueError('how parameter must by one of {melody,arrangment} ')

    # Fail loudly here rather than skipping every file in the loop below.
    _pretty_midi()

    x_train = []
    y_train = []

    for directory, _subdirectories, files in os.walk(midi_folder_path):
        for midi_file in tqdm(files):
            midi_file_path = os.path.join(directory, midi_file)
            try:
                mt = MultiTrack(midi_file_path)
                if how == 'melody':
                    x, y = mt.get_data_seq2seq_melody(instrument)
                else:
                    x, y = mt.get_data_seq2seq_arrangment(instrument[0], instrument[1])
            except Exception as exc:
                # Best effort over arbitrary files: skip what cannot be
                # parsed, but leave a trace and let Ctrl-C through.
                logger.warning('Skipping %s: %s', midi_file_path, exc)
                continue
            x_train.extend(x)
            y_train.extend(y)

    if remove_duplicates:
        x_train, y_train = remove_duplicated_sequences((x_train, y_train))

    return x_train, y_train


def main():
    """Extract Guitar -> Bass arrangement data from 'WhiteStripes' and pickle it."""
    x_train, y_train = extract_data(
        midi_folder_path='WhiteStripes', how='arrangment', instrument=('Guitar', 'Bass'))
    with open('Guitar_to_Bass_data.pkl', 'wb') as pickle_file:
        pickle.dump((x_train, y_train), pickle_file)
    return x_train, y_train


if __name__ == '__main__':
    main()