new library for midi processing

2019-07-03 13:09:49 +02:00 · 2019-07-03 13:09:49 +02:00 · 7d5cd31bdd
commit 7d5cd31bdd
parent acb3024341
1 changed files with 676 additions and 0 deletions
--- a/project/midi_processing.py
+++ b/project/midi_processing.py
@ -0,0 +1,676 @@
 #!/usr/bin/env python
 # coding: utf-8
 # In[1]:
 import os
 import pickle
 import numpy as np
 from collections import defaultdict
 import pretty_midi as pm
 from tqdm import tqdm
 # In[98]:
 # Development notes kept as a module-level string; both listed items are marked [DONE].
 TODO = '''
 TODO: put methods of data extraction for seq2seq arangment model to multitrack class [DONE]
 TODO: make functions for data extraction for seq2seq model for riff/melody generation [DONE]
 '''
 # In[367]:
 # '''return a dictionary with tracks indexes grouped by instrument class'''
 # tracks = file.tracks
 # names = [track.name for track in tracks]
 # uniqe_instruemnts = set(names)
 # tracks_by_instrument = dict()
 # for key in uniqe_instruemnts:
 #     tracks_by_instrument[key] = []
 # for i, track in enumerate(tracks):
 #     tracks_by_instrument[track.name].append(i)
 # tracks_by_instrument
 # In[368]:
 # def get_posible_pairs(instrument_x, instrument_y):
 #     '''it takes two lists, and return a list of tuples with every posible 2-element combination
 #     parameters:
 #     -----------
 #         instrument_x, instrument_y : string {'Guitar','Bass','Drums'} 
 #             a string that represent a instrument class you want to look for in midi file.
 #     returns:
 #     ----------
 #         pairs: list of tuples
 #             a list of posible 2-element combination of two lists
 #     '''
 #     x_indexes = tracks_by_instrument[instrument_x]
 #     y_indexes = tracks_by_instrument[instrument_y]
 #     pairs = []
 # #     pairs = [(x,y) for x in x_indexes for y in y_indexes]
 #     for x in x_indexes:
 #         for y in y_indexes:
 #             pairs.append((x,y)) 
 #     return pairs
 # In[369]:
 #  def get_common_bars_for_every_possible_pair(pairs)
 #     ''' for every possible pair of given instrument classes
 #     returns common bars from multitrack'''
 #     x_bars = []
 #     y_bars = []
 #     for x_track_index, y_track_index in pairs:
 #         _x_bars, _y_bars = get_common_bars(file.tracks[x_track_index], file.tracks[y_track_index])
 #         x_bars.extend(_x_bars)
 #         y_bars.extend(_y_bars)
 #     return x_bars, y_bars
 # In[370]:
 # def get_data_seq2seq_arrangment(self, bars_in_seq):
 #     ## This is the end of extracting data from midis to seq2seq arranging network.
 #     '''this method is returning a sequances of given lenth by rolling this lists of x and y for arrangemt generation'''
 #     x_seq = []
 #     y_seq = []
 #     for i in range(len(x_bars) - bars_in_seq + 1):
 #         x_seq_to_add = [note for bar in x_bars[i:i+bars_in_seq] for note in bar ]
 #         y_seq_to_add = [note for bar in y_bars[i:i+bars_in_seq] for note in bar ]
 #         x_seq.append(x_seq_to_add)
 #         y_seq.append(y_seq_to_add)
 #     len(x_seq), len(y_seq)
 #     # get_bar_len(y_seq[0])
 # In[371]:
 # def get_track_by_instrument(self):
 #     '''return a dictionary with tracks indexes grouped by instrument class'''
 #     tracks = self.tracks
 #     names = [track.name for track in tracks]
 #     uniqe_instruemnts = set(names)
 #     tracks_by_instrument = dict()
 #     for key in uniqe_instruemnts:
 #         tracks_by_instrument[key] = []
 #     for i, track in enumerate(tracks):
 #         tracks_by_instrument[track.name].append(i)
 #     return tracks_by_instrument
 # In[372]:
 # def get_data_seq2seq_melody(self,instrument_class, x_seq_len=4)
 #     '''return a list of bars with content for every track with given instrument class for melody generaiton'''
 #     instrument_tracks = tracks_by_instrument[instrument_class]
 #     for track_index in instrument_tracks:
 #         # make below as function: get_bars_with_content
 #         bars = file.tracks[track_index].stream_to_bars()
 #         bars_indexes_with_content = get_bar_indexes_with_content(bars)
 #         bars_with_content = [bars[i] for i in get_bar_indexes_with_content(bars)]
 #         # make below as function: get_sequances_from_bars (for seq2seq melody generator)
 #         x_seq = []
 #         y_bar = []
 #         for i in range(len(bars_with_content)-seq_len-1):
 #             _x_seq = bars_with_content[i:i+seq_len]
 #             _y_bar = bars_with_content[i+seq_len]
 #             x_seq.append(_x_seq)
 #             y_bar.append(_y_bar)
 #     len(x_seq), len(y_bar)
 # # print( ' x:' ,x_seq[1],'\n', 'y: ', y_bar[1],'\n', 'seq: ',bars_with_content[1:6])
 # In[15]:
 def get_bar_indexes_with_content(bars):
    '''collect the positions of the bars that carry musical content
    a bar is treated as empty when bar_has_content() reports False for it
    parameters:
    -----------
        bars: list
            list of bars, every bar is a list of notes
    returns:
    --------
        set of int:
            indexes (positions in `bars`) of the non-empty bars
    '''
    return {position for position, bar in enumerate(bars) if bar_has_content(bar)}
 # In[4]:
 def get_bars_with_content(bars):
    '''filter out the empty bars and keep the remaining ones in their original order
    a bar is treated as empty when bar_has_content() reports False for it
    parameters:
    -----------
        bars: list
            list of bars, every bar is a list of notes
    returns:
    --------
        list:
            the non-empty bars, in the order they appear in `bars`
    '''
    return [bar for bar in bars if bar_has_content(bar)]
 # In[5]:
 def get_common_bars(track_x,track_y):
    '''return the bars in which both tracks have musical content
    both tracks are split into bars with stream_to_bars(); a bar position is kept
    only when the bar at that position is non-empty in both tracks.
    parameters:
    -----------
        track_x, track_y: objects exposing stream_to_bars() (SingleTrack in this file)
    returns:
    --------
        common_bars_x, common_bars_y: two lists of bars
            bars of track_x and track_y taken at the shared positions, in
            ascending bar order, so element i of both lists is the same bar position
    '''
    bars_x = track_x.stream_to_bars()
    bars_y = track_y.stream_to_bars()
    bwc_x = get_bar_indexes_with_content(bars_x)
    bwc_y = get_bar_indexes_with_content(bars_y)
    # The intersection is a set, and set iteration order is not the numeric order.
    # Sort it so the returned bars stay chronological (callers build rolling
    # windows of consecutive bars from this result).
    common_bars = sorted(bwc_x.intersection(bwc_y))
    common_bars_x = [bars_x[i] for i in common_bars]
    common_bars_y = [bars_y[i] for i in common_bars]
    return common_bars_x, common_bars_y
 # In[6]:
 def get_bar_len(bar):
    """sum up the lengths of all notes in a bar
    parameters:
    -----------
        bar : list
            list of "notes", tuples like (pitches, len)
    returns:
    --------
        total of the second element of every note, 0 for an empty bar
    """
    return sum(event[1] for event in bar)
 # In[7]:
 def bar_has_content(bar):
    '''tell whether a bar holds at least one non-rest event (note or chord)
    a rest is an event whose pitch tuple equals (-1,)
    parameters:
    -----------
        bar: list
            list of notes, tuples like (pitches, len)
    return:
    -------
        bool:
            False when every event in the bar is a rest (an empty bar included),
            True otherwise
    '''
    only_rests = all(event[0] == (-1,) for event in bar)
    return not only_rests
 # In[8]:
 def round_to_sixteenth_note(x, base=0.25):
    '''snap a value to the nearest multiple of base
    the default base of 0.25 gives sixteenth note accuracy
    '''
    steps = round(x / base)
    return steps * base
 # In[9]:
 def parse_pretty_midi_instrument(instrument, resolution, time_to_tick, key_offset):
    '''convert a pretty_midi instrument into a custom SingleTrack object
    every note start and note length is converted with time_to_tick(), divided by
    resolution and rounded with round_to_sixteenth_note(). notes that land on the
    same rounded start are merged into one event (a chord); the event length is
    the longest length among the merged notes. gaps between events are filled
    with rests (pitch -1), a gap longer than 4.0 is split into several rests.
    parameters:
    -----------
        instrument: pretty_midi Instrument object
            its notes, program and is_drum attributes are read
        resolution: number
            divisor applied to tick values (MultiTrack passes the file resolution)
        time_to_tick: callable
            converts a time value to ticks (MultiTrack passes PrettyMIDI.time_to_tick)
        key_offset: int
            number of semitones added to every pitch; drum tracks are not transposed
    return:
    -------
        SingleTrack object named 'Drums' for drum instruments, otherwise named
        after pm.program_to_instrument_class(). for an instrument without notes
        the stream has first_tick=None and an empty notes list.
    '''
    # NOTE(review): the gap before each note is measured from the previously
    # processed note, so this presumably expects instrument.notes to be ordered
    # by start time - confirm against pretty_midi.
    first_tick = None
    prev_tick = 0
    prev_note_lenth = 0
    max_rest_len = 4.0
    # start position -> [set of pitches, set of lengths]
    notes = defaultdict(lambda:[set(), set()])
    for note in instrument.notes:
        if first_tick is None:
            # the stream is always anchored at position 0
            first_tick = 0
        tick = round_to_sixteenth_note(time_to_tick(note.start)/resolution)
        # add rest if needed: fill the gap between the end of the previous
        # event and the start of this note
        act_tick = prev_tick + prev_note_lenth
        if act_tick < tick:
            rest_lenth = tick - act_tick
            while rest_lenth > max_rest_len:
                notes[act_tick] = [{-1},{max_rest_len}]
                act_tick += max_rest_len
                rest_lenth -= max_rest_len
            notes[act_tick] = [{-1},{rest_lenth}]
        note_lenth = round_to_sixteenth_note(time_to_tick(note.end-note.start)/resolution)
        # a rest that was stored at this position is replaced by the real note
        if -1 in notes[tick][0]:
            notes[tick] = [set(), set()]
        if instrument.is_drum:
            notes[tick][0].add(note.pitch)
        else:
            notes[tick][0].add(note.pitch+key_offset)
        notes[tick][1].add(note_lenth)
        prev_tick = tick
        prev_note_lenth = note_lenth
    # events are emitted in the insertion order of the dictionary
    notes = [(tuple(e[0]), max(e[1])) for e in notes.values()]
    name = 'Drums' if instrument.is_drum else pm.program_to_instrument_class(instrument.program)
    return SingleTrack(name, instrument.program, instrument.is_drum, Stream(first_tick,notes) )
 # In[10]:
 def remove_duplicated_sequences(xy_tuple):
    '''drop repeated (x, y) pairs from two parallel lists of sequences
    parameters:
    -----------
        xy_tuple: tuple (x, y)
            two parallel lists of sequences; every sequence is turned into a
            tuple, so its elements have to be hashable
    returns:
    --------
        x_unique, y_unique: two parallel lists of tuples
            every distinct (x, y) pair appears once; the order of the pairs is
            the iteration order of a set, not the input order
    '''
    frozen_x = list(map(tuple, xy_tuple[0]))
    frozen_y = list(map(tuple, xy_tuple[1]))
    distinct_pairs = set(zip(frozen_x, frozen_y))
    x_unique = []
    y_unique = []
    for x_sequence, y_sequence in distinct_pairs:
        x_unique.append(x_sequence)
        y_unique.append(y_sequence)
    return x_unique, y_unique
 # In[11]:
 class Stream():
    '''container for a sequence of musical events
    atributes:
    ----------
        first_tick:
            position at which the sequence starts
        notes:
            sequence of musical events (notes, chords or rests)
    '''
    def __init__ (self, first_tick, notes):
        self.first_tick = first_tick
        self.notes = notes
    def __repr__(self):
        events_count = len(self.notes)
        return '<Stream object with {} musical events>'.format(events_count)
 # In[12]:
 class SingleTrack():
    '''class of single track in midi file encoded from pretty midi library
    atributes:
    ----------
        name:  str
            name of instrument class
        program: int
            midi instrument program
        is_drum: bool
            True if this track is drums track, False otherwise
        stream:
            Stream object of encoded music events (chords or notes)
    '''
    def __init__(self, name=None, program=None, is_drum=None, stream=None):
        self.name = name
        self.program = program
        self.is_drum = is_drum
        self.stream = stream
    def __repr__(self):
        return "<SingleTrack object. Name:{}, Program:{}, is_drum:{}>".format(self.name, self.program, self.is_drum)
    def to_pretty_midi_instrument(self, tempo=100):
        '''create a pretty midi Instrument object from the self.stream.notes sequence
        events are laid out one after another starting at stream.first_tick;
        every position and length is multiplied by 100/tempo. rests (pitch -1)
        only advance the time, they produce no pretty midi note.
            parameters:
            -----------
                tempo: number
                    positions and lengths are scaled by 100/tempo
            return:
            -------
                track: PrettyMIDI.Instrument object
        '''
        tempo_strech = 100/tempo
        track = pm.Instrument(program=self.program, is_drum=self.is_drum, name=self.name)
        # a stream parsed from an instrument without notes has first_tick=None;
        # treat it as 0 instead of failing on None * float
        time = (self.stream.first_tick or 0) * tempo_strech
        for note in self.stream.notes:
            note_pitch = note[0]
            note_len = note[1] * tempo_strech
            for pitch in note_pitch:
                # if note is a rest (pause)
                if pitch == -1:
                    break
                event = pm.Note(velocity=100, pitch=pitch, start=time, end=time+note_len)
                track.notes.append(event)
            time = time + note_len
        return track
    def stream_to_bars(self, beat_per_bar=4):
        '''split self.stream.notes into bars of equal length
        a note that crosses a bar line is cut at the bar line: the leading part
        closes the current bar and the rest continues in the next bar (a note
        longer than a bar is cut at every bar line it crosses). the lengths of
        the parts add up to the length of the original note.
        arguments:
            beat_per_bar: number, greater than 0
                length of one bar, in the same unit as the note lengths
        return:
            bars: list of lists of notes. every bar except possibly the last one
            has a total length equal to beat_per_bar; the last bar is left
            shorter when the stream ends before the bar is full.
        raises:
            ValueError: if beat_per_bar is not greater than 0
        '''
        if beat_per_bar <= 0:
            raise ValueError('beat_per_bar must be greater than 0')
        bars = []
        current_bar = []
        time = 0  # length already used in current_bar
        for note in self.stream.notes:
            pitches = note[0]
            remaining = note[1]
            was_split = False
            # cut the note at every bar line it crosses
            while time + remaining > beat_per_bar:
                leading_note_len = beat_per_bar - time
                current_bar.append((pitches, leading_note_len))
                bars.append(current_bar)
                current_bar = []
                time = 0
                remaining -= leading_note_len
                was_split = True
            # an untouched note is stored as the original object
            current_bar.append((pitches, remaining) if was_split else note)
            time += remaining
            if time == beat_per_bar:
                bars.append(current_bar)
                current_bar = []
                time = 0
        # keep the unfinished last bar (including the tail of a final note
        # that crossed a bar line)
        if current_bar:
            bars.append(current_bar)
        return bars
 # In[99]:
 class MultiTrack():
    '''Class that represent one midi file
    atributes:
        tempo: value passed to PrettyMIDI as initial_tempo and used when saving
        pm_obj : PrettyMIDI class object of this midi file
        res: resolution of midi
        time_to_tick: function of pm_obj that converts time to ticks
        name: path to midi file
        tracks: a list of SingleTrack objects
        tracks_by_instrument: dictionary, instrument class name -> list of track indexes
    '''
    def __init__(self, path=None, tempo=100):
        self.tempo = tempo
        self.pm_obj = pm.PrettyMIDI(path, initial_tempo=self.tempo)
        self.res = self.pm_obj.resolution
        self.time_to_tick = self.pm_obj.time_to_tick
        self.name = path
        # the key offset is the same for every instrument, compute it once
        # (and only when there is at least one instrument to parse)
        key_offset = self.get_pitch_offset_to_C() if self.pm_obj.instruments else 0
        self.tracks = [parse_pretty_midi_instrument(instrument, self.res, self.time_to_tick, key_offset) for instrument in self.pm_obj.instruments]
        self.tracks_by_instrument = self.get_track_by_instrument()
    def get_multiseq(self):
        '''return a dictionary: sequence id -> list of (track name, sequence)
        built from the `seq` dictionary of every track.
        '''
        # NOTE(review): SingleTrack as defined in this file has no `seq`
        # attribute, so this method looks like a leftover from an earlier
        # data model - confirm before using it.
        multiseq_indexes = {key for music_track in self.tracks for key in music_track.seq}
        multiseq = dict()
        for seq_id in multiseq_indexes:
            multiseq[seq_id] = []
        for single_track in self.tracks:
            for key, value in single_track.seq.items():
                multiseq[key].append((single_track.name,value))
        return multiseq
    def get_pitch_offset_to_C(self):
        '''to get better train result without augmenting midis to all possible keys
        we assume that the most frequent pitch class is the root note of the song
        and calculate the offset in semitones that moves the song key to C.
        You should ADD this offset to note pitch to get it right
        return:
            offset in range -6..5 (the shorter direction towards C)
        '''
        hist = self.pm_obj.get_pitch_class_histogram()
        offset = np.argmax(hist)
        if offset > 6:
            return 12-offset
        else:
            return -offset
    def save(self, path):
        '''write all tracks to a midi file at `path` and return the new PrettyMIDI object'''
        midi_file = pm.PrettyMIDI()
        for track in self.tracks:
            midi_file.instruments.append(track.to_pretty_midi_instrument(self.tempo))
        midi_file.write(path)
        return midi_file
    def get_track_by_instrument(self):
        '''return a dictionary with tracks indexes grouped by instrument class'''
        tracks_by_instrument = dict()
        for i, track in enumerate(self.tracks):
            tracks_by_instrument.setdefault(track.name, []).append(i)
        return tracks_by_instrument
    def get_common_bars_for_every_possible_pair(self, x_instrument, y_instrument):
        ''' for every possible pair of tracks of the given instrument classes
        returns common bars from multitrack, concatenated over all pairs'''
        x_bars = []
        y_bars = []
        pairs = self.get_posible_pairs(x_instrument, y_instrument)
        for x_track_index, y_track_index in pairs:
            _x_bars, _y_bars = get_common_bars(self.tracks[x_track_index], self.tracks[y_track_index])
            x_bars.extend(_x_bars)
            y_bars.extend(_y_bars)
        return x_bars, y_bars
    def get_data_seq2seq_arrangment(self, x_instrument, y_instrument, bars_in_seq=4):
        '''return parallel lists of sequences for arrangement generation
        a window of bars_in_seq bars is rolled over the common bars of the two
        instrument classes; every window is flattened to one list of notes.
        '''
        x_seq = []
        y_seq = []
        x_bars, y_bars = self.get_common_bars_for_every_possible_pair(x_instrument, y_instrument)
        for i in range(len(x_bars) - bars_in_seq + 1):
            x_seq_to_add = [note for bar in x_bars[i:i+bars_in_seq] for note in bar ]
            y_seq_to_add = [note for bar in y_bars[i:i+bars_in_seq] for note in bar ]
            x_seq.append(x_seq_to_add)
            y_seq.append(y_seq_to_add)
        return x_seq, y_seq
    def get_data_seq2seq_melody(self,instrument_class, x_seq_len=4):
        '''return training pairs for melody generation
        for every track of the given instrument class the non-empty bars are
        taken in their original order; x is a flattened window of x_seq_len
        consecutive bars and y is the bar that follows the window.
        return:
            x_seq, y_seq: parallel lists collected from all matching tracks;
            two empty lists when there is no track of this class or the tracks
            are too short.
        '''
        x_seq = []
        y_seq = []
        for track_index in self.tracks_by_instrument.get(instrument_class, []):
            bars = self.tracks[track_index].stream_to_bars()
            bars_with_content = get_bars_with_content(bars)
            # last valid window starts at len - x_seq_len - 1, so that the
            # bar following the window still exists
            for i in range(len(bars_with_content)-x_seq_len):
                _x_seq = [note for bar in bars_with_content[i:i+x_seq_len] for note in bar]
                _y_bar = bars_with_content[i+x_seq_len]
                x_seq.append(_x_seq)
                y_seq.append(_y_bar)
        return x_seq, y_seq
    def get_posible_pairs(self, instrument_x, instrument_y):
        '''return every possible pair of track indexes of two instrument classes
        parameters:
        -----------
            instrument_x, instrument_y : string, e.g. 'Guitar', 'Bass', 'Drums'
                a string that represent a instrument class you want to look for in midi file.
        returns:
        ----------
            pairs: list of tuples
                every (x track index, y track index) combination; an empty list
                when one of the classes is not present in the file
        '''
        x_indexes = self.tracks_by_instrument.get(instrument_x, [])
        y_indexes = self.tracks_by_instrument.get(instrument_y, [])
        pairs = [(x,y) for x in x_indexes for y in y_indexes]
        return pairs
    def show_map(self):
        '''print the file name and, for every track, one character per bar:
        a filled block for a bar with content and an underscore for an empty bar'''
        print(self.name)
        print()
        for track in self.tracks:
            bars = track.stream_to_bars(4)
            track_str = ''.join('█' if bar_has_content(bar) else '_' for bar in bars)
            print(track.name[:4],':', track_str)
 # In[104]:
 def extract_data(midi_folder_path=None, how=None, instrument=None, remove_duplicates=True):
    '''extract musical data from midis in given folder, to x_train, y_train lists of sequences
    the folder is walked recursively; a file that cannot be processed is skipped.
    parameters:
    -----------
        midi_folder_path : string 
            a path to directory where midi files are stored
        how : string {'melody','arrangment'}
            - if melody: function extract data of one instrument,
            and return lists of x and y that x is actual sequence of 4 bars
            and y is next bar
            - if arrangment: function extract data of two instruments and
            returns a lists of x and y that x is one instrument sequence,
            and y is corresponding sequence to x, played by second instrument
        instrument: string or tuple of two strings
            this parameter is used to specify a instrument class, or classes that you wanted
            to extract from midi files.
            if how='melody': string
            if how='arrangment' : (string_x, string_y)
        remove_duplicates: bool
            if True, repeated (x, y) pairs are removed with remove_duplicated_sequences()
    return:
    -------
        x_train, y_train - tuple of corresponding lists of x_train and y_train data for training set
    raises:
    -------
        ValueError: if how is not one of 'melody', 'arrangment'
    notes:
    ------
        extracted data is transposed to the key of C
        duplicated x,y pairs are removed (unless remove_duplicates is False)
    '''
    if how not in {'melody','arrangment'}:
        raise ValueError('how parameter must by one of {melody,arrangment} ')
    x_train = []
    y_train = []
    for directory, subdirectories, files in os.walk(midi_folder_path):
        for midi_file in tqdm(files):
            midi_file_path = os.path.join(directory, midi_file)
            try:
                mt = MultiTrack(midi_file_path)
                if how=='melody':
                    x ,y = mt.get_data_seq2seq_melody(instrument)
                else:
                    x ,y = mt.get_data_seq2seq_arrangment(instrument[0], instrument[1])
            except Exception:
                # best effort: unreadable / non-midi files and files without the
                # requested instruments are skipped. KeyboardInterrupt and
                # SystemExit are deliberately not caught, so a long run can
                # still be interrupted.
                continue
            x_train.extend(x)
            y_train.extend(y)
    if remove_duplicates:   
        x_train, y_train = remove_duplicated_sequences((x_train, y_train))
    return x_train , y_train
 # In[109]:
 def main():
    '''extract guitar -> bass arrangement data from the midis in the
    'WhiteStripes' folder, store it in 'Guitar_to_Bass_data.pkl' and return it
    '''
    x_train, y_train = extract_data(midi_folder_path='WhiteStripes', how='arrangment', instrument=('Guitar','Bass'))
    # the context manager closes (and flushes) the file even if pickling fails
    with open('Guitar_to_Bass_data.pkl','wb') as data_file:
        pickle.dump((x_train, y_train), data_file)
    return x_train, y_train
 # In[107]:
 # run the extraction only when the file is executed as a script, not when it is imported
 if __name__=='__main__':
    main()