diff --git a/docs/document.aux b/docs/document.aux
new file mode 100644
index 0000000..e8153af
--- /dev/null
+++ b/docs/document.aux
@@ -0,0 +1,20 @@
+\relax
+\@writefile{toc}{\contentsline {section}{\numberline {1}Introduction}{2}\protected@file@percent }
+\@writefile{toc}{\contentsline {subsection}{\numberline {1.1}Music}{2}\protected@file@percent }
+\@writefile{toc}{\contentsline {section}{\numberline {2}MIDI: Music as Information}{2}\protected@file@percent }
+\@writefile{toc}{\contentsline {subsection}{\numberline {2.1}MIDI}{2}\protected@file@percent }
+\@writefile{toc}{\contentsline {subsection}{\numberline {2.2}Piano roll}{2}\protected@file@percent }
+\@writefile{toc}{\contentsline {subsection}{\numberline {2.3}Music as a three-dimensional array}{2}\protected@file@percent }
+\@writefile{toc}{\contentsline {section}{\numberline {3}Generative neural networks: GANs, VAEs, LSTMs}{2}\protected@file@percent }
+\@writefile{toc}{\contentsline {subsection}{\numberline {3.1}Autoencoders and VAEs}{2}\protected@file@percent }
+\@writefile{toc}{\contentsline {subsection}{\numberline {3.2}LSTM}{2}\protected@file@percent }
+\@writefile{toc}{\contentsline {section}{\numberline {4}Generative models used for music generation}{3}\protected@file@percent }
+\@writefile{toc}{\contentsline {subsection}{\numberline {4.1}Project Magenta}{3}\protected@file@percent }
+\@writefile{toc}{\contentsline {subsection}{\numberline {4.2}MuseGAN}{3}\protected@file@percent }
+\@writefile{toc}{\contentsline {subsection}{\numberline {4.3}VAE-MIDI}{3}\protected@file@percent }
+\@writefile{toc}{\contentsline {section}{\numberline {5}Building a music generator}{3}\protected@file@percent }
+\@writefile{toc}{\contentsline {subsection}{\numberline {5.1}Extracting data from MIDI files}{3}\protected@file@percent }
+\@writefile{toc}{\contentsline {subsection}{\numberline {5.2}Preparing the GAN model}{3}\protected@file@percent }
+\@writefile{toc}{\contentsline {subsection}{\numberline {5.3}The training process, samples every few epochs, loss plots}{3}\protected@file@percent }
+\@writefile{toc}{\contentsline {subsection}{\numberline {5.4}Final samples: what kind of music can be generated}{3}\protected@file@percent }
+\@writefile{toc}{\contentsline {section}{\numberline {6}Summary}{3}\protected@file@percent }
diff --git a/docs/document.log b/docs/document.log
new file mode 100644
index 0000000..8248af4
--- /dev/null
+++ b/docs/document.log
@@ -0,0 +1,151 @@
+This is pdfTeX, Version 3.14159265-2.6-1.40.19 (TeX Live 2018/W32TeX) (preloaded format=pdflatex 2019.2.21) 28 MAY 2019 12:32
+entering extended mode
+ restricted \write18 enabled.
+ %&-line parsing enabled.
+**document.tex
+(./document.tex
+LaTeX2e <2018-12-01>
+(c:/software/latex/texmf-dist/tex/latex/base/article.cls
+Document Class: article 2018/09/03 v1.4i Standard LaTeX document class
+(c:/software/latex/texmf-dist/tex/latex/base/size10.clo
+File: size10.clo 2018/09/03 v1.4i Standard LaTeX file (size option)
+)
+\c@part=\count80
+\c@section=\count81
+\c@subsection=\count82
+\c@subsubsection=\count83
+\c@paragraph=\count84
+\c@subparagraph=\count85
+\c@figure=\count86
+\c@table=\count87
+\abovecaptionskip=\skip41
+\belowcaptionskip=\skip42
+\bibindent=\dimen102
+)
+(c:/software/latex/texmf-dist/tex/latex/polski/polski.sty
+Package: polski 2017/05/04 v1.3.4 Polish language package
+
+ Switching to Polish text encoding and Polish maths fonts.
+(c:/software/latex/texmf-dist/tex/latex/base/ot4enc.def +File: ot4enc.def 2018/08/11 v2.0j Standard LaTeX file +Now handling font encoding OT4 ... +... no UTF-8 mapping file for font encoding OT4 +) +LaTeX Font Info: Try loading font information for OT4+cmr on input line 360. + + +(c:/software/latex/texmf-dist/tex/latex/polski/ot4cmr.fd +File: ot4cmr.fd 2008/02/24 v1.2.1 Font defs for fonts PL (MW) +) +LaTeX Font Info: Overwriting math alphabet `\mathbf' in version `normal' +(Font) OT1/cmr/bx/n --> OT4/cmr/bx/n on input line 360. +LaTeX Font Info: Overwriting math alphabet `\mathsf' in version `normal' +(Font) OT1/cmss/m/n --> OT4/cmss/m/n on input line 360. +LaTeX Font Info: Overwriting math alphabet `\mathit' in version `normal' +(Font) OT1/cmr/m/it --> OT4/cmr/m/it on input line 360. +LaTeX Font Info: Overwriting math alphabet `\mathtt' in version `normal' +(Font) OT1/cmtt/m/n --> OT4/cmtt/m/n on input line 360. +LaTeX Font Info: Overwriting math alphabet `\mathsf' in version `bold' +(Font) OT1/cmss/bx/n --> OT4/cmss/bx/n on input line 360. +LaTeX Font Info: Overwriting math alphabet `\mathit' in version `bold' +(Font) OT1/cmr/bx/it --> OT4/cmr/bx/it on input line 360. +LaTeX Font Info: Encoding `OT1' has changed to `OT4' for symbol font +(Font) `operators' in the math version `normal' on input line 360. + +LaTeX Font Info: Overwriting symbol font `operators' in version `normal' +(Font) OT1/cmr/m/n --> OT4/cmr/m/n on input line 360. +LaTeX Font Info: Overwriting symbol font `letters' in version `normal' +(Font) OML/cmm/m/it --> OML/plm/m/it on input line 360. +LaTeX Font Info: Overwriting symbol font `symbols' in version `normal' +(Font) OMS/cmsy/m/n --> OMS/plsy/m/n on input line 360. +LaTeX Font Info: Overwriting symbol font `largesymbols' in version `normal' +(Font) OMX/cmex/m/n --> OMX/plex/m/n on input line 360. +LaTeX Font Info: Encoding `OT1' has changed to `OT4' for symbol font +(Font) `operators' in the math version `bold' on input line 360. +LaTeX Font Info: Overwriting symbol font `operators' in version `bold' +(Font) OT1/cmr/bx/n --> OT4/cmr/bx/n on input line 360. +LaTeX Font Info: Overwriting symbol font `letters' in version `bold' +(Font) OML/cmm/b/it --> OML/plm/b/it on input line 360. +LaTeX Font Info: Overwriting symbol font `symbols' in version `bold' +(Font) OMS/cmsy/b/n --> OMS/plsy/b/n on input line 360. +) + +LaTeX Warning: Unused global option(s): + [utf8]. + +(./document.aux) +\openout1 = `document.aux'. + +LaTeX Font Info: Checking defaults for OML/cmm/m/it on input line 18. +LaTeX Font Info: ... okay on input line 18. +LaTeX Font Info: Checking defaults for T1/cmr/m/n on input line 18. +LaTeX Font Info: ... okay on input line 18. +LaTeX Font Info: Checking defaults for OT1/cmr/m/n on input line 18. +LaTeX Font Info: ... okay on input line 18. +LaTeX Font Info: Checking defaults for OMS/cmsy/m/n on input line 18. +LaTeX Font Info: ... okay on input line 18. +LaTeX Font Info: Checking defaults for OMX/cmex/m/n on input line 18. +LaTeX Font Info: ... okay on input line 18. +LaTeX Font Info: Checking defaults for U/cmr/m/n on input line 18. +LaTeX Font Info: ... okay on input line 18. +LaTeX Font Info: Checking defaults for OT4/cmr/m/n on input line 18. +LaTeX Font Info: ... okay on input line 18. +LaTeX Font Info: Try loading font information for OML+plm on input line 20. 
+ (c:/software/latex/texmf-dist/tex/latex/polski/omlplm.fd
+File: omlplm.fd 2008/02/24 v1.2.1 Font defs for fonts PL (MW)
+)
+LaTeX Font Info: Try loading font information for OMS+plsy on input line 20.
+
+
+(c:/software/latex/texmf-dist/tex/latex/polski/omsplsy.fd
+File: omsplsy.fd 2008/02/24 v1.2.1 Font defs for fonts PL (MW)
+)
+LaTeX Font Info: Try loading font information for OMX+plex on input line 20.
+
+
+(c:/software/latex/texmf-dist/tex/latex/polski/omxplex.fd
+File: omxplex.fd 2008/02/24 v1.2.1 Font defs for fonts PL (MW)
+)
+LaTeX Font Info: External font `plex10' loaded for size
+(Font) <12> on input line 20.
+LaTeX Font Info: External font `plex10' loaded for size
+(Font) <8> on input line 20.
+LaTeX Font Info: External font `plex10' loaded for size
+(Font) <6> on input line 20.
+ [1
+
+{c:/software/latex/texmf-var/fonts/map/pdftex/updmap/pdftex.map}] (./document.t
+oc
+LaTeX Font Info: External font `plex10' loaded for size
+(Font) <10> on input line 2.
+LaTeX Font Info: External font `plex10' loaded for size
+(Font) <7> on input line 2.
+LaTeX Font Info: External font `plex10' loaded for size
+(Font) <5> on input line 2.
+)
+\tf@toc=\write3
+\openout3 = `document.toc'.
+
+ [1] [2]
+[3] (./document.aux) )
+Here is how much of TeX's memory you used:
+ 474 strings out of 492616
+ 5750 string characters out of 6131816
+ 66703 words of memory out of 5000000
+ 4428 multiletter control sequences out of 15000+600000
+ 13779 words of font info for 39 fonts, out of 8000000 for 9000
+ 1141 hyphenation exceptions out of 8191
+ 23i,7n,25p,379b,252s stack positions out of 5000i,500n,10000p,200000b,80000s
+{c:/software/latex/texmf-dist/fonts/enc/dvips/pl/plrm.enc
+}
+Output written on document.pdf (4 pages, 61011 bytes).
+PDF statistics:
+ 42 PDF objects out of 1000 (max. 8388607)
+ 29 compressed objects within 1 object stream
+ 0 named destinations out of 1000 (max. 500000)
+ 1 words of extra memory for PDF output out of 10000 (max. 10000000)
+
diff --git a/docs/document.pdf b/docs/document.pdf
new file mode 100644
index 0000000..c5b1f30
Binary files /dev/null and b/docs/document.pdf differ
diff --git a/docs/document.synctex.gz b/docs/document.synctex.gz
new file mode 100644
index 0000000..5ef956a
Binary files /dev/null and b/docs/document.synctex.gz differ
diff --git a/docs/document.tex b/docs/document.tex
new file mode 100644
index 0000000..87e3fb7
--- /dev/null
+++ b/docs/document.tex
@@ -0,0 +1,94 @@
+\documentclass{article}
+\usepackage[utf8]{inputenc}
+\usepackage{polski}
+
+\title{%
+  Generowanie muzyki \\
+  przy pomocy głębokiego uczenia \\
+  \large Music generation with deep learning}
+
+
+\author{%
+  Cezary Pukownik \\
+  \newline
+  \small Supervisor:\\
+  dr hab. Tomasz Górecki}
+
+  \date{2019-05-28}
+
+\begin{document}
+
+  \maketitle
+
+  \newpage
+  \pagenumbering{arabic}
+  \tableofcontents
+  \newpage
+
+
+  \section{Introduction}
+  This is the introduction to the master's thesis.
+
+  \subsection{Music}
+  Here I will say a little about music: why it is hard to generate, what I think about that, and whether artificial intelligence will replace musicians in the future.
+
+  \section{MIDI: Music as Information}
+  Here I will describe how music is stored as computer data: the MIDI protocol, and the representation of music as piano rolls.
+
+  \subsection{MIDI}
+  Here I will describe the MIDI protocol.
+
+  \subsection{Piano roll}
+  Here I will describe what piano rolls are, how to read them, and what they are used for.
+
+  \subsection{Music as a three-dimensional array}
+  Here I will explain why music can be described as a three-dimensional array: tracks by time steps by pitches.
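+
+  A minimal sketch of this representation in Python (an illustration only, using the pypianoroll library from the project code; the file name is a placeholder, and all tracks are assumed to have the same length):
+\begin{verbatim}
+import numpy as np
+import pypianoroll as roll
+
+# Load a MIDI file and stack its tracks into one 3-D array:
+# (tracks, time steps, 128 MIDI pitches).
+song = roll.Multitrack('song.mid')
+tracks = [t.pianoroll for t in song.tracks]
+tensor = np.stack(tracks)
+print(tensor.shape)
+\end{verbatim}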
+
+  \section{Generative neural networks: GANs, VAEs, LSTMs}
+  Here I will describe why neural networks do better at producing music than other models, and which models suit which applications: LSTMs for jazz, VAEs for more structured music, and so on.
+
+  \subsection{Autoencoders and VAEs}
+  Here I will describe autoencoders and variational autoencoders.
+
+  \subsection{LSTM}
+  Here I will describe LSTM networks.
+
+  \section{Generative models used for music generation}
+  Examples of existing approaches to music generation, which models they applied, and why.
+
+  \subsection{Project Magenta}
+  Here I will describe Project Magenta.
+
+  \subsection{MuseGAN}
+  Here I will describe MuseGAN.
+
+  \subsection{VAE-MIDI}
+  Here I will describe VAE-MIDI.
+
+
+  \section{Building a music generator}
+  In this chapter I will describe how I built my own music generator, how the training process went, and what samples I managed to generate, together with a description of the code I wrote.
+
+  \subsection{Extracting data from MIDI files}
+  \subsection{Preparing the GAN model}
+  \subsection{The training process, samples every few epochs, loss plots}
+  \subsection{Final samples: what kind of music can be generated}
+
+  \section{Summary}
+  Final conclusions: can computer-generated music be likable, and will it affect the music industry positively? Yes and no. It can serve as inspiration for musicians and as a tool supporting the creative process. On the other hand, it may lower the cost of producing pop music, which is already highly repetitive. Will neural networks learn how to produce hits?
+
+
+\end{document}
\ No newline at end of file
diff --git a/docs/document.toc b/docs/document.toc
new file mode 100644
index 0000000..d003bae
--- /dev/null
+++ b/docs/document.toc
@@ -0,0 +1,19 @@
+\contentsline {section}{\numberline {1}Introduction}{2}%
+\contentsline {subsection}{\numberline {1.1}Music}{2}%
+\contentsline {section}{\numberline {2}MIDI: Music as Information}{2}%
+\contentsline {subsection}{\numberline {2.1}MIDI}{2}%
+\contentsline {subsection}{\numberline {2.2}Piano roll}{2}%
+\contentsline {subsection}{\numberline {2.3}Music as a three-dimensional array}{2}%
+\contentsline {section}{\numberline {3}Generative neural networks: GANs, VAEs, LSTMs}{2}%
+\contentsline {subsection}{\numberline {3.1}Autoencoders and VAEs}{2}%
+\contentsline {subsection}{\numberline {3.2}LSTM}{2}%
+\contentsline {section}{\numberline {4}Generative models used for music generation}{3}%
+\contentsline {subsection}{\numberline {4.1}Project Magenta}{3}%
+\contentsline {subsection}{\numberline {4.2}MuseGAN}{3}%
+\contentsline {subsection}{\numberline {4.3}VAE-MIDI}{3}%
+\contentsline {section}{\numberline {5}Building a music generator}{3}%
+\contentsline {subsection}{\numberline {5.1}Extracting data from MIDI files}{3}%
+\contentsline {subsection}{\numberline {5.2}Preparing the GAN model}{3}%
+\contentsline {subsection}{\numberline {5.3}The training process, samples every few epochs, loss plots}{3}%
+\contentsline {subsection}{\numberline {5.4}Final samples: what kind of music can be generated}{3}%
+\contentsline {section}{\numberline {6}Summary}{3}%
diff --git a/generate.py b/generate.py
deleted file mode 100644
index bcaa862..0000000
--- a/generate.py
+++ 
/dev/null @@ -1,22 +0,0 @@ -import numpy as np -from keras.layers import Input, Dense, Conv2D -from keras.models import Model -GENERATED_BEAT_PATH = 'data/output/generated_bar' -MODEL_PATH = 'data/autoencoder_model.h5' -SAMPLES_PATH = 'data/samples.npz' - -input = Input(shape=(1,96,128)) -encoded = Conv2D(filters = 32, kernel_size = 1)(input) -decoded = Conv2D(filters = 128, kernel_size = 1)(encoded) -autoencoder = Model(input, decoded) - -# load weights into new model -autoencoder.load_weights(MODEL_PATH) -print("Loaded model from disk") - -# generate_seed = np.random.rand(1,1,96,128) - -generate_seed = np.load(SAMPLES_PATH)['arr_0'][0:] - -generated_beat = autoencoder.predict(generate_seed) -np.savez_compressed(GENERATED_BEAT_PATH, generated_beat) diff --git a/midi_to_samples.py b/midi_to_samples.py deleted file mode 100644 index ae10385..0000000 --- a/midi_to_samples.py +++ /dev/null @@ -1,76 +0,0 @@ -import settings -import pypianoroll as roll -import matplotlib.pyplot as plt -import numpy as np -import os -from math import floor - -MIDI_DIRECTORY = settings.midi_path -SAMPLES_DIRECTORY = settings.samples_path -MIDI_RESOLUTION = settings.midi_resolution -BEAT_PER_BATCH = settings.beats_per_sample - -samples = np.empty((0,BEAT_PER_BATCH,96,128)) - -def erase_note_lenth(pianoroll): - if pianoroll.ndim != 2: - raise ValueError('pianoroll should be two dimentional') - now_block = [] - for x in pianoroll: - this = None - prev = None - new_line =[] - for y in x: - this = y - if prev != None: - if this > 0 and prev > 0: - new_line.append(0) - else: - new_line.append(y) - else: - new_line.append(y) - prev = this - now_block.append(new_line) - return np.array(now_block) - -print('Start convertion') -for midi_file in os.listdir(MIDI_DIRECTORY): - try: - print('Reading file: {}'.format(midi_file)) - song = roll.Multitrack('{}/{}'.format(MIDI_DIRECTORY, midi_file)) - # no_drums_mt = roll.Multitrack(tempo=120.0, downbeat=[0, 96, 192, 288], beat_resolution=24) - intruments_only = roll.Multitrack(tempo=120.0, beat_resolution=24) - - for track in song.tracks: - if track.is_drum == False: - print(track.name, track.program) - intruments_only.append_track(track=track, pianoroll=track.pianoroll) - instrument_track = track.pianoroll - - # plt.imshow(instrument_track[24*8:24*24].T) - # plt.savefig('data/0_{}.png'.format(midi_file)) - - instrument_track = erase_note_lenth(instrument_track.T).T - # plt.imshow(instrument_track[24*8:24*24].T) - # plt.savefig('data/1_{}.png'.format(midi_file)) - - - # instruments = no_drums_mt.get_merged_pianoroll(mode='sum') - - beats = floor( (instrument_track.shape[0] / MIDI_RESOLUTION) / BEAT_PER_BATCH) * BEAT_PER_BATCH - notes_for_beats = beats * MIDI_RESOLUTION - - print('beats: ', beats) - samples_of_song = np.asarray(np.split(instrument_track[:notes_for_beats], beats)) - samples_of_song = samples_of_song.reshape(int(beats/BEAT_PER_BATCH),BEAT_PER_BATCH,96,128) - - print('Converted samples: {}'.format(samples_of_song.shape)) - samples = np.concatenate([samples_of_song,samples], axis=0) - np.savez_compressed(SAMPLES_DIRECTORY,samples) - - except Exception as error: - print('Convertion faild: {}'.format(error)) - pass - - finally: - print('Done!') diff --git a/project/__pycache__/settings.cpython-36.pyc b/project/__pycache__/settings.cpython-36.pyc new file mode 100644 index 0000000..5733c95 Binary files /dev/null and b/project/__pycache__/settings.cpython-36.pyc differ diff --git a/project/__pycache__/settings.cpython-37.pyc b/project/__pycache__/settings.cpython-37.pyc 
new file mode 100644
index 0000000..e94229e
Binary files /dev/null and b/project/__pycache__/settings.cpython-37.pyc differ
diff --git a/project/data/autoencoder_model.h5 b/project/data/autoencoder_model.h5
new file mode 100644
index 0000000..0c2054b
Binary files /dev/null and b/project/data/autoencoder_model.h5 differ
diff --git a/project/data/midi/Lenny Kravitz - Are You Gonna Go My Way.mid b/project/data/midi/Lenny Kravitz - Are You Gonna Go My Way.mid
new file mode 100644
index 0000000..3191bca
Binary files /dev/null and b/project/data/midi/Lenny Kravitz - Are You Gonna Go My Way.mid differ
diff --git a/project/data/midi/arctic_monkeys-505.mid b/project/data/midi/arctic_monkeys-505.mid
new file mode 100644
index 0000000..e3d6083
Binary files /dev/null and b/project/data/midi/arctic_monkeys-505.mid differ
diff --git a/project/data/midi/red_hot_chili_peppers-cant_stop.mid b/project/data/midi/red_hot_chili_peppers-cant_stop.mid
new file mode 100644
index 0000000..684a233
Binary files /dev/null and b/project/data/midi/red_hot_chili_peppers-cant_stop.mid differ
diff --git a/project/data/output/generated_bar.npz b/project/data/output/generated_bar.npz
new file mode 100644
index 0000000..d9f6489
Binary files /dev/null and b/project/data/output/generated_bar.npz differ
diff --git a/project/data/output/generated_midi.mid b/project/data/output/generated_midi.mid
new file mode 100644
index 0000000..e2fdac7
Binary files /dev/null and b/project/data/output/generated_midi.mid differ
diff --git a/project/data/output/pianoroll.png b/project/data/output/pianoroll.png
new file mode 100644
index 0000000..8af0138
Binary files /dev/null and b/project/data/output/pianoroll.png differ
diff --git a/project/data/samples.npz b/project/data/samples.npz
new file mode 100644
index 0000000..0b82e27
Binary files /dev/null and b/project/data/samples.npz differ
diff --git a/project/generate.py b/project/generate.py
new file mode 100644
index 0000000..ba6ae36
--- /dev/null
+++ b/project/generate.py
@@ -0,0 +1,27 @@
+import numpy as np
+import tensorflow as tf
+import settings
+
+# Rebuild the same dense autoencoder as in train.py so that the
+# saved weights can be loaded into an identical architecture.
+input_shape = settings.midi_resolution*128
+input_img = tf.keras.layers.Input(shape=(input_shape,))
+encoded = tf.keras.layers.Dense(160, activation='relu')(input_img)
+decoded = tf.keras.layers.Dense(input_shape, activation='sigmoid')(encoded)
+autoencoder = tf.keras.models.Model(input_img, decoded)
+
+# load weights into the rebuilt model; compiling is not needed for inference
+autoencoder.load_weights(settings.model_path)
+print("Loaded model from {}".format(settings.model_path))
+
+# generate_seed = np.random.rand(1, input_shape)
+generate_seed = np.load(settings.samples_path)['arr_0'][15].reshape(1, input_shape)
+
+generated_sample = autoencoder.predict(generate_seed)
+np.savez_compressed(settings.generated_sample_path, generated_sample)
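+
+# Usage sketch (assumes train.py has already been run, so the weights
+# file at settings.model_path exists):
+#   $ python train.py
+#   $ python generate.py
+#   $ python midi_to_samples.py -c   # convert the generated sample to MIDI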
diff --git a/project/midi_to_samples.py b/project/midi_to_samples.py
new file mode 100644
index 0000000..532191b
--- /dev/null
+++ b/project/midi_to_samples.py
@@ -0,0 +1,244 @@
+import settings
+import pypianoroll as roll
+import matplotlib.pyplot as plt
+import numpy as np
+import os
+from tqdm import tqdm
+import sys
+from sklearn.preprocessing import MinMaxScaler
+
+# General MIDI program numbers (0-127) mapped to instrument names.
+# Drum tracks are flagged by track.is_drum, not by a program number.
+midi_program = {
+    0 : 'Acoustic Grand Piano',
+    1 : 'Bright Acoustic Piano',
+    2 : 'Electric Grand Piano',
+    3 : 'Honky-tonk Piano',
+    4 : 'Electric Piano 1',
+    5 : 'Electric Piano 2',
+    6 : 'Harpsichord',
+    7 : 'Clavi',
+    8 : 'Celesta',
+    9 : 'Glockenspiel',
+    10 : 'Music Box',
+    11 : 'Vibraphone',
+    12 : 'Marimba',
+    13 : 'Xylophone',
+    14 : 'Tubular Bells',
+    15 : 'Dulcimer',
+    16 : 'Drawbar Organ',
+    17 : 'Percussive Organ',
+    18 : 'Rock Organ',
+    19 : 'Church Organ',
+    20 : 'Reed Organ',
+    21 : 'Accordion',
+    22 : 'Harmonica',
+    23 : 'Tango Accordion',
+    24 : 'Acoustic Guitar (nylon)',
+    25 : 'Acoustic Guitar (steel)',
+    26 : 'Electric Guitar (jazz)',
+    27 : 'Electric Guitar (clean)',
+    28 : 'Electric Guitar (muted)',
+    29 : 'Overdriven Guitar',
+    30 : 'Distortion Guitar',
+    31 : 'Guitar harmonics',
+    32 : 'Acoustic Bass',
+    33 : 'Electric Bass (finger)',
+    34 : 'Electric Bass (pick)',
+    35 : 'Fretless Bass',
+    36 : 'Slap Bass 1',
+    37 : 'Slap Bass 2',
+    38 : 'Synth Bass 1',
+    39 : 'Synth Bass 2',
+    40 : 'Violin',
+    41 : 'Viola',
+    42 : 'Cello',
+    43 : 'Contrabass',
+    44 : 'Tremolo Strings',
+    45 : 'Pizzicato Strings',
+    46 : 'Orchestral Harp',
+    47 : 'Timpani',
+    48 : 'String Ensemble 1',
+    49 : 'String Ensemble 2',
+    50 : 'SynthStrings 1',
+    51 : 'SynthStrings 2',
+    52 : 'Choir Aahs',
+    53 : 'Voice Oohs',
+    54 : 'Synth Voice',
+    55 : 'Orchestra Hit',
+    56 : 'Trumpet',
+    57 : 'Trombone',
+    58 : 'Tuba',
+    59 : 'Muted Trumpet',
+    60 : 'French Horn',
+    61 : 'Brass Section',
+    62 : 'SynthBrass 1',
+    63 : 'SynthBrass 2',
+    64 : 'Soprano Sax',
+    65 : 'Alto Sax',
+    66 : 'Tenor Sax',
+    67 : 'Baritone Sax',
+    68 : 'Oboe',
+    69 : 'English Horn',
+    70 : 'Bassoon',
+    71 : 'Clarinet',
+    72 : 'Piccolo',
+    73 : 'Flute',
+    74 : 'Recorder',
+    75 : 'Pan Flute',
+    76 : 'Blown Bottle',
+    77 : 'Shakuhachi',
+    78 : 'Whistle',
+    79 : 'Ocarina',
+    80 : 'Lead 1 (square)',
+    81 : 'Lead 2 (sawtooth)',
+    82 : 'Lead 3 (calliope)',
+    83 : 'Lead 4 (chiff)',
+    84 : 'Lead 5 (charang)',
+    85 : 'Lead 6 (voice)',
+    86 : 'Lead 7 (fifths)',
+    87 : 'Lead 8 (bass + lead)',
+    88 : 'Pad 1 (new age)',
+    89 : 'Pad 2 (warm)',
+    90 : 'Pad 3 (polysynth)',
+    91 : 'Pad 4 (choir)',
+    92 : 'Pad 5 (bowed)',
+    93 : 'Pad 6 (metallic)',
+    94 : 'Pad 7 (halo)',
+    95 : 'Pad 8 (sweep)',
+    96 : 'FX 1 (rain)',
+    97 : 'FX 2 (soundtrack)',
+    98 : 'FX 3 (crystal)',
+    99 : 'FX 4 (atmosphere)',
+    100 : 'FX 5 (brightness)',
+    101 : 'FX 6 (goblins)',
+    102 : 'FX 7 (echoes)',
+    103 : 'FX 8 (sci-fi)',
+    104 : 'Sitar',
+    105 : 'Banjo',
+    106 : 'Shamisen',
+    107 : 'Koto',
+    108 : 'Kalimba',
+    109 : 'Bag pipe',
+    110 : 'Fiddle',
+    111 : 'Shanai',
+    112 : 'Tinkle Bell',
+    113 : 'Agogo',
+    114 : 'Steel Drums',
+    115 : 'Woodblock',
+    116 : 'Taiko Drum',
+    117 : 'Melodic Tom',
+    118 : 'Synth Drum',
+    119 : 'Reverse Cymbal',
+    120 : 'Guitar Fret Noise',
+    121 : 'Breath Noise',
+    122 : 'Seashore',
+    123 : 'Bird Tweet',
+    124 : 'Telephone Ring',
+    125 : 'Helicopter',
+    126 : 'Applause',
+    127 : 'Gunshot'
+}
+
+# Keep only note onsets: zero out every time step whose previous time
+# step was already sounding; vectorized with numpy. `pianoroll` is
+# (pitches, time steps), i.e. a transposed track, as used in to_samples.
+def trim_notes(pianoroll):
+    trimmed = pianoroll.copy()
+    sustained = (pianoroll[:, 1:] > 0) & (pianoroll[:, :-1] > 0)
+    trimmed[:, 1:][sustained] = 0
+    return trimmed
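+
+# Example (hypothetical values): a note held for three time steps keeps
+# only its onset frame:
+#   >>> trim_notes(np.array([[80, 80, 80, 0]]))
+#   array([[80,  0,  0,  0]])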
+
+def metrum_check(midi_length, metrum=4, beat_resolution=24):
+    # a track passes if its length is a whole number of bars
+    # (metrum beats per bar, beat_resolution time steps per beat)
+    return midi_length % (metrum * beat_resolution) == 0
+
+# MinMaxScaler rescales each feature (here: each of the 128 pitch
+# columns) independently to the [0, 1] range.
+
+def to_samples(midi_file_path, midi_res=settings.midi_resolution, ignore_note_length=settings.ignore_note_length):
+    print('exporting samples from: {}'.format(midi_file_path))
+    midi_file = roll.Multitrack(midi_file_path)
+    all_samples = np.empty((0, midi_res, 128))
+    for track in midi_file.tracks:
+        # if not track.is_drum:
+        if not metrum_check(track.pianoroll.shape[0]):
+            print('Track skipped')
+            continue
+        instrument_track = track.pianoroll
+        if ignore_note_length:
+            instrument_track = trim_notes(instrument_track.T).T
+        # scale the velocities of each pitch column to [0, 1]
+        scaler = MinMaxScaler()
+        instrument_track = scaler.fit_transform(instrument_track)
+        # cut the track into bar-sized samples of shape (midi_res, 128)
+        whole_beats = int(instrument_track.shape[0] / midi_res)
+        samples = instrument_track.reshape(whole_beats, midi_res, 128)
+        print('Exported {} samples of {}'.format(whole_beats, midi_program[track.program]))
+        all_samples = np.concatenate([samples, all_samples], axis=0)
+    return all_samples
+
+def to_midi(samples, output_path=settings.generated_midi_path, program=0, tempo=120, beat_resolution=settings.beat_resolution):
+    tracks = [roll.Track(samples, program=program)]
+    return_midi = roll.Multitrack(tracks=tracks, tempo=tempo, downbeat=[0, 96, 192, 288], beat_resolution=beat_resolution)
+    roll.write(return_midi, output_path)
+
+def to_png(samples, output_path=settings.generated_pianoroll_path, horizontal=True):
+    img = samples.T if horizontal else samples
+    plt.imshow(img, cmap='gray')
+    plt.savefig(output_path)
+
+def delete_empty_samples(sample_pack):
+    # keep only samples that contain at least one note
+    keep = sample_pack.sum(axis=(1, 2)) > 0
+    print('Deleted {} empty samples'.format(int((~keep).sum())))
+    return sample_pack[keep]
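+
+# Example (hypothetical shapes): in a pack of three samples where the
+# second is all zeros, only the first and third survive:
+#   >>> pack = np.stack([np.ones((96, 128)), np.zeros((96, 128)), np.ones((96, 128))])
+#   >>> delete_empty_samples(pack).shape
+#   Deleted 1 empty samples
+#   (2, 96, 128)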
+
+def main():
+    if len(sys.argv) < 2:
+        print('use -e to export samples, -c to convert a generated sample to midi, -p to preview samples as png')
+        return
+    if sys.argv[1] == '-e':
+        print('Exporting started...')
+        sample_pack = np.empty((0, settings.midi_resolution, 128))
+        for midi_file in os.listdir(settings.midi_dir):
+            midi_file_path = '{}/{}'.format(settings.midi_dir, midi_file)
+            midi_samples = to_samples(midi_file_path)
+            if midi_samples.size == 0:
+                continue
+            sample_pack = np.concatenate((midi_samples, sample_pack), axis=0)
+
+        sample_pack = delete_empty_samples(sample_pack)
+        # np.savez_compressed appends .npz, so this writes settings.samples_path
+        np.savez_compressed(settings.samples_dir, sample_pack)
+        print('Exported {} samples'.format(sample_pack.shape[0]))
+    elif sys.argv[1] == '-c':
+        # load the generated sample and rescale it before writing midi
+        generated = np.load(settings.generated_sample_path)['arr_0'][0]
+        generated = generated.reshape(settings.midi_resolution, 128) * 255
+        to_midi(generated)
+        print('Samples to midi saved to {}'.format(settings.generated_midi_path))
+    elif sys.argv[1] == '-p':
+        sample_pack = np.load(settings.samples_path)['arr_0']
+        for i, sample in tqdm(enumerate(sample_pack)):
+            to_png(sample, output_path='data/preview/{}.png'.format(i))
+            if i > 50:
+                break
+    else:
+        print('use -e to export samples, -c to convert a generated sample to midi, -p to preview samples as png')
+
+if __name__ == '__main__':
+    main()
diff --git a/project/samples_to_midi.py b/project/samples_to_midi.py
new file mode 100644
index 0000000..06f6cf2
--- /dev/null
+++ b/project/samples_to_midi.py
@@ -0,0 +1,17 @@
+import pypianoroll as roll
+import matplotlib.pyplot as plt
+import numpy as np
+import settings
+
+instruments = np.load(settings.generated_sample_path)['arr_0'][0]
+
+instruments = instruments.reshape(96, 128)
+# instruments = instruments > 0.5   # optional: binarize the sigmoid output
+instruments = instruments * 255
+
+track = roll.Track(instruments, program=0)
+generated_midi = roll.Multitrack(tracks=[track], tempo=120.0, downbeat=[0, 96, 192, 288], beat_resolution=24)
+roll.write(generated_midi, settings.generated_midi_path)
+
+plt.imshow(instruments.T, cmap='gray')
+plt.savefig(settings.generated_pianoroll_path)
diff --git a/project/settings.py b/project/settings.py
new file mode 100644
index 0000000..fbac6b1
--- /dev/null
+++ b/project/settings.py
@@ -0,0 +1,17 @@
+# paths
+midi_dir = 'data/midi'
+samples_dir = 'data/samples'
+samples_path = 'data/samples.npz'
+model_path = 'data/autoencoder_model.h5'
+generated_sample_path = 'data/output/generated_bar.npz'
+generated_midi_path = 'data/output/generated_midi.mid'
+generated_pianoroll_path = 'data/output/pianoroll.png'
+
+# export settings
+midi_resolution = 96
+beat_resolution = 24
+beats_per_sample = 1
+ignore_note_length = False
+
+# train settings
+epochs = 1000
diff --git a/settings.pyc b/project/settings.pyc
similarity index 100%
rename from settings.pyc
rename to project/settings.pyc
diff --git a/project/train.py b/project/train.py
new file mode 100644
index 0000000..a59d7d1
--- /dev/null
+++ b/project/train.py
@@ -0,0 +1,26 @@
+import tensorflow as tf
+import numpy as np
+import settings
+
+print('Reading samples from: {}'.format(settings.samples_path))
+
+train_X = np.load(settings.samples_path)['arr_0']
+
+n_samples = train_X.shape[0]
+input_shape = settings.midi_resolution*128
+train_X = train_X.reshape(n_samples, input_shape)
+
+# autoencoder model: flattened one-bar pianoroll -> 160 units -> reconstruction
+input_img = tf.keras.layers.Input(shape=(input_shape,))
+encoded = tf.keras.layers.Dense(160, activation='relu')(input_img)
+decoded = tf.keras.layers.Dense(input_shape, activation='sigmoid')(encoded)
+autoencoder = tf.keras.models.Model(input_img, decoded)
+
+autoencoder.compile(optimizer='adam',
+                    loss='binary_crossentropy',
+                    metrics=['accuracy'])
+
+autoencoder.fit(train_X, train_X, epochs=settings.epochs, batch_size=32)
+
+autoencoder.save_weights(settings.model_path)
+print("Model saved to {}".format(settings.model_path))
diff --git a/samples_to_midi.py b/samples_to_midi.py
deleted file mode 100644
index e2177ce..0000000
--- a/samples_to_midi.py
+++ /dev/null
@@ -1,26 +0,0 @@
-GENERATED_BEAT_PATH = 'data/output/generated_bar.npz'
-OUTPUT_PATH = 'data/output/generated_midi.mid'
-OUTPUT_PATH_PIANOROLL = 'data/output/pianoroll.png'
-
-import pypianoroll as roll
-import matplotlib.pyplot as plt
-import numpy as np
-import os
-
-instruments = np.load(GENERATED_BEAT_PATH)['arr_0'][0]
-
-instruments = instruments.reshape(96,128)
-instruments = instruments>instruments.min()*0.3
-instruments = instruments*255
-
-# zeros_up = np.zeros((instruments.shape[0],24))
-# zeros_down = np.zeros((instruments.shape[0], 20))
-# instruments_full = np.concatenate([zeros_up,instruments], axis=1)
-# instruments_full = np.concatenate([instruments_full,zeros_down], axis=1)
-
-i = roll.Track(instruments, program=0)
-return_midi = roll.Multitrack(tracks=[i], tempo=120.0, downbeat=[0, 96, 192, 288], beat_resolution=24)
-roll.write(return_midi, OUTPUT_PATH)
-
-plt.imshow(instruments.T, cmap='gray')
-plt.savefig(OUTPUT_PATH_PIANOROLL)
diff --git a/settings.py b/settings.py
deleted file mode 100644
index c3b8037..0000000
--- a/settings.py
+++ /dev/null
@@ -1,4 +0,0 @@
-midi_path = 'data/midi'
-samples_path = 
'data/samples' -midi_resolution = 96 -beats_per_sample = 1 diff --git a/train.py b/train.py deleted file mode 100644 index 8dd5398..0000000 --- a/train.py +++ /dev/null @@ -1,38 +0,0 @@ -SAMPLES_PATH = 'data/samples.npz' -MODEL_PATH = 'data/autoencoder_model.h5' -EPOCHS = 100 - -import tensorflow as tf -from tensorflow.keras import layers -from keras.layers import Input, Dense, Conv2D, Flatten -from keras.models import Model -import numpy as np -from sys import exit -import pickle - -print('Reading samples from: {}'.format(SAMPLES_PATH)) - -train_samples = np.load(SAMPLES_PATH)['arr_0'] -train_samples = train_samples.reshape(train_samples.shape[0], 1*96*128) -# input = Input(shape=(1,96,128)) -# encoded = Conv2D(filters = 32, kernel_size = 1, activation='relu')(input) -# decoded = Conv2D(filters = 128, kernel_size = 1, activation='sigmoid')(encoded) -# autoencoder = Model(input, decoded) -# -# autoencoder.compile(optimizer='adadelta', -# loss='binary_crossentropy', -# metrics=['accuracy']) - - -encoded = Dense(128, input_shape=(1*96*128)) -decoded = Dense(96*128)(encoded) -autoencoder = Model(input,decoded) - -autoencoder.compile(optimizer='adadelta', - loss='binary_crossentropy', - metrics=['accuracy']) - -autoencoder.fit(train_samples, train_samples, epochs=EPOCHS, batch_size=150) - -autoencoder.save_weights(MODEL_PATH) -print("Saved model to disk")