-init-first-stage-

This commit is contained in:
Cezary Pukownik 2019-05-28 12:40:26 +02:00
parent 55de001c95
commit 0a297e331d
27 changed files with 611 additions and 166 deletions

20
docs/document.aux Normal file
View File

@ -0,0 +1,20 @@
\relax
\@writefile{toc}{\contentsline {section}{\numberline {1}Wst\IeC {\k e}p}{2}\protected@file@percent }
\@writefile{toc}{\contentsline {subsection}{\numberline {1.1}Muzyka}{2}\protected@file@percent }
\@writefile{toc}{\contentsline {section}{\numberline {2}MIDI, Muzyka jako Informacje}{2}\protected@file@percent }
\@writefile{toc}{\contentsline {subsection}{\numberline {2.1}MIDI}{2}\protected@file@percent }
\@writefile{toc}{\contentsline {subsection}{\numberline {2.2}Pianoroll}{2}\protected@file@percent }
\@writefile{toc}{\contentsline {subsection}{\numberline {2.3}Muzyka jako tr\IeC {\'o}jwymiarowa tablica}{2}\protected@file@percent }
\@writefile{toc}{\contentsline {section}{\numberline {3}Generatwne sieci neuronowe - GANy, VAE, LSTMy}{2}\protected@file@percent }
\@writefile{toc}{\contentsline {subsection}{\numberline {3.1}Autoencodery, VAE}{2}\protected@file@percent }
\@writefile{toc}{\contentsline {subsection}{\numberline {3.2}LSTM}{2}\protected@file@percent }
\@writefile{toc}{\contentsline {section}{\numberline {4}Modele generatywne stosowane w generowaniu muzyki}{3}\protected@file@percent }
\@writefile{toc}{\contentsline {subsection}{\numberline {4.1}Project Magenta}{3}\protected@file@percent }
\@writefile{toc}{\contentsline {subsection}{\numberline {4.2}MuseGAN}{3}\protected@file@percent }
\@writefile{toc}{\contentsline {subsection}{\numberline {4.3}VAE-MIDI}{3}\protected@file@percent }
\@writefile{toc}{\contentsline {section}{\numberline {5}Budowanie generatora muzyki}{3}\protected@file@percent }
\@writefile{toc}{\contentsline {subsection}{\numberline {5.1}Wyodr\IeC {\k e}bnienie danych z plik\IeC {\'o}w MIDI}{3}\protected@file@percent }
\@writefile{toc}{\contentsline {subsection}{\numberline {5.2}Przygotowanie Modelu GAN}{3}\protected@file@percent }
\@writefile{toc}{\contentsline {subsection}{\numberline {5.3}Proces uczenia, pr\IeC {\'o}bki co kilka epoch\IeC {\'o}w, costloss wykres}{3}\protected@file@percent }
\@writefile{toc}{\contentsline {subsection}{\numberline {5.4}Pr\IeC {\'o}bki ko\IeC {\'n}cowe, jak\IeC {\k a} muzyk\IeC {\k e} da si\IeC {\k e} z tego wygenerowa\IeC {\'c}}{3}\protected@file@percent }
\@writefile{toc}{\contentsline {section}{\numberline {6}Podsumowanie}{3}\protected@file@percent }

151
docs/document.log Normal file
View File

@ -0,0 +1,151 @@
This is pdfTeX, Version 3.14159265-2.6-1.40.19 (TeX Live 2018/W32TeX) (preloaded format=pdflatex 2019.2.21) 28 MAY 2019 12:32
entering extended mode
restricted \write18 enabled.
%&-line parsing enabled.
**document.tex
(./document.tex
LaTeX2e <2018-12-01>
(c:/software/latex/texmf-dist/tex/latex/base/article.cls
Document Class: article 2018/09/03 v1.4i Standard LaTeX document class
(c:/software/latex/texmf-dist/tex/latex/base/size10.clo
File: size10.clo 2018/09/03 v1.4i Standard LaTeX file (size option)
)
\c@part=\count80
\c@section=\count81
\c@subsection=\count82
\c@subsubsection=\count83
\c@paragraph=\count84
\c@subparagraph=\count85
\c@figure=\count86
\c@table=\count87
\abovecaptionskip=\skip41
\belowcaptionskip=\skip42
\bibindent=\dimen102
)
(c:/software/latex/texmf-dist/tex/latex/polski/polski.sty
Package: polski 2017/05/04 v1.3.4 Polish language package
Switching to Polish text encoding and Polish maths fonts.
(c:/software/latex/texmf-dist/tex/latex/base/ot4enc.def
File: ot4enc.def 2018/08/11 v2.0j Standard LaTeX file
Now handling font encoding OT4 ...
... no UTF-8 mapping file for font encoding OT4
)
LaTeX Font Info: Try loading font information for OT4+cmr on input line 360.
(c:/software/latex/texmf-dist/tex/latex/polski/ot4cmr.fd
File: ot4cmr.fd 2008/02/24 v1.2.1 Font defs for fonts PL (MW)
)
LaTeX Font Info: Overwriting math alphabet `\mathbf' in version `normal'
(Font) OT1/cmr/bx/n --> OT4/cmr/bx/n on input line 360.
LaTeX Font Info: Overwriting math alphabet `\mathsf' in version `normal'
(Font) OT1/cmss/m/n --> OT4/cmss/m/n on input line 360.
LaTeX Font Info: Overwriting math alphabet `\mathit' in version `normal'
(Font) OT1/cmr/m/it --> OT4/cmr/m/it on input line 360.
LaTeX Font Info: Overwriting math alphabet `\mathtt' in version `normal'
(Font) OT1/cmtt/m/n --> OT4/cmtt/m/n on input line 360.
LaTeX Font Info: Overwriting math alphabet `\mathsf' in version `bold'
(Font) OT1/cmss/bx/n --> OT4/cmss/bx/n on input line 360.
LaTeX Font Info: Overwriting math alphabet `\mathit' in version `bold'
(Font) OT1/cmr/bx/it --> OT4/cmr/bx/it on input line 360.
LaTeX Font Info: Encoding `OT1' has changed to `OT4' for symbol font
(Font) `operators' in the math version `normal' on input line 360.
LaTeX Font Info: Overwriting symbol font `operators' in version `normal'
(Font) OT1/cmr/m/n --> OT4/cmr/m/n on input line 360.
LaTeX Font Info: Overwriting symbol font `letters' in version `normal'
(Font) OML/cmm/m/it --> OML/plm/m/it on input line 360.
LaTeX Font Info: Overwriting symbol font `symbols' in version `normal'
(Font) OMS/cmsy/m/n --> OMS/plsy/m/n on input line 360.
LaTeX Font Info: Overwriting symbol font `largesymbols' in version `normal'
(Font) OMX/cmex/m/n --> OMX/plex/m/n on input line 360.
LaTeX Font Info: Encoding `OT1' has changed to `OT4' for symbol font
(Font) `operators' in the math version `bold' on input line 360.
LaTeX Font Info: Overwriting symbol font `operators' in version `bold'
(Font) OT1/cmr/bx/n --> OT4/cmr/bx/n on input line 360.
LaTeX Font Info: Overwriting symbol font `letters' in version `bold'
(Font) OML/cmm/b/it --> OML/plm/b/it on input line 360.
LaTeX Font Info: Overwriting symbol font `symbols' in version `bold'
(Font) OMS/cmsy/b/n --> OMS/plsy/b/n on input line 360.
)
LaTeX Warning: Unused global option(s):
[utf8].
(./document.aux)
\openout1 = `document.aux'.
LaTeX Font Info: Checking defaults for OML/cmm/m/it on input line 18.
LaTeX Font Info: ... okay on input line 18.
LaTeX Font Info: Checking defaults for T1/cmr/m/n on input line 18.
LaTeX Font Info: ... okay on input line 18.
LaTeX Font Info: Checking defaults for OT1/cmr/m/n on input line 18.
LaTeX Font Info: ... okay on input line 18.
LaTeX Font Info: Checking defaults for OMS/cmsy/m/n on input line 18.
LaTeX Font Info: ... okay on input line 18.
LaTeX Font Info: Checking defaults for OMX/cmex/m/n on input line 18.
LaTeX Font Info: ... okay on input line 18.
LaTeX Font Info: Checking defaults for U/cmr/m/n on input line 18.
LaTeX Font Info: ... okay on input line 18.
LaTeX Font Info: Checking defaults for OT4/cmr/m/n on input line 18.
LaTeX Font Info: ... okay on input line 18.
LaTeX Font Info: Try loading font information for OML+plm on input line 20.
(c:/software/latex/texmf-dist/tex/latex/polski/omlplm.fd
File: omlplm.fd 2008/02/24 v1.2.1 Font defs for fonts PL (MW)
)
LaTeX Font Info: Try loading font information for OMS+plsy on input line 20.
(c:/software/latex/texmf-dist/tex/latex/polski/omsplsy.fd
File: omsplsy.fd 2008/02/24 v1.2.1 Font defs for fonts PL (MW)
)
LaTeX Font Info: Try loading font information for OMX+plex on input line 20.
(c:/software/latex/texmf-dist/tex/latex/polski/omxplex.fd
File: omxplex.fd 2008/02/24 v1.2.1 Font defs for fonts PL (MW)
)
LaTeX Font Info: External font `plex10' loaded for size
(Font) <12> on input line 20.
LaTeX Font Info: External font `plex10' loaded for size
(Font) <8> on input line 20.
LaTeX Font Info: External font `plex10' loaded for size
(Font) <6> on input line 20.
[1
{c:/software/latex/texmf-var/fonts/map/pdftex/updmap/pdftex.map}] (./document.t
oc
LaTeX Font Info: External font `plex10' loaded for size
(Font) <10> on input line 2.
LaTeX Font Info: External font `plex10' loaded for size
(Font) <7> on input line 2.
LaTeX Font Info: External font `plex10' loaded for size
(Font) <5> on input line 2.
)
\tf@toc=\write3
\openout3 = `document.toc'.
[1] [2]
[3] (./document.aux) )
Here is how much of TeX's memory you used:
474 strings out of 492616
5750 string characters out of 6131816
66703 words of memory out of 5000000
4428 multiletter control sequences out of 15000+600000
13779 words of font info for 39 fonts, out of 8000000 for 9000
1141 hyphenation exceptions out of 8191
23i,7n,25p,379b,252s stack positions out of 5000i,500n,10000p,200000b,80000s
{c:/software/latex/texmf-dist/fonts/enc/dvips/pl/plrm.enc
}<c:/software/latex/texmf-dist/fonts/type1/public/pl/plbx10.pfb><c:/software/la
tex/texmf-dist/fonts/type1/public/pl/plbx12.pfb><c:/software/latex/texmf-dist/f
onts/type1/public/pl/plr10.pfb><c:/software/latex/texmf-dist/fonts/type1/public
/pl/plr12.pfb><c:/software/latex/texmf-dist/fonts/type1/public/pl/plr17.pfb><c:
/software/latex/texmf-dist/fonts/type1/public/pl/plr9.pfb>
Output written on document.pdf (4 pages, 61011 bytes).
PDF statistics:
42 PDF objects out of 1000 (max. 8388607)
29 compressed objects within 1 object stream
0 named destinations out of 1000 (max. 500000)
1 words of extra memory for PDF output out of 10000 (max. 10000000)

BIN
docs/document.pdf Normal file

Binary file not shown.

BIN
docs/document.synctex.gz Normal file

Binary file not shown.

80
docs/document.tex Normal file
View File

@ -0,0 +1,80 @@
\documentclass[utf8]{article}
\usepackage{polski}
\title{%
Generowanie muzyki \\
przy pomocy głębokiego uczenia \\
\large Music generation with deep learning}
\author{%
Cezary Pukownik \\
\newline
\small Opiekun pracy:\\
dr hab. Tomasz Górecki}
\date{2019-05-28}
\begin{document}
\maketitle
\newpage
\pagenumbering{arabic}
\tableofcontents
\newpage
\section{Wstęp}
To jest wstęp do pracy magisterskiej.
\subsection{Muzyka}
Teraz opowiem trochę o muzyce i o tym, dlaczego trudno jest ją generować, co o tym sądzę oraz czy sztuczna inteligencja zastąpi muzyków w przyszłości.
\section{MIDI, Muzyka jako Informacje}
Tutaj opiszę, w jaki sposób muzyka jest zapisywana jako informacje komputerowe, protokół MIDI oraz przedstawienie muzyki jako pianorolle.
\subsection{MIDI}
Tutaj opiszę protokół MIDI.
\subsection{Pianoroll}
Tutaj opiszę, co to są pianorolle, jak je czytać i czemu służą.
\subsection{Muzyka jako trójwymiarowa tablica}
Tutaj opiszę, dlaczego muzykę można opisać jako trójwymiarową tablicę.
\section{Generatywne sieci neuronowe - GANy, VAE, LSTMy}
Tutaj będzie opisane, dlaczego sieci neuronowe radzą sobie lepiej w produkowaniu muzyki niż inne modele oraz jakie modele są odpowiednie do pewnych zastosowań: JAZZ - LSTM, bardziej ustrukturyzowana - VAE itp.
\subsection{Autoencodery, VAE}
Teraz opowiem troche o muzyce, i dlaczego trudno jest ja generowac
\subsection{LSTM}
Teraz opowiem troche o muzyce, i dlaczego trudno jest ja generowac
\section{Modele generatywne stosowane w generowaniu muzyki}
Przykłady gotowych podejść do generowania muzyki oraz jakie modele zostały zastosowane, dlaczego takie itp.
\subsection{Project Magenta}
Teraz opowiem troche o muzyce, i dlaczego trudno jest ja generowac
\subsection{MuseGAN}
Teraz opowiem troche o muzyce, i dlaczego trudno jest ja generowac
\subsection{VAE-MIDI}
Teraz opowiem troche o muzyce, i dlaczego trudno jest ja generowac
\section{Budowanie generatora muzyki}
W tym rozdziale opiszę, w jaki sposób zbudowałem swój własny generator muzyki, jak przechodził proces uczenia, jakie próbki udało mi się wygenerować. Opis kodu, który napisałem.
\subsection{Wyodrębnienie danych z plików MIDI}
\subsection{Przygotowanie Modelu GAN}
\subsection{Proces uczenia, próbki co kilka epochów, costloss wykres}
\subsection{Próbki końcowe, jaką muzykę da się z tego wygenerować}
\section{Podsumowanie}
Ostateczne wnioski, czy muzyka generowana komputerowo da się lubić? Czy to pozytywnie wpłynie na przemysł muzyczny? Tak i nie. Może służyć jako inspiracja dla muzyków, proces wspierający. Z drugiej strony może obniżyć koszty produkowania muzyki pop, która i tak jest już bardzo powtarzalna. Czy sieci neuronowe nauczą się produkować hity?
\end{document}

19
docs/document.toc Normal file
View File

@ -0,0 +1,19 @@
\contentsline {section}{\numberline {1}Wst\IeC {\k e}p}{2}%
\contentsline {subsection}{\numberline {1.1}Muzyka}{2}%
\contentsline {section}{\numberline {2}MIDI, Muzyka jako Informacje}{2}%
\contentsline {subsection}{\numberline {2.1}MIDI}{2}%
\contentsline {subsection}{\numberline {2.2}Pianoroll}{2}%
\contentsline {subsection}{\numberline {2.3}Muzyka jako tr\IeC {\'o}jwymiarowa tablica}{2}%
\contentsline {section}{\numberline {3}Generatwne sieci neuronowe - GANy, VAE, LSTMy}{2}%
\contentsline {subsection}{\numberline {3.1}Autoencodery, VAE}{2}%
\contentsline {subsection}{\numberline {3.2}LSTM}{2}%
\contentsline {section}{\numberline {4}Modele generatywne stosowane w generowaniu muzyki}{3}%
\contentsline {subsection}{\numberline {4.1}Project Magenta}{3}%
\contentsline {subsection}{\numberline {4.2}MuseGAN}{3}%
\contentsline {subsection}{\numberline {4.3}VAE-MIDI}{3}%
\contentsline {section}{\numberline {5}Budowanie generatora muzyki}{3}%
\contentsline {subsection}{\numberline {5.1}Wyodr\IeC {\k e}bnienie danych z plik\IeC {\'o}w MIDI}{3}%
\contentsline {subsection}{\numberline {5.2}Przygotowanie Modelu GAN}{3}%
\contentsline {subsection}{\numberline {5.3}Proces uczenia, pr\IeC {\'o}bki co kilka epoch\IeC {\'o}w, costloss wykres}{3}%
\contentsline {subsection}{\numberline {5.4}Pr\IeC {\'o}bki ko\IeC {\'n}cowe, jak\IeC {\k a} muzyk\IeC {\k e} da si\IeC {\k e} z tego wygenerowa\IeC {\'c}}{3}%
\contentsline {section}{\numberline {6}Podsumowanie}{3}%

View File

@ -1,22 +0,0 @@
import numpy as np
from keras.layers import Input, Dense, Conv2D
from keras.models import Model
# Legacy generation script: rebuild the convolutional autoencoder, load its
# trained weights and run every stored sample through it.
GENERATED_BEAT_PATH = 'data/output/generated_bar'
MODEL_PATH = 'data/autoencoder_model.h5'
SAMPLES_PATH = 'data/samples.npz'

# Named `input_layer` so the builtin `input` is not shadowed.
input_layer = Input(shape=(1, 96, 128))
encoded = Conv2D(filters=32, kernel_size=1)(input_layer)
decoded = Conv2D(filters=128, kernel_size=1)(encoded)
autoencoder = Model(input_layer, decoded)

# Load weights into the freshly built model.
autoencoder.load_weights(MODEL_PATH)
print("Loaded model from disk")

# generate_seed = np.random.rand(1,1,96,128)
# Every sample stored under 'arr_0' is used as a seed.
generate_seed = np.load(SAMPLES_PATH)['arr_0'][0:]
generated_beat = autoencoder.predict(generate_seed)
np.savez_compressed(GENERATED_BEAT_PATH, generated_beat)

View File

@ -1,76 +0,0 @@
import settings
import pypianoroll as roll
import matplotlib.pyplot as plt
import numpy as np
import os
from math import floor
# Conversion parameters, all read from the project `settings` module.
MIDI_DIRECTORY = settings.midi_path
SAMPLES_DIRECTORY = settings.samples_path
MIDI_RESOLUTION = settings.midi_resolution
BEAT_PER_BATCH = settings.beats_per_sample
# Empty accumulator with zero rows; converted songs are concatenated onto it
# along axis 0 further down in this script.
samples = np.empty((0,BEAT_PER_BATCH,96,128))
def erase_note_lenth(pianoroll):
    """Zero every positive value that directly follows a positive value in its row.

    Each row of the 2-D ``pianoroll`` is scanned left to right; a value is
    replaced by 0 when both it and the original value just before it are
    greater than 0. The first value of a row is always kept.

    Raises:
        ValueError: if ``pianoroll`` is not two-dimensional.
    """
    if pianoroll.ndim != 2:
        raise ValueError('pianoroll should be two dimentional')

    trimmed_rows = []
    for row in pianoroll:
        trimmed = []
        previous = None
        for value in row:
            # Compare against the original neighbour, not the trimmed one.
            sustained = previous is not None and value > 0 and previous > 0
            trimmed.append(0 if sustained else value)
            previous = value
        trimmed_rows.append(trimmed)
    return np.array(trimmed_rows)
# Batch conversion: iterate over every entry of MIDI_DIRECTORY, load it as a
# pypianoroll Multitrack and turn its track data into fixed-size samples that
# are accumulated in the module-level `samples` array.
# NOTE(review): indentation is not visible in this view, so the exact nesting
# of the statements below (what belongs to the track loop / the `if`) cannot be
# confirmed from here.
print('Start convertion')
for midi_file in os.listdir(MIDI_DIRECTORY):
try:
print('Reading file: {}'.format(midi_file))
song = roll.Multitrack('{}/{}'.format(MIDI_DIRECTORY, midi_file))
# no_drums_mt = roll.Multitrack(tempo=120.0, downbeat=[0, 96, 192, 288], beat_resolution=24)
# Container that receives the tracks whose `is_drum` flag is False.
intruments_only = roll.Multitrack(tempo=120.0, beat_resolution=24)
for track in song.tracks:
if track.is_drum == False:
print(track.name, track.program)
intruments_only.append_track(track=track, pianoroll=track.pianoroll)
instrument_track = track.pianoroll
# plt.imshow(instrument_track[24*8:24*24].T)
# plt.savefig('data/0_{}.png'.format(midi_file))
# erase_note_lenth works row by row, so the roll is transposed going in and
# transposed back coming out.
instrument_track = erase_note_lenth(instrument_track.T).T
# plt.imshow(instrument_track[24*8:24*24].T)
# plt.savefig('data/1_{}.png'.format(midi_file))
# instruments = no_drums_mt.get_merged_pianoroll(mode='sum')
# Number of MIDI_RESOLUTION-sized chunks, rounded down to a multiple of
# BEAT_PER_BATCH so the reshape below divides evenly.
beats = floor( (instrument_track.shape[0] / MIDI_RESOLUTION) / BEAT_PER_BATCH) * BEAT_PER_BATCH
notes_for_beats = beats * MIDI_RESOLUTION
print('beats: ', beats)
# Drop the trailing remainder, then cut the roll into `beats` equal pieces.
samples_of_song = np.asarray(np.split(instrument_track[:notes_for_beats], beats))
samples_of_song = samples_of_song.reshape(int(beats/BEAT_PER_BATCH),BEAT_PER_BATCH,96,128)
print('Converted samples: {}'.format(samples_of_song.shape))
# New samples are placed in front of the ones collected so far.
samples = np.concatenate([samples_of_song,samples], axis=0)
np.savez_compressed(SAMPLES_DIRECTORY,samples)
# Any failure is reported on stdout and otherwise ignored.
except Exception as error:
print('Convertion faild: {}'.format(error))
pass
finally:
print('Done!')

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

After

Width:  |  Height:  |  Size: 8.4 KiB

BIN
project/data/samples.npz Normal file

Binary file not shown.

26
project/generate.py Normal file
View File

@ -0,0 +1,26 @@
import numpy as np
import tensorflow as tf
from keras.layers import Input, Dense, Conv2D
from keras.models import Model
import settings
# Model: must mirror the architecture built in train.py so the saved weights
# can be loaded into it.
input_shape = settings.midi_resolution * 128
input_img = tf.keras.layers.Input(shape=(input_shape,))
encoded = tf.keras.layers.Dense(160, activation='relu')(input_img)
decoded = tf.keras.layers.Dense(input_shape, activation='sigmoid')(encoded)
autoencoder = tf.keras.models.Model(input_img, decoded)

# Same optimizer/loss as the training script, for consistency.
autoencoder.compile(optimizer='adam',
                    loss='binary_crossentropy',
                    metrics=['accuracy'])

# Load weights into the new model.
autoencoder.load_weights(settings.model_path)
print("Loaded model from {}".format(settings.model_path))

# generate_seed = np.random.rand(input_shape).reshape(1, input_shape)
# Seed with stored sample number 15, flattened to the model's input width
# (derived from the settings instead of the hard-coded 12288).
generate_seed = np.load(settings.samples_path)['arr_0'][15].reshape(1, input_shape)

generated_sample = autoencoder.predict(generate_seed)
np.savez_compressed(settings.generated_sample_path, generated_sample)

249
project/midi_to_samples.py Normal file
View File

@ -0,0 +1,249 @@
import settings
import pypianoroll as roll
import matplotlib.pyplot as plt
import numpy as np
import os
from tqdm import tqdm
from math import floor
import sys
from sklearn.preprocessing import MinMaxScaler
# Lookup table from a program number to a human-readable instrument name.
# Key 0 maps to 'Perc'; keys 1..128 list instrument names starting with
# 'Acoustic Grand Piano' at 1.
# NOTE(review): to_samples indexes this table with `track.program`; if that
# value is 0-based the printed names would be shifted by one — confirm.
midi_program = {
0 : 'Perc',
1 : 'Acoustic Grand Piano',
2 : 'Bright Acoustic Piano',
3 : 'Electric Grand Piano',
4 : 'Honky-tonk Piano',
5 : 'Electric Piano 1',
6 : 'Electric Piano 2',
7 : 'Harpsichord',
8 : 'Clavi',
9 : 'Celesta',
10 : 'Glockenspiel',
11 : 'Music Box',
12 : 'Vibraphone',
13 : 'Marimba',
14 : 'Xylophone',
15 : 'Tubular Bells',
16 : 'Dulcimer',
17 : 'Drawbar Organ',
18 : 'Percussive Organ',
19 : 'Rock Organ',
20 : 'Church Organ',
21 : 'Reed Organ',
22 : 'Accordion',
23 : 'Harmonica',
24 : 'Tango Accordion',
25 : 'Acoustic Guitar (nylon)',
26 : 'Acoustic Guitar (steel)',
27 : 'Electric Guitar (jazz)',
28 : 'Electric Guitar (clean)',
29 : 'Electric Guitar (muted)',
30 : 'Overdriven Guitar',
31 : 'Distortion Guitar',
32 : 'Guitar harmonics',
33 : 'Acoustic Bass',
34 : 'Electric Bass (finger)',
35 : 'Electric Bass (pick)',
36 : 'Fretless Bass',
37 : 'Slap Bass 1',
38 : 'Slap Bass 2',
39 : 'Synth Bass 1',
40 : 'Synth Bass 2',
41 : 'Violin',
42 : 'Viola',
43 : 'Cello',
44 : 'Contrabass',
45 : 'Tremolo Strings',
46 : 'Pizzicato Strings',
47 : 'Orchestral Harp',
48 : 'Timpani',
49 : 'String Ensemble 1',
50 : 'String Ensemble 2',
51 : 'SynthStrings 1',
52 : 'SynthStrings 2',
53 : 'Choir Aahs',
54 : 'Voice Oohs',
55 : 'Synth Voice',
56 : 'Orchestra Hit',
57 : 'Trumpet',
58 : 'Trombone',
59 : 'Tuba',
60 : 'Muted Trumpet',
61 : 'French Horn',
62 : 'Brass Section',
63 : 'SynthBrass 1',
64 : 'SynthBrass 2',
65 : 'Soprano Sax',
66 : 'Alto Sax',
67 : 'Tenor Sax',
68 : 'Baritone Sax',
69 : 'Oboe',
70 : 'English Horn',
71 : 'Bassoon',
72 : 'Clarinet',
73 : 'Piccolo',
74 : 'Flute',
75 : 'Recorder',
76 : 'Pan Flute',
77 : 'Blown Bottle',
78 : 'Shakuhachi',
79 : 'Whistle',
80 : 'Ocarina',
81 : 'Lead 1 (square)',
82 : 'Lead 2 (sawtooth)',
83 : 'Lead 3 (calliope)',
84 : 'Lead 4 (chiff)',
85 : 'Lead 5 (charang)',
86 : 'Lead 6 (voice)',
87 : 'Lead 7 (fifths)',
88 : 'Lead 8 (bass + lead)',
89 : 'Pad 1 (new age)',
90 : 'Pad 2 (warm)',
91 : 'Pad 3 (polysynth)',
92 : 'Pad 4 (choir)',
93 : 'Pad 5 (bowed)',
94 : 'Pad 6 (metallic)',
95 : 'Pad 7 (halo)',
96 : 'Pad 8 (sweep)',
97 : 'FX 1 (rain)',
98 : 'FX 2 (soundtrack)',
99 : 'FX 3 (crystal)',
100 : 'FX 4 (atmosphere)',
101 : 'FX 5 (brightness)',
102 : 'FX 6 (goblins)',
103 : 'FX 7 (echoes)',
104 : 'FX 8 (sci-fi)',
105 : 'Sitar',
106 : 'Banjo',
107 : 'Shamisen',
108 : 'Koto',
109 : 'Kalimba',
110 : 'Bag pipe',
111 : 'Fiddle',
112 : 'Shanai',
113 : 'Tinkle Bell',
114 : 'Agogo',
115 : 'Steel Drums',
116 : 'Woodblock',
117 : 'Taiko Drum',
118 : 'Melodic Tom',
119 : 'Synth Drum',
120 : 'Reverse Cymbal',
121 : 'Guitar Fret Noise',
122 : 'Breath Noise',
123 : 'Seashore',
124 : 'Bird Tweet',
125 : 'Telephone Ring',
126 : 'Helicopter',
127 : 'Applause',
128 : 'Gunshot'
}
def trim_notes(pianoroll):
    """Zero every positive value that directly follows a positive value in its row.

    Within each row of the 2-D ``pianoroll`` a value is replaced by 0 when both
    it and the original value immediately before it are greater than 0; the
    first value of every row is always kept. The work is done with NumPy array
    operations instead of a nested Python loop, and the result keeps the
    dtype and shape of the input.

    Args:
        pianoroll: two-dimensional array-like of numeric values.

    Returns:
        A new ``numpy.ndarray`` of the same shape; the input is not modified.

    Raises:
        ValueError: if ``pianoroll`` is not two-dimensional.
    """
    roll_array = np.asarray(pianoroll)
    if roll_array.ndim != 2:
        raise ValueError('pianoroll should be two dimensional')

    trimmed = roll_array.copy()
    # The mask is computed on the untouched input, so a long run of positive
    # values keeps only its first element.
    sustained = (roll_array[:, 1:] > 0) & (roll_array[:, :-1] > 0)
    trimmed[:, 1:][sustained] = 0
    return trimmed
def metrum_check(midi_lenght, metrum=4, beat_resolution=24):
    """Return True when ``midi_lenght`` is an exact multiple of ``metrum * beat_resolution``."""
    steps_per_bar = metrum * beat_resolution
    leftover = midi_lenght % steps_per_bar
    return bool(leftover == 0)
# >>> data = [[-1, 2], [-0.5, 6], [0, 10], [1, 18]]
# >>> scaler = MinMaxScaler()
# >>> print(scaler.fit(data))
# MinMaxScaler(copy=True, feature_range=(0, 1))
# >>> print(scaler.data_max_)
# [ 1. 18.]
# >>> print(scaler.transform(data))
# [[0. 0. ]
# [0.25 0.25]
# [0.5 0.5 ]
# [1. 1. ]]
# >>> print(scaler.transform([[2, 2]]))
# [[1.5 0. ]]
def to_samples(midi_file_path, midi_res=settings.midi_resolution, ignore_note_lenght=settings.ignore_note_lenght):
    """Cut every usable track of a MIDI file into samples of ``midi_res`` steps.

    Each track's pianoroll is optionally passed through ``trim_notes``, scaled
    with a ``MinMaxScaler`` fitted on that track alone, and reshaped into
    ``(n, midi_res, 128)``. Tracks that are empty or fail ``metrum_check`` are
    skipped.

    Args:
        midi_file_path: path handed to ``pypianoroll.Multitrack``.
        midi_res: number of time steps per sample.
        ignore_note_lenght: when true, apply ``trim_notes`` before scaling.

    Returns:
        Array of shape ``(total_samples, midi_res, 128)``; samples of later
        tracks come first because each track is prepended.
    """
    print('exporting samples from: {}'.format(midi_file_path))
    midi_file = roll.Multitrack(midi_file_path)

    # Use midi_res (not the global setting) so a non-default resolution still
    # concatenates with the per-track samples below.
    all_samples = np.empty((0, midi_res, 128))

    for track in midi_file.tracks:
        # if not track.is_drum:
        instrument_track = track.pianoroll
        n_steps = instrument_track.shape[0]
        # A zero-length roll passes metrum_check (0 % n == 0) but cannot be
        # fitted by the scaler, so skip it explicitly.
        if n_steps == 0 or not metrum_check(n_steps):
            print('Track skipped')
            continue

        if ignore_note_lenght:
            # trim_notes works along rows, hence the transpose round-trip.
            instrument_track = trim_notes(instrument_track.T).T

        instrument_track = MinMaxScaler().fit_transform(instrument_track)

        whole_beats = int(instrument_track.shape[0] / midi_res)
        samples = instrument_track.reshape(whole_beats, midi_res, 128)
        print('Exported {} samples of {}'.format(whole_beats, midi_program[track.program]))
        all_samples = np.concatenate([samples, all_samples], axis=0)

    return all_samples
def to_midi(samples, output_path=settings.generated_midi_path, program=0, tempo=120, beat_resolution=settings.beat_resolution):
    """Wrap ``samples`` in a single-track Multitrack and write it as a MIDI file.

    Args:
        samples: pianoroll data passed straight to ``pypianoroll.Track``.
        output_path: destination file; previously ignored in favour of the
            global setting, now honoured.
        program: program number assigned to the track.
        tempo: tempo passed to the Multitrack.
        beat_resolution: beat resolution passed to the Multitrack.
    """
    tracks = [roll.Track(samples, program=program)]
    return_midi = roll.Multitrack(tracks=tracks, tempo=tempo, downbeat=[0, 96, 192, 288], beat_resolution=beat_resolution)
    roll.write(return_midi, output_path)
def to_png(samples, output_path=settings.generated_pianoroll_path, horizontal=True):
    """Save ``samples`` as a grayscale image at ``output_path``.

    Args:
        samples: 2-D array to draw.
        output_path: file the figure is saved to.
        horizontal: when true the array is transposed before drawing.

    A dedicated figure is created and closed for every call, so repeated
    calls (e.g. the preview loop) neither stack images on one shared axes nor
    keep figures alive.
    """
    img = samples.T if horizontal else samples
    fig, ax = plt.subplots()
    try:
        ax.imshow(img, cmap='gray')
        fig.savefig(output_path)
    finally:
        plt.close(fig)
def delete_empty_samples(sample_pack):
    """Return ``sample_pack`` without the samples whose values sum to zero.

    The previous index arithmetic removed the entry *before* each empty sample
    (and the last entry when the first sample was empty); a boolean mask over
    the first axis removes exactly the empty ones.

    Args:
        sample_pack: array whose first axis enumerates samples.

    Returns:
        A new array holding only the samples with a non-zero sum, in their
        original order.
    """
    sample_pack = np.asarray(sample_pack)
    # Sum over every axis except the first -> one total per sample.
    per_sample_sum = sample_pack.sum(axis=tuple(range(1, sample_pack.ndim)))
    keep = per_sample_sum != 0
    print('Deleted {} empty samples'.format(int(np.count_nonzero(~keep))))
    return sample_pack[keep]
def main():
    """Command-line entry point.

    ``-e`` exports samples from every file in ``settings.midi_dir``,
    ``-c`` converts the generated sample to a MIDI file,
    ``-p`` renders stored samples as PNG previews.
    Anything else, including no argument at all, prints the usage hint.
    """
    # Guard against a missing argument instead of raising IndexError.
    command = sys.argv[1] if len(sys.argv) > 1 else None

    if command == '-e':
        print('Exporting started...')
        sample_pack = np.empty((0, settings.midi_resolution, 128))
        for midi_file in os.listdir(settings.midi_dir):
            midi_file_path = '{}/{}'.format(settings.midi_dir, midi_file)
            midi_samples = to_samples(midi_file_path)
            if midi_samples is None:
                continue
            sample_pack = np.concatenate((midi_samples, sample_pack), axis=0)

        sample_pack = delete_empty_samples(sample_pack)
        np.savez_compressed(settings.samples_dir, sample_pack)
        print('Exported {} samples'.format(sample_pack.shape[0]))

    elif command == '-c':
        # to_midi needs pianoroll data, not a file path. Load the first
        # generated sample and bring it to (midi_resolution, 128), scaled by
        # 255, the same way the stand-alone conversion script does.
        # NOTE(review): assumes the file layout written by generate.py — confirm.
        generated = np.load(settings.generated_sample_path)['arr_0'][0]
        generated = generated.reshape(settings.midi_resolution, 128) * 255
        to_midi(generated)
        print('Samples to midi saved to {}'.format(settings.generated_midi_path))

    elif command == '-p':
        sample_pack = np.load(settings.samples_path)['arr_0']
        for i, sample in tqdm(enumerate(sample_pack)):
            to_png(sample, output_path='data/preview/{}.png'.format(i))
            # Only a preview is wanted; stop after the first samples.
            if i > 50:
                sys.exit()
    else:
        print('type command afrer -e to export samples, -c to convert samples to midi, -p to preview samples in png')


if __name__ == '__main__':
    main()

View File

@ -0,0 +1,18 @@
import pypianoroll as roll
import matplotlib.pyplot as plt
import numpy as np
import os
import settings
# Convert the first generated sample into a MIDI file and a pianoroll image.
generated = np.load(settings.generated_sample_path)['arr_0'][0]
# Shape comes from the settings (midi_resolution steps x 128) instead of a
# hard-coded 96.
instruments = generated.reshape(settings.midi_resolution, 128)
# instruments = instruments>0.5
instruments = instruments * 255

track = roll.Track(instruments, program=0)
generated_midi = roll.Multitrack(tracks=[track], tempo=120.0, downbeat=[0, 96, 192, 288], beat_resolution=settings.beat_resolution)
roll.write(generated_midi, settings.generated_midi_path)

# Transposed so the array's first axis runs horizontally in the image.
plt.imshow(instruments.T, cmap='gray')
plt.savefig(settings.generated_pianoroll_path)

17
project/settings.py Normal file
View File

@ -0,0 +1,17 @@
# paths
# Directory listed by the exporter to find input MIDI files.
midi_dir = 'data/midi'
# Target handed to np.savez_compressed by the exporter.
# NOTE(review): presumably ends up as the same file as samples_path once numpy
# adds the '.npz' suffix — confirm.
samples_dir = 'data/samples'
# Sample archive read by the training, generation and preview code.
samples_path = 'data/samples.npz'
# Where the autoencoder weights are saved to / loaded from.
model_path = 'data/autoencoder_model.h5'
# Output of the generation script.
generated_sample_path = 'data/output/generated_bar.npz'
generated_midi_path = 'data/output/generated_midi.mid'
generated_pianoroll_path = 'data/output/pianoroll.png'
# export_settings
# Number of time steps in one sample (second axis of the sample array).
midi_resolution = 96
# Default beat resolution passed to pypianoroll when writing MIDI.
beat_resolution = 24
beats_per_sample = 1
# When True, to_samples runs each track through trim_notes before scaling.
# (The misspelling is part of the public name; other modules reference it.)
ignore_note_lenght = False
#train_settings
# Number of epochs used by the training script.
epochs = 1000

31
project/train.py Normal file
View File

@ -0,0 +1,31 @@
import tensorflow as tf
import settings
from tensorflow.keras import layers
from keras.layers import Input, Dense, Conv2D, Flatten
from keras.models import Model, Sequential
import numpy as np
from sys import exit
import pickle
# Load the exported samples and flatten each one into a single vector.
samples_file = settings.samples_path
print('Reading samples from: {}'.format(samples_file))
flat_size = settings.midi_resolution * 128
train_X = np.load(samples_file)['arr_0']
train_X = train_X.reshape(train_X.shape[0], flat_size)

# Autoencoder: flat input -> 160-unit ReLU layer -> sigmoid layer of input width.
inputs = tf.keras.layers.Input(shape=(flat_size,))
bottleneck = tf.keras.layers.Dense(160, activation='relu')(inputs)
reconstruction = tf.keras.layers.Dense(flat_size, activation='sigmoid')(bottleneck)
autoencoder = tf.keras.models.Model(inputs, reconstruction)

autoencoder.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

# The network is trained to reproduce its own input.
autoencoder.fit(train_X, train_X, epochs=settings.epochs, batch_size=32)

weights_file = settings.model_path
autoencoder.save_weights(weights_file)
print("Model save to {}".format(weights_file))

View File

@ -1,26 +0,0 @@
# Legacy conversion script: turn the first generated sample into a MIDI file
# and a pianoroll image.
GENERATED_BEAT_PATH = 'data/output/generated_bar.npz'
OUTPUT_PATH = 'data/output/generated_midi.mid'
OUTPUT_PATH_PIANOROLL = 'data/output/pianoroll.png'
import pypianoroll as roll
import matplotlib.pyplot as plt
import numpy as np
import os
# First entry of the stored array, viewed as a 96 x 128 grid.
instruments = np.load(GENERATED_BEAT_PATH)['arr_0'][0]
instruments = instruments.reshape(96,128)
# Binarise against 0.3 times the minimum value, then scale the booleans to 0/255.
# NOTE(review): the intent of thresholding on the minimum is not clear from here.
instruments = instruments>instruments.min()*0.3
instruments = instruments*255
# zeros_up = np.zeros((instruments.shape[0],24))
# zeros_down = np.zeros((instruments.shape[0], 20))
# instruments_full = np.concatenate([zeros_up,instruments], axis=1)
# instruments_full = np.concatenate([instruments_full,zeros_down], axis=1)
# Single-track Multitrack written out as MIDI.
i = roll.Track(instruments, program=0)
return_midi = roll.Multitrack(tracks=[i], tempo=120.0, downbeat=[0, 96, 192, 288], beat_resolution=24)
roll.write(return_midi, OUTPUT_PATH)
# Same data saved as a grayscale image (transposed for display).
plt.imshow(instruments.T, cmap='gray')
plt.savefig(OUTPUT_PATH_PIANOROLL)

View File

@ -1,4 +0,0 @@
# Legacy settings read by the old conversion script.
# Directory scanned for input MIDI files.
midi_path = 'data/midi'
# Target passed to np.savez_compressed for the converted samples.
samples_path = 'data/samples'
# Number of time steps per beat-sized chunk when splitting a pianoroll.
midi_resolution = 96
# Number of chunks grouped into one sample.
beats_per_sample = 1

View File

@ -1,38 +0,0 @@
# Inputs/outputs and epoch count of the legacy training script.
SAMPLES_PATH = 'data/samples.npz'
MODEL_PATH = 'data/autoencoder_model.h5'
EPOCHS = 100
import tensorflow as tf
from tensorflow.keras import layers
from keras.layers import Input, Dense, Conv2D, Flatten
from keras.models import Model
import numpy as np
from sys import exit
import pickle
# Load the stored samples and flatten each one to a 1*96*128 vector.
print('Reading samples from: {}'.format(SAMPLES_PATH))
train_samples = np.load(SAMPLES_PATH)['arr_0']
train_samples = train_samples.reshape(train_samples.shape[0], 1*96*128)

# input = Input(shape=(1,96,128))
# encoded = Conv2D(filters = 32, kernel_size = 1, activation='relu')(input)
# decoded = Conv2D(filters = 128, kernel_size = 1, activation='sigmoid')(encoded)
# autoencoder = Model(input, decoded)
#
# autoencoder.compile(optimizer='adadelta',
# loss='binary_crossentropy',
# metrics=['accuracy'])

# Dense autoencoder. The model needs an explicit Input tensor: previously the
# first Dense layer was never applied to anything and Model() received the
# builtin `input`, so the graph could not be built.
input_layer = Input(shape=(1*96*128,))
encoded = Dense(128)(input_layer)
# NOTE(review): the output layer has no activation although the loss is
# binary cross-entropy — left as in the original, confirm intent.
decoded = Dense(96*128)(encoded)
autoencoder = Model(input_layer, decoded)

autoencoder.compile(optimizer='adadelta',
                    loss='binary_crossentropy',
                    metrics=['accuracy'])

# Trained to reproduce its own input.
autoencoder.fit(train_samples, train_samples, epochs=EPOCHS, batch_size=150)

autoencoder.save_weights(MODEL_PATH)
print("Saved model to disk")