UM project

This commit is contained in:
Wojciech Jarmosz 2021-06-30 10:22:24 +02:00
commit 56dce9a205
4 changed files with 1353 additions and 0 deletions

1
.gitignore vendored Normal file
View File

@ -0,0 +1 @@
env

1191
heart_problem.csv Normal file

File diff suppressed because it is too large Load Diff

45
requirements.txt Normal file
View File

@ -0,0 +1,45 @@
absl-py==0.13.0
astunparse==1.6.3
cachetools==4.2.2
certifi==2021.5.30
chardet==4.0.0
flatbuffers==1.12
gast==0.4.0
google-auth==1.32.0
google-auth-oauthlib==0.4.4
google-pasta==0.2.0
grpcio==1.34.1
h5py==3.1.0
idna==2.10
joblib==1.0.1
Keras==2.4.3
keras-nightly==2.5.0.dev2021032900
Keras-Preprocessing==1.1.2
Markdown==3.3.4
numpy==1.19.5
oauthlib==3.1.1
opt-einsum==3.3.0
pandas==1.2.5
protobuf==3.17.3
pyasn1==0.4.8
pyasn1-modules==0.2.8
python-dateutil==2.8.1
pytz==2021.1
PyYAML==5.4.1
requests==2.25.1
requests-oauthlib==1.3.0
rsa==4.7.2
scikit-learn==0.24.2
scipy==1.7.0
six==1.15.0
tensorboard==2.5.0
tensorboard-data-server==0.6.1
tensorboard-plugin-wit==1.8.0
tensorflow==2.5.0
tensorflow-estimator==2.5.0
termcolor==1.1.0
threadpoolctl==2.1.0
typing-extensions==3.7.4.3
urllib3==1.26.6
Werkzeug==2.0.1
wrapt==1.12.1

116
script.py Normal file
View File

@ -0,0 +1,116 @@
import pandas as pd
from keras.models import Sequential
from keras.layers import Dense
from keras.wrappers.scikit_learn import KerasClassifier
from sklearn import tree
from sklearn.preprocessing import MinMaxScaler
from sklearn.linear_model import SGDClassifier
from sklearn.pipeline import make_pipeline
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score
# Predict heart disease from patient data and test results.
data = pd.read_csv("./heart_problem.csv")

# Drop rows that contain missing values.
data.dropna(inplace=True)

# Drop rows with a zero cholesterol or resting blood pressure reading, which
# the script treats as outliers.
# NOTE(review): zeros are presumably placeholders for missing measurements --
# confirm against the dataset description.
indexes = data[(data['cholesterol'] == 0) | (data['resting bp s'] == 0)].index
data.drop(indexes, inplace=True)

# Feature matrix (six columns) and binary label vector.
X = data[['age', 'sex', 'resting bp s', 'cholesterol', 'fasting blood sugar', 'max heart rate']].to_numpy()
Y = data['target'].to_numpy()

# Train/test split. This happens BEFORE scaling so that the scaler never sees
# the test rows.
X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=0.2)

# Scale the data. The min/max are learned from the training rows only and then
# applied unchanged to the test rows; fitting on the full matrix would leak
# test-set statistics into training. Scaled test values may therefore fall
# slightly outside [0, 1]. X itself is left unscaled.
scaler = MinMaxScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

print(X_train.shape)
print(X_test.shape)
# Placeholders for the metrics of each classifier (neural network, SGD,
# decision tree); each name is reassigned after its model is evaluated.
neural_accuracy = neural_fscore = neural_precision = neural_recall = None
sgd_accuracy = sgd_fscore = sgd_precision = sgd_recall = None
tree_accuracy = tree_fscore = tree_precision = tree_recall = None
# --------------------------------------------------------------------------
# Classify has / does not have heart disease with a neural-network model.
def classificator_model():
    """Build and compile the network handed to ``KerasClassifier``.

    Returns:
        A compiled ``Sequential`` model: a Dense layer of 8 ReLU units that
        takes 6 input features, followed by a 2-unit softmax output layer,
        compiled with categorical cross-entropy loss and the Adam optimizer.
    """
    network = Sequential([
        Dense(8, input_dim=6, activation='relu'),
        Dense(2, activation='softmax'),
    ])
    network.compile(loss='categorical_crossentropy', optimizer='adam')
    return network
# Train the model, then score its predictions on the test split.
estimator = KerasClassifier(
    build_fn=classificator_model,
    epochs=5,
    batch_size=10,
    verbose=0,
)
estimator.fit(X_train, y_train)
predicts = estimator.predict(X_test)
# Metrics are evaluated in the order accuracy, F-score, precision, recall.
neural_accuracy, neural_fscore, neural_precision, neural_recall = (
    metric(y_test, predicts)
    for metric in (accuracy_score, f1_score, precision_score, recall_score)
)
# --------------------------------------------------------------------------
# Classify has / does not have heart disease with an SGD classifier.
# fit() returns the fitted estimator, so construction and training chain.
model = SGDClassifier(max_iter=1000).fit(X_train, y_train)
predicts = model.predict(X_test)
# Metrics are evaluated in the order accuracy, F-score, precision, recall.
sgd_accuracy, sgd_fscore, sgd_precision, sgd_recall = (
    metric(y_test, predicts)
    for metric in (accuracy_score, f1_score, precision_score, recall_score)
)
# --------------------------------------------------------------------------
# Classify has / does not have heart disease with a decision tree.
# fit() returns the fitted estimator, so construction and training chain.
model = tree.DecisionTreeClassifier().fit(X_train, y_train)
predicts = model.predict(X_test)
# Metrics are evaluated in the order accuracy, F-score, precision, recall.
tree_accuracy, tree_fscore, tree_precision, tree_recall = (
    metric(y_test, predicts)
    for metric in (accuracy_score, f1_score, precision_score, recall_score)
)
# ---------------------------------------------------------------------------
# Print accuracy, precision, recall and F-score for each of the three
# classifiers. The section headings are in Polish: "Sieć neuronowa" (neural
# network), "Klasyfikator SGD" (SGD classifier), "Drzewa decyzyjne"
# (decision trees).
print(f'''
Sieć neuronowa:
_____________________________
Accuracy: {neural_accuracy}
Precision: {neural_precision}
Recall: {neural_recall}
F-score: {neural_fscore}
Klasyfikator SGD:
_____________________________
Accuracy: {sgd_accuracy}
Precision: {sgd_precision}
Recall: {sgd_recall}
F-score: {sgd_fscore}
Drzewa decyzyjne:
_____________________________
Accuracy: {tree_accuracy}
Precision: {tree_precision}
Recall: {tree_recall}
F-score: {tree_fscore}
''')