This commit is contained in:
s444439 2023-05-11 18:11:43 +02:00
parent 00e260e765
commit 055cd16bb9
2 changed files with 22 additions and 25 deletions

View File

@@ -1,12 +1,12 @@
import tensorflow import keras
import numpy as np
import tensorflow as tf
import pandas as pd import pandas as pd
model = tensorflow.keras.models.load_model('model.h5') test_data = pd.read_csv("adult_test.csv")
X_test_data = pd.read_csv("X_test.csv").astype(float)
Y_test_data = pd.read_csv("Y_test.csv").astype(float)
model.evaluate(X_test_data, Y_test_data) model = keras.models.load_model("model.h5")
predictions = model.predict(X_test_data) predictions = model.predict(test_data)
predictions.to_csv('predictions.csv', index=False) np.savetxt("predictions.csv", predictions, delimiter=",")

View File

@@ -53,9 +53,9 @@ def check_if_data_set_has_division_into_subsets(file_name):
def get_statistics(data): def get_statistics(data):
train_data = pd.read_csv("X_train.csv", dtype={"income": "category"}) train_data = pd.read_csv("adult_train.csv", dtype={"income": "category"})
dev_data = pd.read_csv("X_dev.csv", dtype={"income": "category"}) dev_data = pd.read_csv("adult_dev.csv", dtype={"income": "category"})
test_data = pd.read_csv("X_test.csv", dtype={"income": "category"}) test_data = pd.read_csv("adult_test.csv", dtype={"income": "category"})
print("Wielkość zbioru: ", len(data)) print("Wielkość zbioru: ", len(data))
print("Wielkość zbioru treningowego: ", len(train_data)) print("Wielkość zbioru treningowego: ", len(train_data))
@@ -106,34 +106,31 @@ def clean(data):
def train_dev_test(data): def train_dev_test(data):
X = data.copy() train_data, test_data = train_test_split(data, test_size=0.3, random_state=42)
y = pandas.DataFrame(data.pop('education-num'))
X_train, X_temp, Y_train, Y_temp = train_test_split(X, y, test_size=0.3, random_state=1)
X_dev, X_test, Y_dev, Y_test = train_test_split(X_temp, Y_temp, test_size=0.3, random_state=1)
X_train.to_csv('X_train.csv', index=False) test_data, dev_data = train_test_split(test_data, test_size=0.33, random_state=42)
X_dev.to_csv('X_dev.csv', index=False)
X_test.to_csv('X_test.csv', index=False) train_data.to_csv("adult_train.csv", index=False)
Y_test.to_csv('Y_test.csv', index=False) dev_data.to_csv("adult_dev.csv", index=False)
Y_train.to_csv('Y_train.csv', index=False) test_data.to_csv("adult_test.csv", index=False)
Y_dev.to_csv('Y_dev.csv', index=False)
return X_train, X_dev, X_test return train_data, dev_data, test_data
def create_model(): def create_model():
data = pd.read_csv('X_train.csv') data = pd.read_csv('adult_train.csv')
X = data.copy() X = data.copy()
y = data["education-num"] y = data["education-num"]
X_train_encoded = pd.get_dummies(X) X_train_encoded = pd.get_dummies(X)
y_train_cat = to_categorical(y) y_train_cat = to_categorical(y)
model = Sequential() model = Sequential()
model.add(Dense(64, activation='relu', input_dim=X_train_encoded.shape[1])) model.add(Dense(64, activation='relu', input_dim=X_train_encoded.shape[1]))
model.add(Dense(17, activation='softmax')) model.add(Dense(17, activation='sigmoid'))
model.compile(optimizer='adam', model.compile(optimizer='adam',
loss='categorical_crossentropy', loss='binary_crossentropy',
metrics=['accuracy']) metrics=['accuracy'])
model.fit(X_train_encoded, y_train_cat, epochs=10, batch_size=32, validation_data=(X_train_encoded, y_train_cat)) model.fit(X_train_encoded, y_train_cat, epochs=10, batch_size=32, validation_data=(X_train_encoded, y_train_cat))
model.save('model.h5') model.save('model.joblib')
if __name__ == '__main__': if __name__ == '__main__':