ium_444380/train_model.py
Kamil Guttmann 6c8a16fe04
Some checks failed
s444380-training/pipeline/head There was a failure building this commit
Training jenkinsfile
2022-05-03 14:40:26 +02:00

88 lines
2.8 KiB
Python

import sys
import pandas as pd
from sklearn.preprocessing import LabelEncoder
from keras.models import Sequential
from keras.layers import Dense
import tensorflow as tf
import numpy as np
tf.config.set_visible_devices([], 'GPU')
# Read and split data
train_data = pd.read_csv("crime_train.csv")
val_data = pd.read_csv("crime_dev.csv")
test_data = pd.read_csv("crime_test.csv")
x_columns = ["DISTRICT", "STREET", "YEAR", "MONTH", "DAY_OF_WEEK", "HOUR", "Lat", "Long"]
y_column = "OFFENSE_CODE_GROUP"
x_train = train_data[x_columns]
y_train = train_data[y_column]
x_val = val_data[x_columns]
y_val = val_data[y_column]
x_test = test_data[x_columns]
y_test = test_data[y_column]
num_categories = len(y_train.unique())
num_features = len(x_columns)
# Train label encoders for categorical data
encoder_y = LabelEncoder()
encoder_day = LabelEncoder()
encoder_dist = LabelEncoder()
encoder_street = LabelEncoder()
encoder_y.fit(y_train)
encoder_day.fit(x_train["DAY_OF_WEEK"])
encoder_dist.fit(x_train["DISTRICT"])
encoder_street.fit(pd.concat([x_val["STREET"], x_test["STREET"], x_train["STREET"]], axis=0))
# Encode train categorical data
y_train = encoder_y.transform(y_train)
x_train["DAY_OF_WEEK"] = encoder_day.transform(x_train["DAY_OF_WEEK"])
x_train["DISTRICT"] = encoder_dist.transform(x_train["DISTRICT"])
x_train["STREET"] = encoder_street.transform(x_train["STREET"])
# Encode train categorical data
y_val = encoder_y.transform(y_val)
x_val["DAY_OF_WEEK"] = encoder_day.transform(x_val["DAY_OF_WEEK"])
x_val["DISTRICT"] = encoder_dist.transform(x_val["DISTRICT"])
x_val["STREET"] = encoder_street.transform(x_val["STREET"])
# Encode train categorical data
y_test = encoder_y.transform(y_test)
x_test["DAY_OF_WEEK"] = encoder_day.transform(x_test["DAY_OF_WEEK"])
x_test["DISTRICT"] = encoder_dist.transform(x_test["DISTRICT"])
x_test["STREET"] = encoder_street.transform(x_test["STREET"])
# Define model
model = Sequential()
model.add(Dense(32, activation='relu', input_dim=num_features))
model.add(Dense(64, activation='relu'))
model.add(Dense(128, activation='relu'))
model.add(Dense(num_categories, activation='softmax'))
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy', 'sparse_categorical_accuracy'])
if len(sys.argv) > 1:
epochs = sys.argv[1]
else:
epochs = 10
# Train model
model.fit(x_train, y_train, epochs=epochs, validation_data=(x_val, y_val))
# Make predictions
y_pred = model.predict(x_test)
output = [np.argmax(pred) for pred in y_pred]
output_text = encoder_y.inverse_transform(list(output))
# Save predictions
data_to_save = pd.concat([test_data[x_columns], test_data[y_column]], axis = 1)
data_to_save["PREDICTED"] = output_text
data_to_save.to_csv("out.csv")
# Save model
model.save("model")