Added Sacred
Some checks failed
s444380-training/pipeline/head There was a failure building this commit
Some checks failed
s444380-training/pipeline/head There was a failure building this commit
This commit is contained in:
parent
ca0d892c3b
commit
798937cb87
@ -16,6 +16,6 @@ RUN apt-get update && apt-get install -y python3-pip unzip && rm -rf /var/lib/ap
|
|||||||
|
|
||||||
RUN export PATH="$PATH:/root/.local/bin"
|
RUN export PATH="$PATH:/root/.local/bin"
|
||||||
|
|
||||||
RUN pip3 install kaggle pandas scikit-learn tensorflow keras matplotlib numpy
|
RUN pip3 install kaggle pandas scikit-learn tensorflow keras matplotlib numpy sacred
|
||||||
|
|
||||||
RUN mkdir /.kaggle && chmod o+w /.kaggle
|
RUN mkdir /.kaggle && chmod o+w /.kaggle
|
||||||
|
@ -28,8 +28,8 @@ pipeline {
|
|||||||
stage("Train model") {
|
stage("Train model") {
|
||||||
steps {
|
steps {
|
||||||
sh "chmod u+x ./train_model.py"
|
sh "chmod u+x ./train_model.py"
|
||||||
sh "python3 ./train_model.py $EPOCHS"
|
sh "python3 ./train_model.py with 'epochs=$EPOCHS'"
|
||||||
archiveArtifacts artifacts: "model/*, out.csv", onlyIfSuccessful: true
|
archiveArtifacts artifacts: "model/*, out.csv, experiments/*/*", onlyIfSuccessful: true
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
156
train_model.py
156
train_model.py
@ -7,81 +7,97 @@ from keras.layers import Dense
|
|||||||
import tensorflow as tf
|
import tensorflow as tf
|
||||||
import numpy as np
|
import numpy as np
|
||||||
|
|
||||||
|
from sacred import Experiment
|
||||||
|
from sacred.observers import FileStorageObserver, MongoObserver
|
||||||
|
|
||||||
tf.config.set_visible_devices([], 'GPU')
|
ex = Experiment()
|
||||||
|
ex.observers.append(FileStorageObserver("experiments"))
|
||||||
# Read and split data
|
#ex.observers.append(MongoObserver(url="mongodb://mongo_user:mongo_password_IUM_2021@localhost:27017", db_name="sacred"))
|
||||||
train_data = pd.read_csv("crime_train.csv")
|
|
||||||
val_data = pd.read_csv("crime_dev.csv")
|
|
||||||
test_data = pd.read_csv("crime_test.csv")
|
|
||||||
|
|
||||||
x_columns = ["DISTRICT", "STREET", "YEAR", "MONTH", "DAY_OF_WEEK", "HOUR", "Lat", "Long"]
|
|
||||||
y_column = "OFFENSE_CODE_GROUP"
|
|
||||||
|
|
||||||
x_train = train_data[x_columns]
|
|
||||||
y_train = train_data[y_column]
|
|
||||||
x_val = val_data[x_columns]
|
|
||||||
y_val = val_data[y_column]
|
|
||||||
x_test = test_data[x_columns]
|
|
||||||
y_test = test_data[y_column]
|
|
||||||
|
|
||||||
num_categories = len(y_train.unique())
|
|
||||||
num_features = len(x_columns)
|
|
||||||
|
|
||||||
# Train label encoders for categorical data
|
|
||||||
encoder_y = LabelEncoder()
|
|
||||||
encoder_day = LabelEncoder()
|
|
||||||
encoder_dist = LabelEncoder()
|
|
||||||
encoder_street = LabelEncoder()
|
|
||||||
encoder_y.fit(y_train)
|
|
||||||
encoder_day.fit(x_train["DAY_OF_WEEK"])
|
|
||||||
encoder_dist.fit(x_train["DISTRICT"])
|
|
||||||
encoder_street.fit(pd.concat([x_val["STREET"], x_test["STREET"], x_train["STREET"]], axis=0))
|
|
||||||
|
|
||||||
|
|
||||||
# Encode train categorical data
|
@ex.config
|
||||||
y_train = encoder_y.transform(y_train)
|
def config():
|
||||||
x_train["DAY_OF_WEEK"] = encoder_day.transform(x_train["DAY_OF_WEEK"])
|
|
||||||
x_train["DISTRICT"] = encoder_dist.transform(x_train["DISTRICT"])
|
|
||||||
x_train["STREET"] = encoder_street.transform(x_train["STREET"])
|
|
||||||
|
|
||||||
# Encode validation categorical data
|
|
||||||
y_val = encoder_y.transform(y_val)
|
|
||||||
x_val["DAY_OF_WEEK"] = encoder_day.transform(x_val["DAY_OF_WEEK"])
|
|
||||||
x_val["DISTRICT"] = encoder_dist.transform(x_val["DISTRICT"])
|
|
||||||
x_val["STREET"] = encoder_street.transform(x_val["STREET"])
|
|
||||||
|
|
||||||
# Encode test categorical data
|
|
||||||
y_test = encoder_y.transform(y_test)
|
|
||||||
x_test["DAY_OF_WEEK"] = encoder_day.transform(x_test["DAY_OF_WEEK"])
|
|
||||||
x_test["DISTRICT"] = encoder_dist.transform(x_test["DISTRICT"])
|
|
||||||
x_test["STREET"] = encoder_street.transform(x_test["STREET"])
|
|
||||||
|
|
||||||
# Define model
|
|
||||||
model = Sequential()
|
|
||||||
model.add(Dense(32, activation='relu', input_dim=num_features))
|
|
||||||
model.add(Dense(64, activation='relu'))
|
|
||||||
model.add(Dense(128, activation='relu'))
|
|
||||||
model.add(Dense(num_categories, activation='softmax'))
|
|
||||||
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy', 'sparse_categorical_accuracy'])
|
|
||||||
|
|
||||||
if len(sys.argv) > 1:
|
|
||||||
epochs = sys.argv[1]
|
|
||||||
else:
|
|
||||||
epochs = 10
|
epochs = 10
|
||||||
|
|
||||||
# Train model
|
|
||||||
model.fit(x_train, y_train, epochs=int(epochs), validation_data=(x_val, y_val))
|
|
||||||
|
|
||||||
# Make predictions
|
@ex.automain
|
||||||
y_pred = model.predict(x_test)
|
def main(epochs):
|
||||||
output = [np.argmax(pred) for pred in y_pred]
|
tf.config.set_visible_devices([], 'GPU')
|
||||||
output_text = encoder_y.inverse_transform(list(output))
|
|
||||||
|
|
||||||
# Save predictions
|
# Read and split data
|
||||||
data_to_save = pd.concat([test_data[x_columns], test_data[y_column]], axis = 1)
|
train_data = pd.read_csv("crime_train.csv")
|
||||||
data_to_save["PREDICTED"] = output_text
|
val_data = pd.read_csv("crime_dev.csv")
|
||||||
data_to_save.to_csv("out.csv")
|
test_data = pd.read_csv("crime_test.csv")
|
||||||
|
|
||||||
# Save model
|
x_columns = ["DISTRICT", "STREET", "YEAR", "MONTH", "DAY_OF_WEEK", "HOUR", "Lat", "Long"]
|
||||||
model.save("model")
|
y_column = "OFFENSE_CODE_GROUP"
|
||||||
|
|
||||||
|
x_train = train_data[x_columns]
|
||||||
|
y_train = train_data[y_column]
|
||||||
|
x_val = val_data[x_columns]
|
||||||
|
y_val = val_data[y_column]
|
||||||
|
x_test = test_data[x_columns]
|
||||||
|
y_test = test_data[y_column]
|
||||||
|
|
||||||
|
num_categories = len(y_train.unique())
|
||||||
|
num_features = len(x_columns)
|
||||||
|
|
||||||
|
# Train label encoders for categorical data
|
||||||
|
encoder_y = LabelEncoder()
|
||||||
|
encoder_day = LabelEncoder()
|
||||||
|
encoder_dist = LabelEncoder()
|
||||||
|
encoder_street = LabelEncoder()
|
||||||
|
encoder_y.fit(y_train)
|
||||||
|
encoder_day.fit(x_train["DAY_OF_WEEK"])
|
||||||
|
encoder_dist.fit(x_train["DISTRICT"])
|
||||||
|
encoder_street.fit(pd.concat([x_val["STREET"], x_test["STREET"], x_train["STREET"]], axis=0))
|
||||||
|
|
||||||
|
|
||||||
|
# Encode train categorical data
|
||||||
|
y_train = encoder_y.transform(y_train)
|
||||||
|
x_train["DAY_OF_WEEK"] = encoder_day.transform(x_train["DAY_OF_WEEK"])
|
||||||
|
x_train["DISTRICT"] = encoder_dist.transform(x_train["DISTRICT"])
|
||||||
|
x_train["STREET"] = encoder_street.transform(x_train["STREET"])
|
||||||
|
|
||||||
|
# Encode validation categorical data
|
||||||
|
y_val = encoder_y.transform(y_val)
|
||||||
|
x_val["DAY_OF_WEEK"] = encoder_day.transform(x_val["DAY_OF_WEEK"])
|
||||||
|
x_val["DISTRICT"] = encoder_dist.transform(x_val["DISTRICT"])
|
||||||
|
x_val["STREET"] = encoder_street.transform(x_val["STREET"])
|
||||||
|
|
||||||
|
# Encode test categorical data
|
||||||
|
y_test = encoder_y.transform(y_test)
|
||||||
|
x_test["DAY_OF_WEEK"] = encoder_day.transform(x_test["DAY_OF_WEEK"])
|
||||||
|
x_test["DISTRICT"] = encoder_dist.transform(x_test["DISTRICT"])
|
||||||
|
x_test["STREET"] = encoder_street.transform(x_test["STREET"])
|
||||||
|
|
||||||
|
# Define model
|
||||||
|
model = Sequential()
|
||||||
|
model.add(Dense(32, activation='relu', input_dim=num_features))
|
||||||
|
model.add(Dense(64, activation='relu'))
|
||||||
|
model.add(Dense(128, activation='relu'))
|
||||||
|
model.add(Dense(num_categories, activation='softmax'))
|
||||||
|
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy', 'sparse_categorical_accuracy'])
|
||||||
|
|
||||||
|
# Train model
|
||||||
|
history = model.fit(x_train, y_train, epochs=int(epochs), validation_data=(x_val, y_val))
|
||||||
|
|
||||||
|
# Make predictions
|
||||||
|
y_pred = model.predict(x_test)
|
||||||
|
output = [np.argmax(pred) for pred in y_pred]
|
||||||
|
output_text = encoder_y.inverse_transform(list(output))
|
||||||
|
|
||||||
|
# Save predictions
|
||||||
|
data_to_save = pd.concat([test_data[x_columns], test_data[y_column]], axis = 1)
|
||||||
|
data_to_save["PREDICTED"] = output_text
|
||||||
|
data_to_save.to_csv("out.csv")
|
||||||
|
|
||||||
|
# Save model
|
||||||
|
model.save("model")
|
||||||
|
ex.add_artifact("model/saved_model.pb")
|
||||||
|
|
||||||
|
# Log metrics
|
||||||
|
ex.log_scalar("loss", history.history["loss"])
|
||||||
|
ex.log_scalar("accuracy", history.history["accuracy"])
|
||||||
|
ex.log_scalar("val_loss", history.history["val_loss"])
|
||||||
|
ex.log_scalar("val_accuracy", history.history["val_accuracy"])
|
||||||
|
Loading…
Reference in New Issue
Block a user