Add Sacred experiment tracking

PawelDopierala 2024-06-12 12:01:14 +02:00
parent 1245979730
commit 52ede7236e
6 changed files with 50117 additions and 1 deletion


@@ -2,7 +2,7 @@ FROM ubuntu:latest
 RUN apt-get update && \
     apt-get install -y python3-pip && \
-    pip3 install kaggle pandas scikit-learn tensorflow matplotlib mlflow
+    pip3 install kaggle pandas scikit-learn tensorflow matplotlib mlflow git sacred pymongo
 RUN useradd -ms /bin/bash jenkins
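sacred and pymongo back the Mongo-based run tracking added below. One caveat: Sacred's optional git integration needs the git binary, which pip cannot install; if the git entry in the pip3 line was meant to provide the VCS, apt-get install -y git would be the usual route (my assumption, not part of the commit). A minimal smoke test to run inside the built image:

# Smoke test (illustrative, not part of this commit): confirm the
# experiment-tracking stack imports inside the image.
import sacred
import pymongo
import tensorflow as tf
print("sacred", sacred.__version__)
print("pymongo", pymongo.version)
print("tensorflow", tf.__version__)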

JenkinsfileSacred (new file, 44 lines)

@@ -0,0 +1,44 @@
pipeline {
    agent {
        dockerfile true
    }
    parameters {
        buildSelector(
            defaultSelector: lastSuccessful(),
            description: 'Which build to use for copying artifacts',
            name: 'BUILD_SELECTOR'
        )
    }
    triggers {
        upstream(upstreamProjects: 'z-s495719-create-dataset', threshold: hudson.model.Result.SUCCESS)
    }
    stages {
        stage('Git') {
            steps {
                git(
                    url: "https://git.wmi.amu.edu.pl/s495719/ium_495719.git",
                    branch: "main"
                )
            }
        }
        stage('CopyArtifacts') {
            steps {
                copyArtifacts fingerprintArtifacts: true, projectName: 'z-s495719-create-dataset', selector: buildParameter('BUILD_SELECTOR')
            }
        }
        stage('Script') {
            steps {
                sh 'chmod 777 sacred/create_model.py'
                sh "python3 sacred/create_model.py"
            }
        }
        stage('CreateArtifacts') {
            steps {
                archiveArtifacts artifacts: 'sacred/hp_model.h5'
            }
        }
    }
}
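The pipeline runs create_model.py with its hard-coded config defaults. A minimal sketch of how the job could be made tunable, assuming hypothetical EPOCHS, LEARNING_RATE, and BATCH_SIZE environment variables exported by a Jenkins stage (not something this commit does): let the Sacred config read them with fallbacks.

# Sketch (assumption, not in the commit): environment-driven hyperparameters.
import os
from sacred import Experiment

ex = Experiment('495719')

@ex.config
def config():
    epochs = int(os.environ.get('EPOCHS', 10))
    learning_rate = float(os.environ.get('LEARNING_RATE', 0.001))
    batch_size = int(os.environ.get('BATCH_SIZE', 32))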

hp_dev.csv (new file, 5001 lines)

File diff suppressed because it is too large

hp_test.csv (new file, 1001 lines)

File diff suppressed because it is too large

hp_train.csv (new file, 44001 lines)

File diff suppressed because it is too large
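The three CSVs add 50,000 data rows in total (44,000 train, 5,000 dev, 1,000 test, i.e. an 88/10/2 split, assuming one header row per file). A quick sanity check, purely illustrative and assuming the files sit in the current directory:

# Sanity check (illustrative): verify the committed split sizes.
import pandas as pd
for name in ('hp_train', 'hp_dev', 'hp_test'):
    df = pd.read_csv(name + '.csv')
    print(name, df.shape)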

sacred/create_model.py (new file, 69 lines)

@@ -0,0 +1,69 @@
import numpy as np
import pandas as pd
from keras.models import Sequential
from keras.layers import Dense
from keras.optimizers import Adam
from keras import regularizers
from sacred import Experiment
from sacred.observers import MongoObserver, FileStorageObserver
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from helper import prepare_tensors

ex = Experiment('495719')
# Log every run both to the course MongoDB instance and to a local directory.
ex.observers.append(MongoObserver(url='mongodb://admin:IUM_2021@tzietkiewicz.vm.wmi.amu.edu.pl:27017'))
ex.observers.append(FileStorageObserver('my_runs'))


@ex.config
def config():
    # Default hyperparameters; Sacred injects these into main().
    epochs = 10
    learning_rate = 0.001
    batch_size = 32


@ex.main
def main(epochs, learning_rate, batch_size, _run):
    # open_resource registers each CSV as a tracked resource of the run.
    # Note: the ../ paths assume the script is launched from the sacred/
    # directory; the Jenkins stage runs it from the workspace root, so the
    # prefix may need adjusting depending on the working directory.
    with _run.open_resource("../hp_train.csv") as f:
        hp_train = pd.read_csv(f)
    with _run.open_resource("../hp_dev.csv") as f:
        hp_dev = pd.read_csv(f)

    X_train, Y_train = prepare_tensors(hp_train)
    X_dev, Y_dev = prepare_tensors(hp_dev)

    # Feedforward regressor: 7 input features, L2-regularized hidden layers,
    # one linear output for the predicted price.
    model = Sequential()
    model.add(Dense(64, input_dim=7, activation='relu', kernel_regularizer=regularizers.l2(0.01)))
    model.add(Dense(32, activation='relu', kernel_regularizer=regularizers.l2(0.01)))
    model.add(Dense(16, activation='relu', kernel_regularizer=regularizers.l2(0.01)))
    model.add(Dense(8, activation='relu', kernel_regularizer=regularizers.l2(0.01)))
    model.add(Dense(1, activation='linear'))

    adam = Adam(learning_rate=learning_rate, beta_1=0.9, beta_2=0.999, epsilon=1e-7)
    model.compile(optimizer=adam, loss='mean_squared_error')
    model.fit(X_train, Y_train, epochs=epochs, batch_size=batch_size, validation_data=(X_dev, Y_dev))

    # Persist the trained model and attach it to the Sacred run.
    model.save('hp_model.h5')
    ex.add_artifact("hp_model.h5")

    # Evaluate on the held-out test split and log predictions and metrics.
    with _run.open_resource("../hp_test.csv") as f:
        hp_test = pd.read_csv(f)
    X_test, Y_test = prepare_tensors(hp_test)
    test_predictions = model.predict(X_test)
    predictions_df = pd.DataFrame(test_predictions, columns=["Predicted_Price"])
    predictions_df.to_csv('hp_test_predictions.csv', index=False)

    rmse = np.sqrt(mean_squared_error(Y_test, test_predictions))
    mae = mean_absolute_error(Y_test, test_predictions)
    r2 = r2_score(Y_test, test_predictions)
    _run.log_scalar("rmse", rmse)
    _run.log_scalar("mae", mae)
    _run.log_scalar("r2", r2)


if __name__ == '__main__':
    ex.run()
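One caveat worth noting: because the entry point calls ex.run() rather than ex.run_commandline(), Sacred's command-line overrides ("python3 sacred/create_model.py with epochs=20") are not parsed. A minimal sketch of both alternatives, as an illustration rather than a change to the commit:

# Illustrative alternatives (not part of the commit):
if __name__ == '__main__':
    # (a) parse Sacred's CLI, enabling: python3 create_model.py with epochs=20
    ex.run_commandline()
    # (b) or override the config programmatically:
    # ex.run(config_updates={'epochs': 20, 'learning_rate': 0.0005})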