This commit is contained in:
s444501 2022-05-02 17:39:03 +02:00
parent 3c4da67f2d
commit a428cad996
2 changed files with 91 additions and 66 deletions

View File

@ -2,16 +2,10 @@ import sys
import torch import torch
import torch.nn as nn import torch.nn as nn
import torch.nn.functional as F import torch.nn.functional as F
from sacred.observers import FileStorageObserver, MongoObserver
from sklearn.preprocessing import LabelEncoder from sklearn.preprocessing import LabelEncoder
import pandas as pd import pandas as pd
from sacred import Experiment
# Parametry z konsoli
try:
epochs = int(sys.argv[1])
except:
print('No epoch number passed. Defaulting to 100')
epochs = 100
# Model # Model
@ -29,74 +23,105 @@ class Model(nn.Module):
return x return x
# Ładowanie danych # Sacred
train_set = pd.read_csv('d_train.csv', encoding='latin-1') ex = Experiment()
train_set = train_set[['Rating', 'Branch', 'Reviewer_Location']] ex.observers.append(FileStorageObserver('my_runs'))
# Parametry treningu -> my_runs/X/config.json
test_set = pd.read_csv('d_test.csv', encoding='latin-1') # Plik z modelem jako artefakt -> my_runs/X/model.pkl
test_set = test_set[['Rating', 'Branch', 'Reviewer_Location']] # Kod źródłowy -> my_runs/_sources/biblioteki_ml_XXXXXXXXXXX.py
# Wyniki (ostateczny loss) -> my_runs/X/metrics.json
ex.observers.append(MongoObserver(url='mongodb://mongo_user:mongo_password_IUM_2021@localhost:27017',
db_name='sacred'))
# Mapowanie kolumny 'Reviewer_Location' na cyfry @ex.config
le = LabelEncoder() def my_config():
le.fit(pd.concat([train_set['Reviewer_Location'], test_set['Reviewer_Location']])) epochs = 100
train_set['Reviewer_Location'] = le.transform(train_set['Reviewer_Location'])
test_set['Reviewer_Location'] = le.transform(test_set['Reviewer_Location'])
# Mapowanie kolumny 'Branch' na inny sposób @ex.automain
mappings = { def train_main(epochs, _run):
'Disneyland_California': 0, # Parametry z konsoli
'Disneyland_Paris': 1, # try:
'Disneyland_HongKong': 2 # epochs = int(sys.argv[1])
} # except:
train_set['Branch'] = train_set['Branch'].apply(lambda x: mappings[x]) # print('No epoch number passed. Defaulting to 100')
test_set['Branch'] = test_set['Branch'].apply(lambda x: mappings[x]) # epochs = 100
# Zamiana danych na tensory # Ładowanie danych
X_train = train_set[['Rating', 'Reviewer_Location']].to_numpy() train_set = pd.read_csv('d_train.csv', encoding='latin-1')
X_test = test_set[['Rating', 'Reviewer_Location']].to_numpy() train_set = train_set[['Rating', 'Branch', 'Reviewer_Location']]
y_train = train_set['Branch'].to_numpy()
y_test = test_set['Branch'].to_numpy()
X_train = torch.FloatTensor(X_train) test_set = pd.read_csv('d_test.csv', encoding='latin-1')
X_test = torch.FloatTensor(X_test) test_set = test_set[['Rating', 'Branch', 'Reviewer_Location']]
y_train = torch.LongTensor(y_train)
y_test = torch.LongTensor(y_test)
# Hiperparametry # Mapowanie kolumny 'Reviewer_Location' na cyfry
model = Model() le = LabelEncoder()
criterion = nn.CrossEntropyLoss() le.fit(pd.concat([train_set['Reviewer_Location'], test_set['Reviewer_Location']]))
optimizer = torch.optim.Adam(model.parameters(), lr=0.01) train_set['Reviewer_Location'] = le.transform(train_set['Reviewer_Location'])
test_set['Reviewer_Location'] = le.transform(test_set['Reviewer_Location'])
# Trening # Mapowanie kolumny 'Branch' na inny sposób
losses = [] mappings = {
for i in range(epochs): 'Disneyland_California': 0,
y_pred = model.forward(X_train) 'Disneyland_Paris': 1,
loss = criterion(y_pred, y_train) 'Disneyland_HongKong': 2
losses.append(loss) }
print(f'epoch: {i:2} loss: {loss.item():10.8f}') train_set['Branch'] = train_set['Branch'].apply(lambda x: mappings[x])
test_set['Branch'] = test_set['Branch'].apply(lambda x: mappings[x])
optimizer.zero_grad()
loss.backward()
optimizer.step()
# Testy # Zamiana danych na tensory
preds = [] X_train = train_set[['Rating', 'Reviewer_Location']].to_numpy()
with torch.no_grad(): X_test = test_set[['Rating', 'Reviewer_Location']].to_numpy()
for val in X_test: y_train = train_set['Branch'].to_numpy()
y_hat = model.forward(val) y_test = test_set['Branch'].to_numpy()
preds.append(y_hat.argmax().item())
df = pd.DataFrame({'Testing Y': y_test, 'Predicted Y': preds}) X_train = torch.FloatTensor(X_train)
df['Correct'] = [1 if corr == pred else 0 for corr, pred in zip(df['Testing Y'], df['Predicted Y'])] X_test = torch.FloatTensor(X_test)
print(f"{df['Correct'].sum() / len(df)} percent of predictions correct") y_train = torch.LongTensor(y_train)
y_test = torch.LongTensor(y_test)
# Zapis do pliku # Hiperparametry
df.to_csv('neural_network_prediction_results.csv', index=False) model = Model()
torch.save(model, "model.pkl") criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.01)
# Trening
losses = []
for i in range(epochs):
y_pred = model.forward(X_train)
loss = criterion(y_pred, y_train)
losses.append(loss)
print(f'epoch: {i:2} loss: {loss.item():10.8f}')
optimizer.zero_grad()
loss.backward()
optimizer.step()
_run.log_scalar("training.final_loss", losses[-1].item()) # Ostateczny loss
# Testy
preds = []
with torch.no_grad():
for val in X_test:
y_hat = model.forward(val)
preds.append(y_hat.argmax().item())
df = pd.DataFrame({'Testing Y': y_test, 'Predicted Y': preds})
df['Correct'] = [1 if corr == pred else 0 for corr, pred in zip(df['Testing Y'], df['Predicted Y'])]
print(f"{df['Correct'].sum() / len(df)} percent of predictions correct")
# Zapis do pliku
df.to_csv('neural_network_prediction_results.csv', index=False)
torch.save(model, "model.pkl")
# Zapis Sacred
ex.add_artifact("model.pkl")
ex.add_artifact("neural_network_prediction_results.csv")

View File

@ -24,13 +24,13 @@ pipeline {
stage('Train model') { stage('Train model') {
steps { steps {
withEnv(["EPOCH=${params.EPOCH}"]) { withEnv(["EPOCH=${params.EPOCH}"]) {
sh 'python biblioteki_ml.py $EPOCH' sh 'python biblioteki_ml.py with "epochs=$EPOCH"'
} }
} }
} }
stage('Archive model') { stage('Archive artifacts') {
steps { steps {
archiveArtifacts artifacts: 'model.pkl, neural_network_prediction_results.csv' archiveArtifacts artifacts: 'model.pkl, neural_network_prediction_results.csv, my_run'
} }
} }
stage ('Model - evaluation') { stage ('Model - evaluation') {