sacred
This commit is contained in:
parent
3c4da67f2d
commit
a428cad996
151
biblioteki_ml.py
151
biblioteki_ml.py
@ -2,16 +2,10 @@ import sys
|
|||||||
import torch
|
import torch
|
||||||
import torch.nn as nn
|
import torch.nn as nn
|
||||||
import torch.nn.functional as F
|
import torch.nn.functional as F
|
||||||
|
from sacred.observers import FileStorageObserver, MongoObserver
|
||||||
from sklearn.preprocessing import LabelEncoder
|
from sklearn.preprocessing import LabelEncoder
|
||||||
import pandas as pd
|
import pandas as pd
|
||||||
|
from sacred import Experiment
|
||||||
|
|
||||||
# Parametry z konsoli
|
|
||||||
try:
|
|
||||||
epochs = int(sys.argv[1])
|
|
||||||
except:
|
|
||||||
print('No epoch number passed. Defaulting to 100')
|
|
||||||
epochs = 100
|
|
||||||
|
|
||||||
|
|
||||||
# Model
|
# Model
|
||||||
@ -29,74 +23,105 @@ class Model(nn.Module):
|
|||||||
return x
|
return x
|
||||||
|
|
||||||
|
|
||||||
# Ładowanie danych
|
# Sacred
|
||||||
train_set = pd.read_csv('d_train.csv', encoding='latin-1')
|
ex = Experiment()
|
||||||
train_set = train_set[['Rating', 'Branch', 'Reviewer_Location']]
|
ex.observers.append(FileStorageObserver('my_runs'))
|
||||||
|
# Parametry treningu -> my_runs/X/config.json
|
||||||
test_set = pd.read_csv('d_test.csv', encoding='latin-1')
|
# Plik z modelem jako artefakt -> my_runs/X/model.pkl
|
||||||
test_set = test_set[['Rating', 'Branch', 'Reviewer_Location']]
|
# Kod źródłowy -> my_runs/_sources/biblioteki_ml_XXXXXXXXXXX.py
|
||||||
|
# Wyniki (ostateczny loss) -> my_runs/X/metrics.json
|
||||||
|
ex.observers.append(MongoObserver(url='mongodb://mongo_user:mongo_password_IUM_2021@localhost:27017',
|
||||||
|
db_name='sacred'))
|
||||||
|
|
||||||
|
|
||||||
# Mapowanie kolumny 'Reviewer_Location' na cyfry
|
@ex.config
|
||||||
le = LabelEncoder()
|
def my_config():
|
||||||
le.fit(pd.concat([train_set['Reviewer_Location'], test_set['Reviewer_Location']]))
|
epochs = 100
|
||||||
train_set['Reviewer_Location'] = le.transform(train_set['Reviewer_Location'])
|
|
||||||
test_set['Reviewer_Location'] = le.transform(test_set['Reviewer_Location'])
|
|
||||||
|
|
||||||
|
|
||||||
# Mapowanie kolumny 'Branch' na inny sposób
|
@ex.automain
|
||||||
mappings = {
|
def train_main(epochs, _run):
|
||||||
'Disneyland_California': 0,
|
# Parametry z konsoli
|
||||||
'Disneyland_Paris': 1,
|
# try:
|
||||||
'Disneyland_HongKong': 2
|
# epochs = int(sys.argv[1])
|
||||||
}
|
# except:
|
||||||
train_set['Branch'] = train_set['Branch'].apply(lambda x: mappings[x])
|
# print('No epoch number passed. Defaulting to 100')
|
||||||
test_set['Branch'] = test_set['Branch'].apply(lambda x: mappings[x])
|
# epochs = 100
|
||||||
|
|
||||||
|
|
||||||
# Zamiana danych na tensory
|
# Ładowanie danych
|
||||||
X_train = train_set[['Rating', 'Reviewer_Location']].to_numpy()
|
train_set = pd.read_csv('d_train.csv', encoding='latin-1')
|
||||||
X_test = test_set[['Rating', 'Reviewer_Location']].to_numpy()
|
train_set = train_set[['Rating', 'Branch', 'Reviewer_Location']]
|
||||||
y_train = train_set['Branch'].to_numpy()
|
|
||||||
y_test = test_set['Branch'].to_numpy()
|
|
||||||
|
|
||||||
X_train = torch.FloatTensor(X_train)
|
test_set = pd.read_csv('d_test.csv', encoding='latin-1')
|
||||||
X_test = torch.FloatTensor(X_test)
|
test_set = test_set[['Rating', 'Branch', 'Reviewer_Location']]
|
||||||
y_train = torch.LongTensor(y_train)
|
|
||||||
y_test = torch.LongTensor(y_test)
|
|
||||||
|
|
||||||
|
|
||||||
# Hiperparametry
|
# Mapowanie kolumny 'Reviewer_Location' na cyfry
|
||||||
model = Model()
|
le = LabelEncoder()
|
||||||
criterion = nn.CrossEntropyLoss()
|
le.fit(pd.concat([train_set['Reviewer_Location'], test_set['Reviewer_Location']]))
|
||||||
optimizer = torch.optim.Adam(model.parameters(), lr=0.01)
|
train_set['Reviewer_Location'] = le.transform(train_set['Reviewer_Location'])
|
||||||
|
test_set['Reviewer_Location'] = le.transform(test_set['Reviewer_Location'])
|
||||||
|
|
||||||
|
|
||||||
# Trening
|
# Mapowanie kolumny 'Branch' na inny sposób
|
||||||
losses = []
|
mappings = {
|
||||||
for i in range(epochs):
|
'Disneyland_California': 0,
|
||||||
y_pred = model.forward(X_train)
|
'Disneyland_Paris': 1,
|
||||||
loss = criterion(y_pred, y_train)
|
'Disneyland_HongKong': 2
|
||||||
losses.append(loss)
|
}
|
||||||
print(f'epoch: {i:2} loss: {loss.item():10.8f}')
|
train_set['Branch'] = train_set['Branch'].apply(lambda x: mappings[x])
|
||||||
|
test_set['Branch'] = test_set['Branch'].apply(lambda x: mappings[x])
|
||||||
optimizer.zero_grad()
|
|
||||||
loss.backward()
|
|
||||||
optimizer.step()
|
|
||||||
|
|
||||||
|
|
||||||
# Testy
|
# Zamiana danych na tensory
|
||||||
preds = []
|
X_train = train_set[['Rating', 'Reviewer_Location']].to_numpy()
|
||||||
with torch.no_grad():
|
X_test = test_set[['Rating', 'Reviewer_Location']].to_numpy()
|
||||||
for val in X_test:
|
y_train = train_set['Branch'].to_numpy()
|
||||||
y_hat = model.forward(val)
|
y_test = test_set['Branch'].to_numpy()
|
||||||
preds.append(y_hat.argmax().item())
|
|
||||||
|
|
||||||
df = pd.DataFrame({'Testing Y': y_test, 'Predicted Y': preds})
|
X_train = torch.FloatTensor(X_train)
|
||||||
df['Correct'] = [1 if corr == pred else 0 for corr, pred in zip(df['Testing Y'], df['Predicted Y'])]
|
X_test = torch.FloatTensor(X_test)
|
||||||
print(f"{df['Correct'].sum() / len(df)} percent of predictions correct")
|
y_train = torch.LongTensor(y_train)
|
||||||
|
y_test = torch.LongTensor(y_test)
|
||||||
|
|
||||||
|
|
||||||
# Zapis do pliku
|
# Hiperparametry
|
||||||
df.to_csv('neural_network_prediction_results.csv', index=False)
|
model = Model()
|
||||||
torch.save(model, "model.pkl")
|
criterion = nn.CrossEntropyLoss()
|
||||||
|
optimizer = torch.optim.Adam(model.parameters(), lr=0.01)
|
||||||
|
|
||||||
|
|
||||||
|
# Trening
|
||||||
|
losses = []
|
||||||
|
for i in range(epochs):
|
||||||
|
y_pred = model.forward(X_train)
|
||||||
|
loss = criterion(y_pred, y_train)
|
||||||
|
losses.append(loss)
|
||||||
|
print(f'epoch: {i:2} loss: {loss.item():10.8f}')
|
||||||
|
|
||||||
|
optimizer.zero_grad()
|
||||||
|
loss.backward()
|
||||||
|
optimizer.step()
|
||||||
|
_run.log_scalar("training.final_loss", losses[-1].item()) # Ostateczny loss
|
||||||
|
|
||||||
|
|
||||||
|
# Testy
|
||||||
|
preds = []
|
||||||
|
with torch.no_grad():
|
||||||
|
for val in X_test:
|
||||||
|
y_hat = model.forward(val)
|
||||||
|
preds.append(y_hat.argmax().item())
|
||||||
|
|
||||||
|
df = pd.DataFrame({'Testing Y': y_test, 'Predicted Y': preds})
|
||||||
|
df['Correct'] = [1 if corr == pred else 0 for corr, pred in zip(df['Testing Y'], df['Predicted Y'])]
|
||||||
|
print(f"{df['Correct'].sum() / len(df)} percent of predictions correct")
|
||||||
|
|
||||||
|
|
||||||
|
# Zapis do pliku
|
||||||
|
df.to_csv('neural_network_prediction_results.csv', index=False)
|
||||||
|
torch.save(model, "model.pkl")
|
||||||
|
|
||||||
|
# Zapis Sacred
|
||||||
|
ex.add_artifact("model.pkl")
|
||||||
|
ex.add_artifact("neural_network_prediction_results.csv")
|
||||||
|
@ -24,13 +24,13 @@ pipeline {
|
|||||||
stage('Train model') {
|
stage('Train model') {
|
||||||
steps {
|
steps {
|
||||||
withEnv(["EPOCH=${params.EPOCH}"]) {
|
withEnv(["EPOCH=${params.EPOCH}"]) {
|
||||||
sh 'python biblioteki_ml.py $EPOCH'
|
sh 'python biblioteki_ml.py with "epochs=$EPOCH"'
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
stage('Archive model') {
|
stage('Archive artifacts') {
|
||||||
steps {
|
steps {
|
||||||
archiveArtifacts artifacts: 'model.pkl, neural_network_prediction_results.csv'
|
archiveArtifacts artifacts: 'model.pkl, neural_network_prediction_results.csv, my_run'
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
stage ('Model - evaluation') {
|
stage ('Model - evaluation') {
|
||||||
|
Loading…
Reference in New Issue
Block a user