This commit is contained in:
parent
65e53bd20e
commit
a8eb0ab145
@ -13,6 +13,7 @@ WORKDIR /app
|
|||||||
# Skopiujmy nasz skrypt do katalogu /app w kontenerze
|
# Skopiujmy nasz skrypt do katalogu /app w kontenerze
|
||||||
COPY ./skrypt_download.py ./
|
COPY ./skrypt_download.py ./
|
||||||
COPY ./skrypt_stat.py ./
|
COPY ./skrypt_stat.py ./
|
||||||
|
COPY ./IUM_05.py ./
|
||||||
|
|
||||||
RUN mkdir /.kaggle
|
RUN mkdir /.kaggle
|
||||||
RUN chmod -R 777 /.kaggle
|
RUN chmod -R 777 /.kaggle
|
||||||
|
BIN
FootballModel.pth
Normal file
BIN
FootballModel.pth
Normal file
Binary file not shown.
152
IUM_05.py
Normal file
152
IUM_05.py
Normal file
@ -0,0 +1,152 @@
|
|||||||
|
from sklearn.model_selection import train_test_split
|
||||||
|
import torch
|
||||||
|
import torch.nn as nn
|
||||||
|
import pandas as pd
|
||||||
|
import torch.nn.functional as F
|
||||||
|
from torch.utils.data import DataLoader, TensorDataset, random_split
|
||||||
|
from sklearn import preprocessing
|
||||||
|
|
||||||
|
# Load the raw match results from the working directory.
results = pd.read_csv('results.csv')

# Drop rows containing NaN. DataFrame.dropna() returns a new frame instead of
# modifying in place, so the result must be assigned back; the bare call
# silently kept every NaN row.
results = results.dropna()

# Normalisation: lower-case the text columns.
for collumn in ['home_team', 'away_team', 'tournament', 'city', 'country']:
    results[collumn] = results[collumn].str.lower()

# Names of all object-dtype columns; these are the ones that get
# integer-encoded before scaling.
categorical_cols = results.select_dtypes(include=object).columns.values

# test_size = 1 - 0.4, i.e. 40 % of the rows go to `train`, 60 % to `test`.
train, test = train_test_split(results, test_size=1 - 0.4)

#valid, test = train_test_split(test, test_size=0.5)

# Features are every column except the first and the last one;
# the target is the last column.
input_cols = train.columns.values[1:-1]
output_cols = train.columns.values[-1:]
|
||||||
|
|
||||||
|
|
||||||
|
def dataframe_to_arrays(dataframe):
    """Encode, scale and split a dataframe into input / target numpy arrays.

    The frame is deep-copied, every column listed in the module-level
    ``categorical_cols`` is replaced by its integer category codes, and then
    all columns (targets included) are min-max scaled.

    Args:
        dataframe: Frame containing the columns named in ``categorical_cols``,
            ``input_cols`` and ``output_cols``.

    Returns:
        Tuple ``(inputs_array, targets_array)`` holding the scaled
        ``input_cols`` and ``output_cols`` columns as numpy arrays.

    NOTE(review): category codes and the scaler are fitted inside every call,
    so separate calls (e.g. train vs. test) are encoded and scaled
    independently of each other -- confirm this is intended.
    """
    # Work on a copy so the caller's frame is left untouched.
    encoded = dataframe.copy(deep=True)

    # Replace non-numeric categorical columns with integer codes.
    for name in categorical_cols:
        encoded[name] = encoded[name].astype('category').cat.codes

    # Scale every column and rebuild a frame with the original column labels.
    scaler = preprocessing.MinMaxScaler()
    scaled = pd.DataFrame(scaler.fit_transform(encoded), columns=encoded.columns)

    # Extract inputs and outputs as numpy arrays.
    return scaled[input_cols].to_numpy(), scaled[output_cols].to_numpy()
|
||||||
|
|
||||||
|
# Encode and scale each split into numpy arrays.
inputs_array_training, targets_array_training = dataframe_to_arrays(train)
inputs_array_testing, targets_array_testing = dataframe_to_arrays(test)

# Convert the arrays to float32 tensors.
inputs_training = torch.from_numpy(inputs_array_training).float()
targets_training = torch.from_numpy(targets_array_training).float()
inputs_testing = torch.from_numpy(inputs_array_testing).float()
targets_testing = torch.from_numpy(targets_array_testing).float()

# The `test` split is what the validation dataset is built from.
train_dataset = TensorDataset(inputs_training, targets_training)
val_dataset = TensorDataset(inputs_testing, targets_testing)

# Training batches are shuffled; validation uses batches twice as large.
batch_size = 64
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=batch_size * 2)

# Layer dimensions follow the number of feature / target columns.
input_size = len(input_cols)
output_size = len(output_cols)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
class FootbalModel(nn.Module):
    """Single linear layer trained and validated with mean-squared-error loss.

    Args:
        in_features: Number of input features. When omitted, the module-level
            ``input_size`` is used (the original behaviour).
        out_features: Number of outputs. When omitted, the module-level
            ``output_size`` is used (the original behaviour).
    """

    def __init__(self, in_features=None, out_features=None):
        super().__init__()
        # Fall back to the script-level sizes so ``FootbalModel()`` keeps working.
        if in_features is None:
            in_features = input_size
        if out_features is None:
            out_features = output_size
        self.linear = nn.Linear(in_features, out_features)

    def forward(self, xb):
        """Return the linear layer's output for the batch ``xb``."""
        return self.linear(xb)

    def _batch_loss(self, batch):
        """Return the MSE loss for one ``(inputs, targets)`` batch."""
        inputs, targets = batch
        return F.mse_loss(self(inputs), targets)

    def training_step(self, batch):
        """Return the differentiable MSE loss for one training batch."""
        return self._batch_loss(batch)

    def validation_step(self, batch):
        """Return ``{'val_loss': loss}`` with the loss detached from the graph."""
        return {'val_loss': self._batch_loss(batch).detach()}

    def validation_epoch_end(self, outputs):
        """Average the per-batch losses into ``{'val_loss': float}``.

        Args:
            outputs: List of dicts as returned by ``validation_step``.
        """
        batch_losses = [x['val_loss'] for x in outputs]
        epoch_loss = torch.stack(batch_losses).mean()
        return {'val_loss': epoch_loss.item()}

    def epoch_end(self, epoch, result, num_epochs):
        """Print the validation loss every 20th epoch and on the last epoch.

        Args:
            epoch: Zero-based epoch index.
            result: Dict containing ``'val_loss'``.
            num_epochs: Total number of epochs in the run.
        """
        if (epoch + 1) % 20 == 0 or epoch == num_epochs - 1:
            print("Epoch [{}], val_loss: {:.4f}".format(epoch + 1, result['val_loss']))
|
||||||
|
|
||||||
|
# Build the model; its layer sizes come from the module-level
# input_size / output_size.
model = FootbalModel()
# Enumerate the parameters once; the resulting list is not stored anywhere.
list(model.parameters())
|
||||||
|
|
||||||
|
|
||||||
|
def evaluate(model, val_loader):
    """Run the model over every validation batch and aggregate the results.

    Args:
        model: Object providing ``validation_step(batch)`` and
            ``validation_epoch_end(outputs)``.
        val_loader: Iterable yielding validation batches.

    Returns:
        Whatever ``model.validation_epoch_end`` returns for the list of
        per-batch outputs.
    """
    # Evaluation never back-propagates, so skip autograd bookkeeping while
    # the validation batches are processed.
    with torch.no_grad():
        outputs = [model.validation_step(batch) for batch in val_loader]
    return model.validation_epoch_end(outputs)
|
||||||
|
|
||||||
|
def fit(epochs, lr, model, train_loader, val_loader, opt_func=torch.optim.SGD):
    """Train ``model`` for ``epochs`` epochs, validating after each one.

    Args:
        epochs: Number of passes over ``train_loader``.
        lr: Learning rate passed to the optimizer.
        model: Model providing ``training_step`` and ``epoch_end``.
        train_loader: Iterable of training batches.
        val_loader: Iterable of validation batches, passed to ``evaluate``.
        opt_func: Optimizer factory called as ``opt_func(parameters, lr)``.

    Returns:
        List with one validation result per epoch, in epoch order.
    """
    optimizer = opt_func(model.parameters(), lr)
    history = []
    for epoch in range(epochs):
        # Training phase: one optimizer update per batch, then clear the
        # gradients so they do not accumulate into the next batch.
        for batch in train_loader:
            model.training_step(batch).backward()
            optimizer.step()
            optimizer.zero_grad()
        # Validation phase.
        epoch_result = evaluate(model, val_loader)
        model.epoch_end(epoch, epoch_result, epochs)
        history.append(epoch_result)
    return history
|
||||||
|
|
||||||
|
|
||||||
|
# Validation result of the model before any training step has run.
result = evaluate(model, val_loader)

# Training hyper-parameters.
epochs = 100
lr = 1e-6

# Train with the default optimizer of fit() and keep the per-epoch results.
history3 = fit(epochs, lr, model, train_loader, val_loader)
|
||||||
|
|
||||||
|
def predict_single(input, target, model):
    """Print the model's prediction for one sample and its thresholded label.

    Args:
        input: Feature tensor of a single sample (no batch dimension).
        target: Target of the sample; accepted for call compatibility but
            not used.
        model: Callable mapping a batch of inputs to a batch of predictions.

    Prints the prediction followed by ``'Neutral'`` when it is at least 0.5,
    otherwise ``'not neutral'``. Returns ``None``.
    """
    # Add a leading batch dimension and feed the *batched* tensor to the
    # model; the batch was previously built but the raw sample was passed.
    inputs = input.unsqueeze(0)
    predictions = model(inputs)
    # Row 0 is the prediction for the single sample in the batch.
    prediction = predictions[0].detach()
    print("Prediction:", prediction)
    if prediction >= 0.5:
        print('Neutral')
    else:
        print('not neutral')
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
# Print a prediction and its thresholded label for every validation sample.
for i in range(len(val_dataset)):
    input, target = val_dataset[i]
    predict_single(input, target, model)

# Persist the model's state_dict to 'FootballModel.pth'.
torch.save(model.state_dict(), 'FootballModel.pth')
|
2
Jenkinsfile
vendored
2
Jenkinsfile
vendored
@ -27,10 +27,12 @@ node {
|
|||||||
def image = docker.build("s434732/ium")
|
def image = docker.build("s434732/ium")
|
||||||
image.inside {
|
image.inside {
|
||||||
sh 'python3 ./skrypt_download.py'
|
sh 'python3 ./skrypt_download.py'
|
||||||
|
sh 'python3 ./IUM_05.py > model.txt'
|
||||||
|
|
||||||
archiveArtifacts "train.csv"
|
archiveArtifacts "train.csv"
|
||||||
archiveArtifacts "test.csv"
|
archiveArtifacts "test.csv"
|
||||||
archiveArtifacts "valid.csv"
|
archiveArtifacts "valid.csv"
|
||||||
|
archiveArtifacts 'model.txt'
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -15,7 +15,7 @@ results.dropna()
|
|||||||
#normalizacja itp
|
#normalizacja itp
|
||||||
for collumn in ['home_team', 'away_team', 'tournament', 'city', 'country']:
|
for collumn in ['home_team', 'away_team', 'tournament', 'city', 'country']:
|
||||||
results[collumn] = results[collumn].str.lower()
|
results[collumn] = results[collumn].str.lower()
|
||||||
|
|
||||||
# Podział zbioru 6:1:1
|
# Podział zbioru 6:1:1
|
||||||
train, test = train_test_split(results, test_size= 1 - 0.6)
|
train, test = train_test_split(results, test_size= 1 - 0.6)
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user