"""Train and evaluate a single-layer linear regression model on tabular data.

The script reads ``train.csv`` and ``test.csv`` from the working directory,
integer-encodes the non-numeric columns, min-max scales every column, fits
``FootbalModel`` with SGD, prints a prediction for every validation sample
and finally saves the learned weights to ``FootballModel.pth``.

Usage: ``python <script> [batch_size] [epochs]`` (defaults: 64 and 100).
"""
import sys

import pandas as pd
import torch
import torch.nn as nn
import torch.nn.functional as F
from sklearn import preprocessing
from torch.utils.data import DataLoader, TensorDataset, random_split

# Optional positional command-line overrides.
batch_size = int(sys.argv[1]) if len(sys.argv) > 1 else 64
epochs = int(sys.argv[2]) if len(sys.argv) > 2 else 100

train = pd.read_csv('train.csv')
test = pd.read_csv('test.csv')

# The column layout is taken from the training frame: the first column is
# skipped (presumably an identifier -- confirm against the data) and the last
# column is the regression target.
categorical_cols = train.select_dtypes(include=object).columns.values
input_cols = train.columns.values[1:-1]
output_cols = train.columns.values[-1:]


def _encode_categoricals(dataframe, category_levels=None):
    """Return a deep copy of *dataframe* with ``categorical_cols`` integer-coded.

    Args:
        dataframe: Frame containing every column named in ``categorical_cols``.
        category_levels: Optional mapping ``column -> categories``. When given,
            codes are assigned against these categories so that two frames
            share one encoding; values missing from the mapping become ``-1``.
            When ``None`` the categories are derived from *dataframe* itself.
    """
    encoded = dataframe.copy(deep=True)
    for col in categorical_cols:
        if category_levels is None:
            encoded[col] = encoded[col].astype('category').cat.codes
        else:
            encoded[col] = pd.Categorical(
                encoded[col], categories=category_levels[col]
            ).codes
    return encoded


def dataframe_to_arrays(dataframe, scaler=None, category_levels=None):
    """Convert *dataframe* into scaled ``(inputs, targets)`` numpy arrays.

    Args:
        dataframe: Frame with the same columns as ``train``.
        scaler: Optional, already fitted scaler. When given it is only applied
            (``transform``), so held-out data is scaled with the statistics of
            the data the scaler was fitted on; *dataframe* must then have the
            columns the scaler was fitted with. When ``None`` a new
            ``MinMaxScaler`` is fitted on *dataframe* itself.
        category_levels: Optional mapping ``column -> categories`` forwarded to
            the categorical encoding so several frames share the same codes.

    Returns:
        Tuple ``(inputs_array, targets_array)`` holding the ``input_cols`` and
        ``output_cols`` of the encoded, scaled frame.
    """
    # Convert non-numeric categorical columns to numbers.
    encoded = _encode_categoricals(dataframe, category_levels)

    if scaler is None:
        scaled_values = preprocessing.MinMaxScaler().fit_transform(encoded)
    else:
        scaled_values = scaler.transform(encoded)
    scaled = pd.DataFrame(scaled_values, columns=encoded.columns)

    # Extract inputs & outputs as numpy arrays.
    inputs_array = scaled[input_cols].to_numpy()
    targets_array = scaled[output_cols].to_numpy()
    return inputs_array, targets_array


# Fit the categorical encoding and the scaler on the training data only and
# reuse both for the test data. Encoding/scaling each frame independently would
# map the same raw value to different numbers in train and test.
train_category_levels = {
    col: train[col].astype('category').cat.categories
    for col in categorical_cols
}
train_scaler = preprocessing.MinMaxScaler().fit(
    _encode_categoricals(train, train_category_levels)
)

inputs_array_training, targets_array_training = dataframe_to_arrays(
    train, train_scaler, train_category_levels
)
inputs_array_testing, targets_array_testing = dataframe_to_arrays(
    test, train_scaler, train_category_levels
)

inputs_training = torch.from_numpy(inputs_array_training).type(torch.float32)
targets_training = torch.from_numpy(targets_array_training).type(torch.float32)
inputs_testing = torch.from_numpy(inputs_array_testing).type(torch.float32)
targets_testing = torch.from_numpy(targets_array_testing).type(torch.float32)

train_dataset = TensorDataset(inputs_training, targets_training)
val_dataset = TensorDataset(inputs_testing, targets_testing)

train_loader = DataLoader(train_dataset, batch_size, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size * 2)

input_size = len(input_cols)
output_size = len(output_cols)


class FootbalModel(nn.Module):
    """Linear model mapping ``input_size`` features to ``output_size`` outputs."""

    def __init__(self):
        super().__init__()
        self.linear = nn.Linear(input_size, output_size)

    def forward(self, xb):
        """Apply the linear layer to the batch *xb*."""
        return self.linear(xb)

    def training_step(self, batch):
        """Return the MSE loss (with gradient history) for one batch."""
        inputs, targets = batch
        return F.mse_loss(self(inputs), targets)

    def validation_step(self, batch):
        """Return ``{'val_loss': loss}`` for one batch, detached from autograd."""
        inputs, targets = batch
        loss = F.mse_loss(self(inputs), targets)
        return {'val_loss': loss.detach()}

    def validation_epoch_end(self, outputs):
        """Average the per-batch losses into ``{'val_loss': float}``.

        Every batch is weighted equally, regardless of its size.
        """
        batch_losses = [x['val_loss'] for x in outputs]
        epoch_loss = torch.stack(batch_losses).mean()
        return {'val_loss': epoch_loss.item()}

    def epoch_end(self, epoch, result, num_epochs):
        """Print the validation loss every 20th epoch and after the last one."""
        if (epoch + 1) % 20 == 0 or epoch == num_epochs - 1:
            print(f"Epoch [{epoch + 1}], val_loss: {result['val_loss']:.4f}")


model = FootbalModel()


def evaluate(model, val_loader):
    """Return ``{'val_loss': float}`` for *model* over all of *val_loader*."""
    # No gradients are needed for validation; skip building autograd graphs.
    with torch.no_grad():
        outputs = [model.validation_step(batch) for batch in val_loader]
    return model.validation_epoch_end(outputs)


def fit(epochs, lr, model, train_loader, val_loader, opt_func=torch.optim.SGD):
    """Train *model* and return the list of per-epoch validation results.

    Args:
        epochs: Number of passes over *train_loader*.
        lr: Learning rate handed to *opt_func*.
        model: Object providing ``training_step``, ``validation_step``,
            ``validation_epoch_end``, ``epoch_end`` and ``parameters``.
        train_loader: Iterable of training batches.
        val_loader: Iterable of validation batches.
        opt_func: Optimizer factory called as ``opt_func(parameters, lr)``.
    """
    history = []
    optimizer = opt_func(model.parameters(), lr)
    for epoch in range(epochs):
        # Training phase
        for batch in train_loader:
            loss = model.training_step(batch)
            loss.backward()
            optimizer.step()
            # Clear gradients so they do not accumulate into the next batch.
            optimizer.zero_grad()
        # Validation phase
        result = evaluate(model, val_loader)
        model.epoch_end(epoch, result, epochs)
        history.append(result)
    return history


# Validation loss of the untrained model, kept as a baseline.
result = evaluate(model, val_loader)

lr = 1e-6
history3 = fit(epochs, lr, model, train_loader, val_loader)


def predict_single(input, target, model):
    """Print the model output for one sample and its thresholded label.

    Args:
        input: Feature tensor of a single sample (no batch dimension). The
            name shadows the builtin but is kept so keyword callers still work.
        target: Expected value of the sample; accepted for interface
            compatibility and not used.
        model: Callable model applied to the batched sample.

    The 0.5 threshold comparison requires the model to emit exactly one value
    per sample.
    """
    # Add a leading batch dimension so the model receives a batch of one.
    inputs = input.unsqueeze(0)
    with torch.no_grad():
        predictions = model(inputs)
    prediction = predictions[0]
    print("Prediction:", prediction)
    if prediction >= 0.5:
        print('Neutral')
    else:
        print('not neutral')


for sample_index in range(len(val_dataset)):
    sample_input, sample_target = val_dataset[sample_index]
    predict_single(sample_input, sample_target, model)

torch.save(model.state_dict(), 'FootballModel.pth')