From e8a48f51f38deb296aa6c61b2c63a6d7b1afe4c6 Mon Sep 17 00:00:00 2001
From: Alicja Szulecka <73056579+AliSzu@users.noreply.github.com>
Date: Sun, 14 Apr 2024 12:24:44 +0200
Subject: [PATCH] model and prediction scripts

---
Review amendments to the two added scripts: forward() returns raw logits
(no ReLU in front of CrossEntropyLoss / arg-max), load_model() passes
map_location, test rows are predicted in batches, and prediction.py ends
with a newline. The blob ids on the `index` lines of model.py and
prediction.py still refer to the pre-review content.

 .../inspectionProfiles/profiles_settings.xml  |   6 -
 .idea/misc.xml                                |   4 -
 .idea/vcs.xml                                 |   6 -
 .idea/workspace.xml                           |  76 ------------
 model.py                                      | 111 ++++++++++++++++++
 prediction.py                                 |  90 ++++++++++++++
 6 files changed, 201 insertions(+), 92 deletions(-)
 delete mode 100644 .idea/inspectionProfiles/profiles_settings.xml
 delete mode 100644 .idea/misc.xml
 delete mode 100644 .idea/vcs.xml
 delete mode 100644 .idea/workspace.xml
 create mode 100644 model.py
 create mode 100644 prediction.py

diff --git a/.idea/inspectionProfiles/profiles_settings.xml b/.idea/inspectionProfiles/profiles_settings.xml
deleted file mode 100644
index 105ce2d..0000000
--- a/.idea/inspectionProfiles/profiles_settings.xml
+++ /dev/null
@@ -1,6 +0,0 @@
-
-
-
-
\ No newline at end of file
diff --git a/.idea/misc.xml b/.idea/misc.xml
deleted file mode 100644
index f5a93a6..0000000
--- a/.idea/misc.xml
+++ /dev/null
@@ -1,4 +0,0 @@
-
-
-
-
\ No newline at end of file
diff --git a/.idea/vcs.xml b/.idea/vcs.xml
deleted file mode 100644
index c8397c9..0000000
--- a/.idea/vcs.xml
+++ /dev/null
@@ -1,6 +0,0 @@
-
-
-
-
-
-
\ No newline at end of file
diff --git a/.idea/workspace.xml b/.idea/workspace.xml
deleted file mode 100644
index 68be8ea..0000000
--- a/.idea/workspace.xml
+++ /dev/null
@@ -1,76 +0,0 @@
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-{
-  "keyToString": {
-    "RunOnceActivity.OpenProjectViewOnStart": "true",
-    "RunOnceActivity.ShowReadmeOnStart": "true",
-    "WebServerToolWindowFactoryState": "false",
-    "last_opened_file_path": "/home/students/s464914/PycharmProjects/ium_464914",
-    "node.js.detected.package.eslint": "true",
-    "node.js.detected.package.tslint": "true",
-    "node.js.selected.package.eslint": "(autodetect)",
-    "node.js.selected.package.tslint": "(autodetect)",
-    "vue.rearranger.settings.migration": "true"
-  }
-}
-
-
-
-
-      1710696754593
-
-
-
-
-
-
-
-
-
\ No newline at end of file
diff --git a/model.py b/model.py
new file mode 100644
index 0000000..570e587
--- /dev/null
+++ b/model.py
@@ -0,0 +1,111 @@
+"""Train a forest cover-type classifier and save its weights to model.pth."""
+import torch
+import torch.nn as nn
+import torch.optim as optim
+from torch.utils.data import DataLoader, Dataset, TensorDataset
+import pandas as pd
+from sklearn.model_selection import train_test_split
+from sklearn.preprocessing import LabelEncoder
+import torch.nn.functional as F
+
+
+device = (
+    "cuda"
+    if torch.cuda.is_available()
+    else "cpu"
+)
+
+
+class Model(nn.Module):
+    """Linear classifier that returns one raw logit per output class.
+
+    Only ``fc1`` takes part in the forward pass. ``bn1``, ``fc2``, ``bn2`` and
+    ``out`` are not called by ``forward``; they stay declared so the keys of
+    ``state_dict()``, and with them saved checkpoints, keep their layout.
+    """
+
+    def __init__(self, input_features=54, hidden_layer1=25, hidden_layer2=30, output_features=8):
+        super().__init__()
+        self.fc1 = nn.Linear(input_features, output_features)
+        self.bn1 = nn.BatchNorm1d(hidden_layer1)
+        self.fc2 = nn.Linear(hidden_layer1, hidden_layer2)
+        self.bn2 = nn.BatchNorm1d(hidden_layer2)
+        self.out = nn.Linear(hidden_layer2, output_features)
+
+    def forward(self, x):
+        """Return the class logits for ``x``."""
+        # No activation on the logits: nn.CrossEntropyLoss applies log-softmax
+        # itself, and a ReLU here would clamp every negative logit to zero.
+        return self.fc1(x)
+
+
+def _to_tensors(frame):
+    """Split a data frame into (features, labels) tensors placed on ``device``."""
+    features = frame.drop(columns=['Cover_Type']).values
+    labels = frame['Cover_Type'].values
+    return (
+        torch.tensor(features, dtype=torch.float32).to(device),
+        torch.tensor(labels, dtype=torch.long).to(device),
+    )
+
+
+def _evaluate(model, loader, criterion):
+    """Return (mean loss, accuracy) of ``model`` over every sample in ``loader``."""
+    model.eval()
+    loss_sum = 0.0
+    correct = 0
+    with torch.no_grad():
+        for inputs, labels in loader:
+            outputs = model(inputs)
+            loss_sum += criterion(outputs, labels).item() * inputs.size(0)
+            correct += (outputs.argmax(dim=1) == labels).sum().item()
+    total = len(loader.dataset)
+    return loss_sum / total, correct / total
+
+
+def main():
+    """Train on forest_train.csv, validate on forest_val.csv, save model.pth."""
+    forest_train = pd.read_csv('forest_train.csv')
+    forest_val = pd.read_csv('forest_val.csv')
+
+    print(forest_train.head())
+
+    X_train, y_train = _to_tensors(forest_train)
+    X_val, y_val = _to_tensors(forest_val)
+
+    model = Model().to(device)
+    criterion = nn.CrossEntropyLoss()
+    optimizer = optim.Adam(model.parameters(), lr=0.001)
+
+    # TensorDataset indexes the tensors directly instead of first building a
+    # Python list that holds one (features, label) tuple per row.
+    train_loader = DataLoader(TensorDataset(X_train, y_train), batch_size=64, shuffle=True)
+    val_loader = DataLoader(TensorDataset(X_val, y_val), batch_size=64)
+
+    epochs = 10
+    for epoch in range(epochs):
+        model.train()
+        running_loss = 0.0
+        # The tensors already live on `device`, so batches need no transfer.
+        for inputs, labels in train_loader:
+            optimizer.zero_grad()
+            loss = criterion(model(inputs), labels)
+            loss.backward()
+            optimizer.step()
+            # The loss is a batch mean; weight it by the batch size so the
+            # epoch figure is a per-sample mean even with a short last batch.
+            running_loss += loss.item() * inputs.size(0)
+
+        epoch_loss = running_loss / len(train_loader.dataset)
+        val_epoch_loss, val_accuracy = _evaluate(model, val_loader, criterion)
+
+        print(f"Epoch {epoch+1}/{epochs}, "
+              f"Train Loss: {epoch_loss:.4f}, "
+              f"Val Loss: {val_epoch_loss:.4f}, "
+              f"Val Accuracy: {val_accuracy:.4f}")
+
+    torch.save(model.state_dict(), 'model.pth')
+
+
+if __name__ == "__main__":
+    main()
diff --git a/prediction.py b/prediction.py
new file mode 100644
index 0000000..f071bc4
--- /dev/null
+++ b/prediction.py
@@ -0,0 +1,90 @@
+"""Predict the class of every row in forest_test.csv with model.pth."""
+import torch
+import torch.nn as nn
+import torch.optim as optim
+from torch.utils.data import DataLoader, Dataset
+import pandas as pd
+from sklearn.model_selection import train_test_split
+from sklearn.preprocessing import LabelEncoder
+import torch.nn.functional as F
+
+device = (
+    "cuda"
+    if torch.cuda.is_available()
+    else "cpu"
+)
+
+
+class Model(nn.Module):
+    """Copy of ``Model`` from model.py; both must declare the same layers.
+
+    ``load_state_dict`` matches parameters by name and shape, so the layers
+    declared here have to mirror the ones the checkpoint was saved with.
+    Only ``fc1`` takes part in the forward pass.
+    """
+
+    def __init__(self, input_features=54, hidden_layer1=25, hidden_layer2=30, output_features=8):
+        super().__init__()
+        self.fc1 = nn.Linear(input_features, output_features)
+        self.bn1 = nn.BatchNorm1d(hidden_layer1)
+        self.fc2 = nn.Linear(hidden_layer1, hidden_layer2)
+        self.bn2 = nn.BatchNorm1d(hidden_layer2)
+        self.out = nn.Linear(hidden_layer2, output_features)
+
+    def forward(self, x):
+        """Return the class logits for ``x``."""
+        # Raw logits, no ReLU: clamping negative logits to zero would make
+        # every class with a negative score tie at 0 in the arg-max below.
+        return self.fc1(x)
+
+
+def load_model(model, model_path):
+    """Load the weights stored at ``model_path`` into ``model``; set eval mode.
+
+    NOTE: torch.load is pickle-based; only load checkpoint files you trust.
+    """
+    # map_location lets a checkpoint saved on a CUDA machine load on a
+    # CPU-only one; without it torch.load tries to restore onto CUDA.
+    model.load_state_dict(torch.load(model_path, map_location=device))
+    model.eval()
+
+
+def predict(model, input_data):
+    """Return the predicted class index for one 1-D feature tensor."""
+    with torch.no_grad():
+        output = model(input_data)
+    return torch.argmax(output, dim=0).item()
+
+
+def predict_batch(model, inputs, batch_size=4096):
+    """Return the predicted class index for every row of a 2-D tensor.
+
+    Rows go through the model ``batch_size`` at a time instead of paying for
+    one forward pass per row.
+    """
+    predictions = []
+    with torch.no_grad():
+        for chunk in torch.split(inputs, batch_size):
+            predictions.extend(model(chunk).argmax(dim=1).tolist())
+    return predictions
+
+
+def main():
+    """Write one predicted class per row of forest_test.csv to predictions.txt."""
+    forest_test = pd.read_csv('forest_test.csv')
+
+    # Cover_Type is the label column, not a model input.
+    X_test = forest_test.drop(columns=['Cover_Type']).values
+    X_test = torch.tensor(X_test, dtype=torch.float32).to(device)
+
+    model = Model().to(device)
+    load_model(model, 'model.pth')
+
+    predictions = predict_batch(model, X_test)
+
+    with open('predictions.txt', 'w') as fp:
+        fp.writelines(f"{item}\n" for item in predictions)
+
+
+if __name__ == "__main__":
+    main()