From e8a48f51f38deb296aa6c61b2c63a6d7b1afe4c6 Mon Sep 17 00:00:00 2001
From: Alicja Szulecka <73056579+AliSzu@users.noreply.github.com>
Date: Sun, 14 Apr 2024 12:24:44 +0200
Subject: [PATCH] model and prediction scripts

---
 .../inspectionProfiles/profiles_settings.xml  |   6 -
 .idea/misc.xml                                |   4 -
 .idea/vcs.xml                                 |   6 -
 .idea/workspace.xml                           |  76 ------------
 model.py                                      | 111 ++++++++++++++++++
 prediction.py                                 |  69 +++++++++++
 6 files changed, 180 insertions(+), 92 deletions(-)
 delete mode 100644 .idea/inspectionProfiles/profiles_settings.xml
 delete mode 100644 .idea/misc.xml
 delete mode 100644 .idea/vcs.xml
 delete mode 100644 .idea/workspace.xml
 create mode 100644 model.py
 create mode 100644 prediction.py
diff --git a/.idea/inspectionProfiles/profiles_settings.xml b/.idea/inspectionProfiles/profiles_settings.xml
deleted file mode 100644
index 105ce2d..0000000
--- a/.idea/inspectionProfiles/profiles_settings.xml
+++ /dev/null
@@ -1,6 +0,0 @@
-<component name="InspectionProjectProfileManager">
-  <settings>
-    <option name="USE_PROJECT_PROFILE" value="false" />
-    <version value="1.0" />
-  </settings>
-</component>
\ No newline at end of file
diff --git a/.idea/misc.xml b/.idea/misc.xml
deleted file mode 100644
index f5a93a6..0000000
--- a/.idea/misc.xml
+++ /dev/null
@@ -1,4 +0,0 @@
-<?xml version="1.0" encoding="UTF-8"?>
-<project version="4">
-  <component name="ProjectRootManager" version="2" project-jdk-name="Python 3.10" project-jdk-type="Python SDK" />
-</project>
\ No newline at end of file
diff --git a/.idea/vcs.xml b/.idea/vcs.xml
deleted file mode 100644
index c8397c9..0000000
--- a/.idea/vcs.xml
+++ /dev/null
@@ -1,6 +0,0 @@
-<?xml version="1.0" encoding="UTF-8"?>
-<project version="4">
-  <component name="VcsDirectoryMappings">
-    <mapping directory="" vcs="Git" />
-  </component>
-</project>
\ No newline at end of file
diff --git a/.idea/workspace.xml b/.idea/workspace.xml
deleted file mode 100644
index 68be8ea..0000000
--- a/.idea/workspace.xml
+++ /dev/null
@@ -1,76 +0,0 @@
-<?xml version="1.0" encoding="UTF-8"?>
-<project version="4">
-  <component name="AutoImportSettings">
-    <option name="autoReloadType" value="SELECTIVE" />
-  </component>
-  <component name="ChangeListManager">
-    <list default="true" id="40d6174e-f930-434f-92f0-26bfa57af58c" name="Changes" comment="">
-      <change beforePath="$PROJECT_DIR$/IUM_2.ipynb" beforeDir="false" afterPath="$PROJECT_DIR$/IUM_2.ipynb" afterDir="false" />
-    </list>
-    <option name="SHOW_DIALOG" value="false" />
-    <option name="HIGHLIGHT_CONFLICTS" value="true" />
-    <option name="HIGHLIGHT_NON_ACTIVE_CHANGELIST" value="false" />
-    <option name="LAST_RESOLUTION" value="IGNORE" />
-  </component>
-  <component name="Git.Settings">
-    <option name="RECENT_BRANCH_BY_REPOSITORY">
-      <map>
-        <entry key="$PROJECT_DIR$" value="ium_2" />
-      </map>
-    </option>
-    <option name="RECENT_GIT_ROOT_PATH" value="$PROJECT_DIR$" />
-  </component>
-  <component name="MarkdownSettingsMigration">
-    <option name="stateVersion" value="1" />
-  </component>
-  <component name="ProjectId" id="2dpEjKsY3xaMmDCHDmrd7pCeSw4" />
-  <component name="ProjectViewState">
-    <option name="hideEmptyMiddlePackages" value="true" />
-    <option name="showLibraryContents" value="true" />
-  </component>
-  <component name="PropertiesComponent">{
-  &quot;keyToString&quot;: {
-    &quot;RunOnceActivity.OpenProjectViewOnStart&quot;: &quot;true&quot;,
-    &quot;RunOnceActivity.ShowReadmeOnStart&quot;: &quot;true&quot;,
-    &quot;WebServerToolWindowFactoryState&quot;: &quot;false&quot;,
-    &quot;last_opened_file_path&quot;: &quot;/home/students/s464914/PycharmProjects/ium_464914&quot;,
-    &quot;node.js.detected.package.eslint&quot;: &quot;true&quot;,
-    &quot;node.js.detected.package.tslint&quot;: &quot;true&quot;,
-    &quot;node.js.selected.package.eslint&quot;: &quot;(autodetect)&quot;,
-    &quot;node.js.selected.package.tslint&quot;: &quot;(autodetect)&quot;,
-    &quot;vue.rearranger.settings.migration&quot;: &quot;true&quot;
-  }
-}</component>
-  <component name="SpellCheckerSettings" RuntimeDictionaries="0" Folders="0" CustomDictionaries="0" DefaultDictionary="application-level" UseSingleDictionary="true" transferred="true" />
-  <component name="TaskManager">
-    <task active="true" id="Default" summary="Default task">
-      <changelist id="40d6174e-f930-434f-92f0-26bfa57af58c" name="Changes" comment="" />
-      <created>1710696754593</created>
-      <option name="number" value="Default" />
-      <option name="presentableId" value="Default" />
-      <updated>1710696754593</updated>
-      <workItem from="1710696756015" duration="548000" />
-      <workItem from="1710940251374" duration="3584000" />
-      <workItem from="1711050477406" duration="616000" />
-      <workItem from="1711457152275" duration="7994000" />
-      <workItem from="1711472959743" duration="2963000" />
-      <workItem from="1713023286972" duration="213000" />
-      <workItem from="1713024301113" duration="305000" />
-    </task>
-    <servers />
-  </component>
-  <component name="TypeScriptGeneratedFilesManager">
-    <option name="version" value="3" />
-  </component>
-  <component name="Vcs.Log.Tabs.Properties">
-    <option name="TAB_STATES">
-      <map>
-        <entry key="MAIN">
-          <value>
-            <State />
-          </value>
-        </entry>
-      </map>
-    </option>
-  </component>
-</project>
\ No newline at end of file
diff --git a/model.py b/model.py
new file mode 100644
index 0000000..570e587
--- /dev/null
+++ b/model.py
@@ -0,0 +1,111 @@
+import torch
+import torch.nn as nn
+import torch.optim as optim
+from torch.utils.data import DataLoader, Dataset
+import pandas as pd
+from sklearn.model_selection import train_test_split
+from sklearn.preprocessing import LabelEncoder
+import torch.nn.functional as F
+ 
+
+# Compute device used for the model and every tensor created in this script.
+if torch.cuda.is_available():
+    device = "cuda"
+else:
+    device = "cpu"
+
+class Model(nn.Module):
+    """Classifier whose forward pass is a single ReLU-activated linear layer."""
+
+    def __init__(self, input_features=54, hidden_layer1=25, hidden_layer2=30, output_features=8):
+        super().__init__()
+        self.fc1 = nn.Linear(in_features=input_features, out_features=output_features)
+        # bn1/fc2/bn2/out are registered (and end up in the state dict) but forward() never calls them.
+        self.bn1 = nn.BatchNorm1d(num_features=hidden_layer1)
+        self.fc2 = nn.Linear(in_features=hidden_layer1, out_features=hidden_layer2)
+        self.bn2 = nn.BatchNorm1d(num_features=hidden_layer2)
+        self.out = nn.Linear(in_features=hidden_layer2, out_features=output_features)
+
+    def forward(self, x):
+        return F.relu(self.fc1(x))  # NOTE(review): ReLU clamps the class scores to >= 0 - confirm intended
+
+def main():
+    """Train ``Model`` on the forest cover CSVs and save its weights.
+
+    Reads ``forest_train.csv`` and ``forest_val.csv`` from the working directory,
+    using the ``Cover_Type`` column as the label and every other column as a
+    feature. Trains for 10 epochs with Adam and cross-entropy loss, prints the
+    losses and validation accuracy after each epoch, then writes the state dict
+    to ``model.pth``.
+
+    Raises:
+        ValueError: If either CSV has no rows.
+    """
+    # Local import: the module header only imports DataLoader and Dataset.
+    from torch.utils.data import TensorDataset
+
+    forest_train = pd.read_csv('forest_train.csv')
+    forest_val = pd.read_csv('forest_val.csv')
+    if forest_train.empty or forest_val.empty:
+        # The per-epoch averages below divide by the number of rows.
+        raise ValueError("forest_train.csv and forest_val.csv must not be empty")
+
+    print(forest_train.head())
+
+    def to_dataset(frame):
+        """Turn one data frame into a TensorDataset stored on ``device``."""
+        features = torch.tensor(frame.drop(columns=['Cover_Type']).values, dtype=torch.float32)
+        targets = torch.tensor(frame['Cover_Type'].values, dtype=torch.long)
+        return TensorDataset(features.to(device), targets.to(device))
+
+    # TensorDataset slices the tensors on demand instead of holding one Python
+    # tuple per row in a list.
+    train_loader = DataLoader(to_dataset(forest_train), batch_size=64, shuffle=True)
+    val_loader = DataLoader(to_dataset(forest_val), batch_size=64)
+
+    # Initialize model, loss function, and optimizer
+    model = Model().to(device)
+    criterion = nn.CrossEntropyLoss()
+    optimizer = optim.Adam(model.parameters(), lr=0.001)
+
+    # Training loop
+    epochs = 10
+    for epoch in range(epochs):
+        model.train()  # Set model to training mode
+        running_loss = 0.0
+        # Batches need no .to(device): the datasets already live on `device`.
+        for inputs, labels in train_loader:
+            optimizer.zero_grad()
+            loss = criterion(model(inputs), labels)
+            loss.backward()
+            optimizer.step()
+            # criterion returns the batch mean, so weight it by the batch size.
+            running_loss += loss.item() * inputs.size(0)
+        epoch_loss = running_loss / len(train_loader.dataset)
+
+        model.eval()  # Set model to evaluation mode
+        val_running_loss = 0.0
+        correct = 0
+        with torch.no_grad():
+            for inputs, labels in val_loader:
+                outputs = model(inputs)
+                val_running_loss += criterion(outputs, labels).item() * inputs.size(0)
+                # Predicted class = index of the largest score in each row.
+                _, predicted = torch.max(outputs, 1)
+                correct += (predicted == labels).sum().item()
+
+        # Calculate validation loss and accuracy
+        val_total = len(val_loader.dataset)
+        val_epoch_loss = val_running_loss / val_total
+        val_accuracy = correct / val_total
+
+        print(f"Epoch {epoch+1}/{epochs}, "
+              f"Train Loss: {epoch_loss:.4f}, "
+              f"Val Loss: {val_epoch_loss:.4f}, "
+              f"Val Accuracy: {val_accuracy:.4f}")
+
+    # Only the weights are saved; loading them needs a Model with matching layers.
+    torch.save(model.state_dict(), 'model.pth')
+
+if __name__ == "__main__":  # Train only when run as a script, not when imported.
+    main()
diff --git a/prediction.py b/prediction.py
new file mode 100644
index 0000000..f071bc4
--- /dev/null
+++ b/prediction.py
@@ -0,0 +1,69 @@
+import torch
+import torch.nn as nn
+import torch.optim as optim
+from torch.utils.data import DataLoader, Dataset
+import pandas as pd
+from sklearn.model_selection import train_test_split
+from sklearn.preprocessing import LabelEncoder
+import torch.nn.functional as F
+
+# Compute device used for the model and the test tensor in this script.
+if torch.cuda.is_available():
+    device = "cuda"
+else:
+    device = "cpu"
+
+class Model(nn.Module):
+    """One ReLU-activated linear layer; bn1, fc2, bn2 and out are never used by forward()."""
+    def __init__(self, input_features=54, hidden_layer1=25, hidden_layer2=30, output_features=8):
+        super().__init__()
+        self.fc1 = nn.Linear(in_features=input_features, out_features=output_features)
+        self.bn1 = nn.BatchNorm1d(num_features=hidden_layer1)
+        self.fc2 = nn.Linear(in_features=hidden_layer1, out_features=hidden_layer2)
+        self.bn2 = nn.BatchNorm1d(num_features=hidden_layer2)
+        self.out = nn.Linear(in_features=hidden_layer2, out_features=output_features)
+
+    def forward(self, x):
+        return F.relu(self.fc1(x))  # NOTE(review): all-negative scores are clamped to a tie at 0
+
+def load_model(model, model_path):  # Loads the saved state dict into `model` in place; returns None.
+    model.load_state_dict(torch.load(model_path, map_location=device))  # remap so GPU-saved weights load on CPU
+    model.eval()  # switch the module to evaluation mode
+
+def predict(model, input_data):
+    """Return the predicted class index for one sample, or a list of them for a 2-D batch."""
+    features = torch.as_tensor(input_data, dtype=torch.float32)  # also accepts lists/arrays
+    with torch.no_grad():  # inference only, no autograd graph needed
+        scores = model(features)
+
+    # Reduce over the class axis (last dim) so a single sample and a batch both work.
+    _, predicted_class = torch.max(scores, -1)
+
+    return predicted_class.tolist()  # int for a 1-D input, list of ints for a 2-D batch
+
+
+def main():
+    """Predict a class for every row of ``forest_test.csv``.
+
+    Loads the weights from ``model.pth`` and writes one predicted class index
+    per line to ``predictions.txt``; all paths are relative to the working
+    directory. A ``Cover_Type`` column is dropped when present, not required.
+    """
+    forest_test = pd.read_csv('forest_test.csv')
+    # Labels are not needed to predict, so tolerate a test file without them.
+    X_test = forest_test.drop(columns=['Cover_Type'], errors='ignore').values
+    X_test = torch.tensor(X_test, dtype=torch.float32).to(device)
+
+    model = Model().to(device)
+    model_path = 'model.pth'  # Path to your saved model file
+    load_model(model, model_path)
+
+    # Iterating a 2-D tensor yields one 1-D row at a time; predict() is called once per row.
+    predictions = [predict(model, input_data) for input_data in X_test]
+
+    with open(r'predictions.txt', 'w') as fp:
+        fp.writelines(f"{item}\n" for item in predictions)  # write each item on a new line
+   
+
+if __name__ == "__main__":  # Run the prediction only when executed as a script.
+    main()
\ No newline at end of file