model and prediction scripts

This commit is contained in:
Alicja Szulecka 2024-04-14 12:24:44 +02:00
parent 3ca7e62805
commit e8a48f51f3
6 changed files with 180 additions and 92 deletions

View File

@ -1,6 +0,0 @@
<component name="InspectionProjectProfileManager">
<settings>
<option name="USE_PROJECT_PROFILE" value="false" />
<version value="1.0" />
</settings>
</component>

View File

@ -1,4 +0,0 @@
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="ProjectRootManager" version="2" project-jdk-name="Python 3.10" project-jdk-type="Python SDK" />
</project>

View File

@ -1,6 +0,0 @@
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="VcsDirectoryMappings">
<mapping directory="" vcs="Git" />
</component>
</project>

View File

@ -1,76 +0,0 @@
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="AutoImportSettings">
<option name="autoReloadType" value="SELECTIVE" />
</component>
<component name="ChangeListManager">
<list default="true" id="40d6174e-f930-434f-92f0-26bfa57af58c" name="Changes" comment="">
<change beforePath="$PROJECT_DIR$/IUM_2.ipynb" beforeDir="false" afterPath="$PROJECT_DIR$/IUM_2.ipynb" afterDir="false" />
</list>
<option name="SHOW_DIALOG" value="false" />
<option name="HIGHLIGHT_CONFLICTS" value="true" />
<option name="HIGHLIGHT_NON_ACTIVE_CHANGELIST" value="false" />
<option name="LAST_RESOLUTION" value="IGNORE" />
</component>
<component name="Git.Settings">
<option name="RECENT_BRANCH_BY_REPOSITORY">
<map>
<entry key="$PROJECT_DIR$" value="ium_2" />
</map>
</option>
<option name="RECENT_GIT_ROOT_PATH" value="$PROJECT_DIR$" />
</component>
<component name="MarkdownSettingsMigration">
<option name="stateVersion" value="1" />
</component>
<component name="ProjectId" id="2dpEjKsY3xaMmDCHDmrd7pCeSw4" />
<component name="ProjectViewState">
<option name="hideEmptyMiddlePackages" value="true" />
<option name="showLibraryContents" value="true" />
</component>
<component name="PropertiesComponent">{
&quot;keyToString&quot;: {
&quot;RunOnceActivity.OpenProjectViewOnStart&quot;: &quot;true&quot;,
&quot;RunOnceActivity.ShowReadmeOnStart&quot;: &quot;true&quot;,
&quot;WebServerToolWindowFactoryState&quot;: &quot;false&quot;,
&quot;last_opened_file_path&quot;: &quot;/home/students/s464914/PycharmProjects/ium_464914&quot;,
&quot;node.js.detected.package.eslint&quot;: &quot;true&quot;,
&quot;node.js.detected.package.tslint&quot;: &quot;true&quot;,
&quot;node.js.selected.package.eslint&quot;: &quot;(autodetect)&quot;,
&quot;node.js.selected.package.tslint&quot;: &quot;(autodetect)&quot;,
&quot;vue.rearranger.settings.migration&quot;: &quot;true&quot;
}
}</component>
<component name="SpellCheckerSettings" RuntimeDictionaries="0" Folders="0" CustomDictionaries="0" DefaultDictionary="application-level" UseSingleDictionary="true" transferred="true" />
<component name="TaskManager">
<task active="true" id="Default" summary="Default task">
<changelist id="40d6174e-f930-434f-92f0-26bfa57af58c" name="Changes" comment="" />
<created>1710696754593</created>
<option name="number" value="Default" />
<option name="presentableId" value="Default" />
<updated>1710696754593</updated>
<workItem from="1710696756015" duration="548000" />
<workItem from="1710940251374" duration="3584000" />
<workItem from="1711050477406" duration="616000" />
<workItem from="1711457152275" duration="7994000" />
<workItem from="1711472959743" duration="2963000" />
<workItem from="1713023286972" duration="213000" />
<workItem from="1713024301113" duration="305000" />
</task>
<servers />
</component>
<component name="TypeScriptGeneratedFilesManager">
<option name="version" value="3" />
</component>
<component name="Vcs.Log.Tabs.Properties">
<option name="TAB_STATES">
<map>
<entry key="MAIN">
<value>
<State />
</value>
</entry>
</map>
</option>
</component>
</project>

111
model.py Normal file
View File

@ -0,0 +1,111 @@
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
import torch.nn.functional as F
# Compute device shared by the whole script: the GPU when CUDA is usable,
# otherwise the CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"
class Model(nn.Module):
    """Classifier that maps ``input_features`` inputs to ``output_features`` scores.

    Only ``fc1`` takes part in the forward pass. ``bn1``, ``fc2``, ``bn2`` and
    ``out`` are constructed (and therefore appear in ``state_dict()``) but are
    never called by ``forward``; they are kept so saved checkpoints keep the
    same set of keys.
    """

    def __init__(self, input_features=54, hidden_layer1=25, hidden_layer2=30, output_features=8):
        """Create the layers.

        Args:
            input_features: Width of each input row.
            hidden_layer1: Width used by the (currently unused) ``bn1``/``fc2``.
            hidden_layer2: Width used by the (currently unused) ``bn2``/``out``.
            output_features: Number of scores produced per input row.
        """
        super().__init__()
        # fc1 projects straight from the inputs to the output scores.
        self.fc1 = nn.Linear(in_features=input_features, out_features=output_features)
        # Layers below are registered but not used by forward().
        self.bn1 = nn.BatchNorm1d(num_features=hidden_layer1)
        self.fc2 = nn.Linear(in_features=hidden_layer1, out_features=hidden_layer2)
        self.bn2 = nn.BatchNorm1d(num_features=hidden_layer2)
        self.out = nn.Linear(in_features=hidden_layer2, out_features=output_features)

    def forward(self, x):
        """Return ``relu(fc1(x))``.

        NOTE(review): the ReLU clamps the scores that are later passed to
        ``nn.CrossEntropyLoss`` to be non-negative -- confirm this is intended.
        """
        scores = self.fc1(x)
        return F.relu(scores)
def main(train_path='forest_train.csv', val_path='forest_val.csv',
         model_path='model.pth', epochs=10, batch_size=64, lr=0.001):
    """Train ``Model`` on a CSV dataset and save its weights.

    Both CSV files must contain a ``Cover_Type`` column, which is used as the
    integer class label; every other column is used as an input feature.
    After each epoch the average training loss, average validation loss and
    validation accuracy are printed. The final ``state_dict`` is written to
    ``model_path``.

    Args:
        train_path: CSV file with the training rows.
        val_path: CSV file with the validation rows.
        model_path: Destination file for the trained ``state_dict``.
        epochs: Number of passes over the training data.
        batch_size: Mini-batch size for both loaders.
        lr: Learning rate for the Adam optimizer.
    """
    # Local import: the file's top-level import list does not include it.
    from torch.utils.data import TensorDataset

    forest_train = pd.read_csv(train_path)
    forest_val = pd.read_csv(val_path)
    print(forest_train.head())

    X_train = forest_train.drop(columns=['Cover_Type']).values
    y_train = forest_train['Cover_Type'].values
    X_val = forest_val.drop(columns=['Cover_Type']).values
    y_val = forest_val['Cover_Type'].values

    # Initialize model, loss function, and optimizer
    model = Model().to(device)
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=lr)

    # Convert to PyTorch tensors that already live on the target device, so
    # the batches produced by the loaders need no further transfer.
    X_train = torch.tensor(X_train, dtype=torch.float32).to(device)
    y_train = torch.tensor(y_train, dtype=torch.long).to(device)
    X_val = torch.tensor(X_val, dtype=torch.float32).to(device)
    y_val = torch.tensor(y_val, dtype=torch.long).to(device)

    # TensorDataset indexes the tensors directly instead of materializing a
    # Python list holding one (features, label) tuple per row.
    train_loader = DataLoader(TensorDataset(X_train, y_train),
                              batch_size=batch_size, shuffle=True)
    val_loader = DataLoader(TensorDataset(X_val, y_val), batch_size=batch_size)

    # Training loop
    for epoch in range(epochs):
        model.train()  # Set model to training mode
        running_loss = 0.0
        for inputs, labels in train_loader:
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            # criterion returns the batch mean; weight it by the batch size so
            # the epoch figure is a true per-sample average.
            running_loss += loss.item() * inputs.size(0)

        # Calculate training loss
        epoch_loss = running_loss / len(train_loader.dataset)

        # Validation
        model.eval()  # Set model to evaluation mode
        val_running_loss = 0.0
        correct = 0
        total = 0
        with torch.no_grad():
            for inputs, labels in val_loader:
                outputs = model(inputs)
                val_loss = criterion(outputs, labels)
                val_running_loss += val_loss.item() * inputs.size(0)
                predicted = outputs.argmax(dim=1)
                total += labels.size(0)
                correct += (predicted == labels).sum().item()

        # Calculate validation loss and accuracy
        val_epoch_loss = val_running_loss / len(val_loader.dataset)
        val_accuracy = correct / total
        print(f"Epoch {epoch+1}/{epochs}, "
              f"Train Loss: {epoch_loss:.4f}, "
              f"Val Loss: {val_epoch_loss:.4f}, "
              f"Val Accuracy: {val_accuracy:.4f}")

    torch.save(model.state_dict(), model_path)


if __name__ == "__main__":
    main()

69
prediction.py Normal file
View File

@ -0,0 +1,69 @@
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
import torch.nn.functional as F
# Compute device used for inference: the GPU when CUDA is usable, otherwise
# the CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"
class Model(nn.Module):
    """Network definition used to rebuild the classifier before loading weights.

    ``forward`` only uses ``fc1``. ``bn1``, ``fc2``, ``bn2`` and ``out`` are
    constructed but never called; they still add entries to ``state_dict()``,
    so they must stay for ``load_state_dict`` to accept a checkpoint produced
    by a model with this same layer set.
    """

    def __init__(self, input_features=54, hidden_layer1=25, hidden_layer2=30, output_features=8):
        """Create the layers.

        Args:
            input_features: Width of each input row.
            hidden_layer1: Width used by the (currently unused) ``bn1``/``fc2``.
            hidden_layer2: Width used by the (currently unused) ``bn2``/``out``.
            output_features: Number of scores produced per input row.
        """
        super().__init__()
        # fc1 projects straight from the inputs to the output scores.
        self.fc1 = nn.Linear(in_features=input_features, out_features=output_features)
        # Layers below are registered but not used by forward().
        self.bn1 = nn.BatchNorm1d(num_features=hidden_layer1)
        self.fc2 = nn.Linear(in_features=hidden_layer1, out_features=hidden_layer2)
        self.bn2 = nn.BatchNorm1d(num_features=hidden_layer2)
        self.out = nn.Linear(in_features=hidden_layer2, out_features=output_features)

    def forward(self, x):
        """Return ``relu(fc1(x))``, i.e. non-negative per-class scores."""
        scores = self.fc1(x)
        return F.relu(scores)
def load_model(model, model_path):
    """Load saved weights into ``model`` in place and switch it to eval mode.

    The checkpoint is mapped onto the device the model's parameters already
    live on, so a file saved from a CUDA model can still be loaded on a
    CPU-only machine.

    Args:
        model: ``nn.Module`` whose parameter names and shapes match the
            checkpoint.
        model_path: Path to a file written by
            ``torch.save(model.state_dict(), ...)``.
    """
    first_param = next(model.parameters(), None)
    target_device = first_param.device if first_param is not None else "cpu"
    # NOTE(review): torch.load unpickles the file -- only load checkpoints
    # that come from a trusted source.
    state_dict = torch.load(model_path, map_location=target_device)
    model.load_state_dict(state_dict)
    model.eval()
def predict(model, input_data):
    """Return the index of the highest-scoring class for ``input_data``.

    Args:
        model: Callable that maps a feature tensor to per-class scores in its
            last dimension. When it is an ``nn.Module`` with parameters, the
            input is converted to the dtype and device of its first parameter.
        input_data: A single sample (1-D) or a batch of samples (2-D), given
            as a tensor or anything ``torch.as_tensor`` accepts.

    Returns:
        An ``int`` class index for a single sample, or a list of ``int`` class
        indices (one per row) for a batch.
    """
    first_param = None
    if isinstance(model, torch.nn.Module):
        first_param = next(model.parameters(), None)

    # Convert input data to a PyTorch tensor that matches the model.
    if first_param is not None:
        features = torch.as_tensor(
            input_data, dtype=first_param.dtype, device=first_param.device
        )
    else:
        features = torch.as_tensor(input_data)

    # Perform forward pass without tracking gradients.
    with torch.no_grad():
        output = model(features)

    # Classes live in the last dimension, so this is correct for both a
    # single sample and a batch (reducing over dim 0 would mix batch rows).
    predicted_class = output.argmax(dim=-1)
    if predicted_class.dim() == 0:
        return predicted_class.item()
    return predicted_class.tolist()
def main(test_path='forest_test.csv', model_path='model.pth',
         output_path='predictions.txt'):
    """Predict a class for every row of a CSV file and write them to a text file.

    The ``Cover_Type`` column is dropped from the CSV and every remaining
    column is used as an input feature. One predicted class index is written
    per line, in the same order as the rows of the CSV.

    Args:
        test_path: CSV file with the rows to classify.
        model_path: Path to the saved ``state_dict`` to load into ``Model``.
        output_path: Text file that receives the predictions.
    """
    forest_test = pd.read_csv(test_path)
    X_test = forest_test.drop(columns=['Cover_Type']).values
    X_test = torch.tensor(X_test, dtype=torch.float32).to(device)

    model = Model().to(device)
    load_model(model, model_path)

    # Iterating a 2-D tensor yields one 1-D feature row at a time.
    predictions = [predict(model, input_data) for input_data in X_test]

    with open(output_path, 'w') as fp:
        # write each item on a new line
        fp.writelines(f"{item}\n" for item in predictions)


if __name__ == "__main__":
    main()