neuralNetwork
This commit is contained in:
parent
e0e8daf6f6
commit
90cb600e08
1434
dev-0/out.tsv
1434
dev-0/out.tsv
File diff suppressed because it is too large
Load Diff
110
run_nn.py
Normal file
110
run_nn.py
Normal file
@ -0,0 +1,110 @@
|
||||
import gensim.downloader
|
||||
import torch.optim as optim
|
||||
import torch.nn as nn
|
||||
import torch
|
||||
import numpy as np
|
||||
from torch import relu, sigmoid
|
||||
|
||||
#from timeit import default_timer as timer
|
||||
|
||||
class NNet(nn.Module):
    """Feed-forward binary classifier with two hidden ReLU layers.

    The network maps a feature vector through two fully connected layers with
    ReLU activations and a final single-unit layer squashed by a sigmoid, so
    the output is a value between 0 and 1 per input vector.

    Args:
        input_size: Length of the input feature vector.
        hidden_size_1: Number of units in the first hidden layer.
        hidden_size_2: Number of units in the second hidden layer.
    """

    def __init__(
        self,
        input_size: int = 100,
        hidden_size_1: int = 1000,
        hidden_size_2: int = 400,
    ) -> None:
        super().__init__()
        # Attribute names are kept as ll1/ll2/ll3 so previously saved
        # state dicts still load into this class.
        self.ll1 = nn.Linear(input_size, hidden_size_1)
        self.ll2 = nn.Linear(hidden_size_1, hidden_size_2)
        self.ll3 = nn.Linear(hidden_size_2, 1)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Return the sigmoid activation of the last layer for input *x*.

        The last dimension of *x* must equal ``input_size``; the last
        dimension of the result has length 1.
        """
        x = relu(self.ll1(x))
        x = relu(self.ll2(x))
        return sigmoid(self.ll3(x))
|
||||
|
||||
def read_data(folder_name):
    """Load tokenized inputs and integer labels from a dataset folder.

    Reads ``<folder_name>/in.tsv`` and ``<folder_name>/expected.tsv``.

    Each input line is lower-cased, split on whitespace, and stripped of its
    last two tokens (presumably trailing metadata columns; verify against
    the data format). Each label is the first whitespace-separated token of
    the corresponding ``expected.tsv`` line, parsed as ``int``.

    Args:
        folder_name: Directory containing ``in.tsv`` and ``expected.tsv``.

    Returns:
        A ``(tokens, labels)`` tuple: a list of token lists and a list of
        ints.
    """
    token_rows = []
    with open(f'{folder_name}/in.tsv', encoding='utf-8') as in_file:
        for raw_line in in_file:
            tokens = raw_line.lower().split()
            token_rows.append(tokens[:-2])

    labels = []
    with open(f'{folder_name}/expected.tsv', encoding='utf-8') as expected_file:
        for raw_line in expected_file:
            first_field = raw_line.split()[0]
            labels.append(int(first_field))

    return token_rows, labels
|
||||
|
||||
|
||||
def process_data(data, word2vec, vector_size=100):
    """Turn tokenized documents into averaged word-embedding vectors.

    For every document, the embeddings of the tokens found in *word2vec* are
    averaged element-wise. Tokens missing from *word2vec* are ignored. A
    document with no known tokens is represented by a zero vector.

    Args:
        data: Iterable of documents, each an iterable of string tokens.
        word2vec: Mapping-like object supporting ``token in word2vec`` and
            ``word2vec[token]``.
        vector_size: Length of the zero vector used for documents with no
            known tokens. It should match the embedding length of
            *word2vec*; the default of 100 is the value the original code
            hard-coded.

    Returns:
        A list with one NumPy array per document, in input order.
    """
    processed_data = []
    for tokens in data:
        vectors = [word2vec[token] for token in tokens if token in word2vec]
        if vectors:
            processed_data.append(np.mean(vectors, axis=0))
        else:
            # A fresh array per document, so callers can mutate one result
            # without affecting the others.
            processed_data.append(np.zeros(vector_size))
    return processed_data
|
||||
|
||||
|
||||
def predict(folder_name, model, word_vec):
    """Classify every line of ``<folder_name>/in.tsv`` with *model*.

    Lines are lower-cased, split on whitespace and stripped of their last two
    tokens, then converted to feature vectors by ``process_data``. Each
    vector is moved to the module-level ``device`` and passed through
    *model* with gradient tracking disabled.

    Args:
        folder_name: Directory containing ``in.tsv``.
        model: Callable model applied to one float32 tensor at a time.
        word_vec: Word-embedding lookup forwarded to ``process_data``.

    Returns:
        A list with one boolean tensor per input line, ``True`` where the
        model output is greater than 0.5.
    """
    token_rows = []
    with open(f'{folder_name}/in.tsv', encoding='utf-8') as in_file:
        for raw_line in in_file:
            token_rows.append(raw_line.lower().split()[:-2])

    feature_vectors = process_data(token_rows, word_vec)

    y_predictions = []
    with torch.no_grad():
        for vector in feature_vectors:
            sample = torch.tensor(vector.astype(np.float32)).to(device)
            score = model(sample)
            y_predictions.append(score > 0.5)
    return y_predictions
|
||||
|
||||
|
||||
def save_predictions(folder_name, predicted_labels):
    """Write predicted labels to ``<folder_name>/out.tsv``, one per line.

    Args:
        folder_name: Directory in which ``out.tsv`` is created or
            overwritten.
        predicted_labels: Iterable of tensors; the first element of each,
            converted to an integer, is the label written for that row.
    """
    labels = [pred.int()[0].item() for pred in predicted_labels]

    with open(f"{folder_name}/out.tsv", "w", encoding="UTF-8") as file_out:
        # writelines() expects an iterable of lines. The original passed it a
        # single string per call, which only worked because a str iterates
        # character by character.
        file_out.writelines(f"{label}\n" for label in labels)
|
||||
|
||||
|
||||
# Run on the GPU when torch can see one, otherwise fall back to the CPU.
# `device` stays a module-level name because predict() reads it.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
print(device)  # gpu is a bit faster here

# Pre-trained 100-dimensional GloVe embeddings, loaded through gensim.
word_vectors = gensim.downloader.load("glove-wiki-gigaword-100")

# Training documents are reduced to one averaged embedding each.
x_data, y_train = read_data('train')
x_train = process_data(x_data, word_vectors)

model = NNet().to(device)

criterion = nn.BCELoss()
optimizer = optim.SGD(model.parameters(), lr=0.005)  # , momentum=0.9)

# Train for two passes over the data, updating the weights after every
# single example.
for epoch in range(2):
    running_loss = 0.0
    correct = 0.
    total = 0.
    for i, (inputs, label) in enumerate(zip(x_train, y_train)):
        features = inputs.astype(np.float32)
        inputs = torch.tensor(features).to(device)
        target = np.array(label).astype(np.float32)
        label = torch.tensor(target).reshape(1).to(device)

        # Clear the gradients left over from the previous example.
        optimizer.zero_grad()

        # Forward pass, loss, backward pass, weight update.
        y_predicted = model(inputs)
        loss = criterion(y_predicted, label)
        loss.backward()
        optimizer.step()

        # Accumulate statistics; accuracy is cumulative within the epoch,
        # while the loss is averaged over the most recent 10000 examples.
        running_loss += loss.item()
        hits = (y_predicted > 0.5) == label
        correct += hits.type(torch.float).sum().item()
        total += label.size(0)

        if (i + 1) % 10000 == 0:    # report every 10000 examples
            print(f'[{epoch + 1}, {i + 1:5d}] loss: {running_loss / 10000:.3f}')
            print(f'Accuracy score: {100 * correct / total} %')
            running_loss = 0.0

# Write predictions for the development and test splits.
for split_name in ('dev-0', 'test-A'):
    predicted = predict(split_name, model, word_vectors)
    save_predictions(split_name, predicted)
|
1726
test-A/out.tsv
1726
test-A/out.tsv
File diff suppressed because it is too large
Load Diff
277440
test.ipynb
Normal file
277440
test.ipynb
Normal file
File diff suppressed because one or more lines are too long
Loading…
Reference in New Issue
Block a user