Compare commits
3 Commits
Author | SHA1 | Date | |
---|---|---|---|
42ede5e2c7 | |||
2fc8abbc87 | |||
328cb684d4 |
16
Net.py
Normal file
16
Net.py
Normal file
@ -0,0 +1,16 @@
|
|||||||
|
import torch.nn as nn
|
||||||
|
from torch import relu, sigmoid
|
||||||
|
|
||||||
|
|
||||||
|
class NNet(nn.Module):
    """Feed-forward binary classifier with two hidden ReLU layers.

    The default sizes reproduce the original fixed architecture
    (100 -> 1000 -> 400 -> 1). They are exposed as parameters so the
    network can be reused with input vectors of a different width.

    Args:
        in_features: Length of each input vector.
        hidden1: Width of the first hidden layer.
        hidden2: Width of the second hidden layer.
    """

    def __init__(self, in_features=100, hidden1=1000, hidden2=400):
        super().__init__()
        self.ll1 = nn.Linear(in_features, hidden1)
        self.ll2 = nn.Linear(hidden1, hidden2)
        # Single output unit: one score per input vector.
        self.ll3 = nn.Linear(hidden2, 1)

    def forward(self, x):
        """Run ``x`` through the network and return the sigmoid output.

        Args:
            x: Tensor whose last dimension equals ``in_features``.

        Returns:
            Tensor with the same leading dimensions as ``x`` and a last
            dimension of 1, holding sigmoid-activated scores.
        """
        x = relu(self.ll1(x))
        x = relu(self.ll2(x))
        return sigmoid(self.ll3(x))
|
5272
dev-0/out.tsv
Normal file
5272
dev-0/out.tsv
Normal file
File diff suppressed because it is too large
Load Diff
97
run.py
Normal file
97
run.py
Normal file
@ -0,0 +1,97 @@
|
|||||||
|
import gensim.downloader
|
||||||
|
import torch.optim as optim
|
||||||
|
import torch.nn as nn
|
||||||
|
import torch
|
||||||
|
import numpy as np
|
||||||
|
from Net import NNet
|
||||||
|
#from timeit import default_timer as timer
|
||||||
|
|
||||||
|
|
||||||
|
def read_data(folder_name):
    """Load tokenised inputs and integer labels from ``folder_name``.

    Reads ``{folder_name}/in.tsv`` and ``{folder_name}/expected.tsv``
    as UTF-8 text.

    Args:
        folder_name: Directory containing ``in.tsv`` and ``expected.tsv``.

    Returns:
        A ``(x, y)`` tuple. ``x`` is a list with one list of lower-cased
        tokens per line of ``in.tsv``; ``y`` is a list with one ``int``
        per line of ``expected.tsv``.
    """
    token_rows = []
    with open(f'{folder_name}/in.tsv', encoding='utf-8') as inputs_file:
        for raw_line in inputs_file:
            tokens = raw_line.lower().split()
            # The last two whitespace-separated tokens are discarded.
            # NOTE(review): presumably trailing metadata columns (e.g. a
            # timestamp) - confirm against the dataset format.
            token_rows.append(tokens[:-2])

    labels = []
    with open(f'{folder_name}/expected.tsv', encoding='utf-8') as labels_file:
        for raw_line in labels_file:
            # Only the first whitespace-separated field is the label.
            labels.append(int(raw_line.split()[0]))

    return token_rows, labels
|
||||||
|
|
||||||
|
|
||||||
|
def process_data(data, word2vec):
    """Turn tokenised documents into one averaged embedding each.

    Args:
        data: Iterable of token lists, one per document.
        word2vec: Embedding lookup supporting ``token in word2vec`` and
            ``word2vec[token]`` (for example gensim ``KeyedVectors`` or
            a plain dict of numpy arrays).

    Returns:
        A list with one 1-D numpy array per document: the element-wise
        mean of the vectors of all tokens found in ``word2vec``, or an
        all-zero vector when no token of the document is known.
    """
    # Size the all-zero fallback from the embedding model itself so that
    # models of another dimensionality work too; lookups without a
    # ``vector_size`` attribute keep the original width of 100.
    vector_size = getattr(word2vec, "vector_size", 100)

    processed_data = []
    for reddit in data:
        words_sim = [word2vec[word] for word in reddit if word in word2vec]
        if words_sim:
            processed_data.append(np.mean(words_sim, axis=0))
        else:
            processed_data.append(np.zeros(vector_size))
    return processed_data
|
||||||
|
|
||||||
|
|
||||||
|
def predict(folder_name, model, word_vec):
    """Classify every line of ``{folder_name}/in.tsv`` with ``model``.

    Each line is lower-cased, split on whitespace and stripped of its
    last two tokens, then embedded with ``process_data``. Samples are fed
    to the model one at a time with gradient tracking disabled.

    Relies on the module-level ``device`` for tensor placement.

    Args:
        folder_name: Directory containing ``in.tsv``.
        model: Callable model returning a score tensor per input vector.
        word_vec: Embedding lookup forwarded to ``process_data``.

    Returns:
        A list with one boolean tensor per input line, ``True`` where
        the model output is greater than 0.5.
    """
    with open(f'{folder_name}/in.tsv', encoding='utf-8') as source:
        token_rows = [row.lower().split()[:-2] for row in source]

    features = process_data(token_rows, word_vec)

    y_predictions = []
    with torch.no_grad():
        for vector in features:
            sample = torch.tensor(vector.astype(np.float32)).to(device)
            score = model(sample)
            # Threshold the score at 0.5 to get the class decision.
            y_predictions.append(score > 0.5)
    return y_predictions
|
||||||
|
|
||||||
|
|
||||||
|
def save_predictions(folder_name, predicted_labels):
    """Write one integer label per line to ``{folder_name}/out.tsv``.

    Args:
        folder_name: Existing directory in which ``out.tsv`` is created
            (or overwritten).
        predicted_labels: Iterable of tensors; the first element of each
            tensor, converted to ``int``, is written as the label.
    """
    predictions = [pred.int()[0].item() for pred in predicted_labels]

    with open(f"{folder_name}/out.tsv", "w", encoding="UTF-8") as file_out:
        # ``writelines`` takes an iterable of lines; hand it all lines at
        # once instead of passing it a single string per prediction.
        file_out.writelines(f"{pred}\n" for pred in predictions)
|
||||||
|
|
||||||
|
|
||||||
|
# Run on the GPU when one is available (it is a bit faster here),
# otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
print(device)

# Pretrained GloVe embeddings fetched through the gensim downloader.
word_vectors = gensim.downloader.load("glove-wiki-gigaword-100")

# Training set: token lists -> one averaged embedding per document.
x_data, y_train = read_data("train")
x_train = process_data(x_data, word_vectors)

model = NNet().to(device)

# Binary cross-entropy on the sigmoid output, optimised with plain SGD
# (no momentum).
criterion = nn.BCELoss()
optimizer = optim.SGD(model.parameters(), lr=0.005)
|
||||||
|
|
||||||
|
# Two passes over the training data; the weights are updated after every
# single example (there is no batching).
for epoch in range(2):
    running_loss = 0.0
    correct = 0.
    total = 0.
    for i, (sample, target) in enumerate(zip(x_train, y_train)):
        inputs = torch.tensor(sample.astype(np.float32)).to(device)
        # Shape (1,) float target so it matches the model output.
        label = torch.tensor([target], dtype=torch.float32).to(device)

        # Clear gradients left over from the previous example.
        optimizer.zero_grad()

        # Forward pass, loss, backward pass, parameter update.
        y_predicted = model(inputs)
        loss = criterion(y_predicted, label)
        loss.backward()
        optimizer.step()

        # Bookkeeping for the progress report.
        running_loss += loss.item()
        hits = (y_predicted > 0.5) == label
        correct += hits.type(torch.float).sum().item()
        total += label.size(0)

        # Report only after every 10000th example.
        if (i + 1) % 10000 != 0:
            continue
        print(f'[{epoch + 1}, {i + 1:5d}] loss: {running_loss / 10000:.3f}')
        # Accuracy is cumulative since the start of the current epoch.
        print(f'Accuracy score: {100 * correct / total} %')
        running_loss = 0.0
|
||||||
|
|
||||||
|
# Label the development and test sets and write each result to the
# out.tsv file in the corresponding directory.
for split in ('dev-0', 'test-A'):
    predicted = predict(split, model, word_vectors)
    save_predictions(split, predicted)
|
5152
test-A/out.tsv
Normal file
5152
test-A/out.tsv
Normal file
File diff suppressed because it is too large
Load Diff
Loading…
Reference in New Issue
Block a user