s443930
This commit is contained in:
parent
ecfafbf86c
commit
587808bce9
5272
dev-0/out.tsv
Normal file
5272
dev-0/out.tsv
Normal file
File diff suppressed because it is too large
Load Diff
22
inout.py
Normal file
22
inout.py
Normal file
@ -0,0 +1,22 @@
|
|||||||
|
import csv, lzma
|
||||||
|
|
||||||
|
# Reads input from the given file path (optionally xz-compressed) and returns a list of its lines
|
||||||
|
def read(dir):
    """Read a data file and return its lines as a list.

    Despite its name, ``dir`` is the path of a single file.

    * A path ending in ``.xz`` is opened with ``lzma``; every line is decoded
      as UTF-8 and split on tab characters, so each entry is a list of
      fields (the last field of a line keeps its trailing newline).
    * Any other path is read as UTF-8 text; each entry is the line with its
      newline characters removed.

    Returns:
        list: one entry per line, shaped as described above.
    """
    # Decide by file extension rather than by substring: a plain file that
    # merely has "xz" somewhere in its path must not be handed to lzma.
    if str(dir).endswith('.xz'):
        with lzma.open(dir) as f:
            return [line.decode('utf-8').split('\t') for line in f]

    # Explicit encoding so both branches decode identically on every platform.
    with open(dir, encoding='utf-8') as f:
        return [line.replace('\n', '') for line in f]
|
||||||
|
|
||||||
|
# Takes the output (a list of rows) and writes it as CSV to the given file path
|
||||||
|
def write(output, dir):
    """Write ``output`` to the file at path ``dir`` using ``csv.writer``.

    Args:
        output: iterable of rows. A row may be a sequence of fields or a
            single string; a string is written as one field.
        dir: path of the file to create or overwrite (a file path, despite
            the parameter name).
    """
    # csv.writer iterates over every row, so a bare string such as "10" would
    # be written as the two fields "1,0". Wrap strings to keep them intact.
    rows = ([row] if isinstance(row, str) else row for row in output)
    with open(dir, 'w', newline='') as f:
        csv.writer(f).writerows(rows)
|
53
neurotic.py
Normal file
53
neurotic.py
Normal file
@ -0,0 +1,53 @@
|
|||||||
|
import torch, numpy as np
|
||||||
|
from gensim.models import Word2Vec
|
||||||
|
import inout as io
|
||||||
|
from nnModel import NeuralNetworkModel, trainModel, predict
|
||||||
|
|
||||||
|
|
||||||
|
def getX(train, dev, test):
    """Load the three input files and tokenize the first column of each row.

    Each path is read with ``io.read``; for every row only element ``0`` is
    kept and split on whitespace.

    Returns:
        list: ``[train_docs, dev_docs, test_docs]``, each a list of token lists.
    """
    def tokenized(path):
        # Only the first field of every row is used as the document text.
        return [row[0].split() for row in io.read(path)]

    return [tokenized(path) for path in (train, dev, test)]
|
||||||
|
|
||||||
|
def getY(dir):
    """Read every file listed in ``dir`` and return the contents as arrays.

    Args:
        dir: iterable of file paths (not a directory, despite the name).

    Returns:
        list: one ``np.array`` per path, built from what ``io.read`` returns.
    """
    arrays = []
    for path in dir:
        arrays.append(np.array(io.read(path)))
    return arrays
|
||||||
|
|
||||||
|
def vectorize(word2vec, documents):
    """Turn each tokenized document into the mean of its word vectors.

    Args:
        word2vec: object exposing ``wv``, which supports ``word in wv`` and
            ``wv[word]`` (e.g. a trained gensim ``Word2Vec`` model).
        documents: iterable of token lists.

    Returns:
        np.ndarray: one row per document. Words missing from ``wv`` count as
        zero vectors; a document with no tokens becomes an all-zero row.
    """
    vectors = word2vec.wv
    # Take the embedding width from the model instead of hard-coding it;
    # fall back to the historical value of 100 if the attribute is absent.
    dim = getattr(vectors, 'vector_size', 100)
    unknown = np.zeros(dim, dtype=float)

    vectorized = []
    for d in documents:
        if not d:
            # np.mean over an empty list yields a NaN scalar, which would
            # make the stacked result ragged; use a zero vector instead.
            vectorized.append(unknown)
            continue
        vectorized.append(
            np.mean([vectors[word] if word in vectors else unknown for word in d], axis=0)
        )
    return np.array(vectorized)
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == '__main__':
    # Load tokenized inputs and the expected labels.
    trainX, devX, testX = getX('train/in.tsv.xz', 'dev-0/in.tsv.xz', 'test-A/in.tsv.xz')
    trainY, devY = getY(['train/expected.tsv', 'dev-0/expected.tsv'])

    # Fit word embeddings on the training documents only.
    word2vec = Word2Vec(trainX, vector_size=100, min_count=2)

    # Replace every split by its document vectors (train, dev, test order).
    trainX, devX, testX = (vectorize(word2vec, docs) for docs in (trainX, devX, testX))

    nnModel = NeuralNetworkModel()
    optimizer = torch.optim.SGD(nnModel.parameters(), lr = 0.1)
    trainModel(nnModel, trainX, trainY, devX, devY, optimizer)

    # Write predictions for each split next to its input file.
    for features, target in (
        (trainX, 'train/out.tsv'),
        (devX, 'dev-0/out.tsv'),
        (testX, 'test-A/out.tsv'),
    ):
        io.write(predict(nnModel, features), target)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
67
nnModel.py
Normal file
67
nnModel.py
Normal file
@ -0,0 +1,67 @@
|
|||||||
|
import torch, numpy as np
|
||||||
|
|
||||||
|
class NeuralNetworkModel(torch.nn.Module):
    """Two-layer classifier: Linear(features, 500) -> ReLU -> Linear(500, 1) -> sigmoid."""

    def __init__(self, features=100):
        """Create the two linear layers; ``features`` is the input width."""
        super().__init__()
        hidden_units = 500
        self.fc1 = torch.nn.Linear(features, hidden_units)
        self.fc2 = torch.nn.Linear(hidden_units, 1)

    def forward(self, x):
        """Return the sigmoid output of the network for input ``x``."""
        hidden = torch.relu(self.fc1(x))
        return torch.sigmoid(self.fc2(hidden))
|
||||||
|
|
||||||
|
def getMetrics(model, X_dataset, Y_dataset, criterion, batchSize):
    """Compute the average loss and accuracy of ``model`` over a dataset.

    The model is switched to evaluation mode and left in it. Data is processed
    in slices of ``batchSize`` rows; the number of rows is taken from
    ``Y_dataset.shape[0]``.

    Args:
        model: callable torch module returning one value per row.
        X_dataset: numpy array of features, convertible to float32.
        Y_dataset: numpy array of labels, convertible to float32.
        criterion: loss function called as ``criterion(predictions, targets)``.
        batchSize: number of rows per evaluation batch.

    Returns:
        tuple: ``(mean_loss, accuracy)``. Accuracy counts a prediction as
        positive when it is greater than 0.5.

    Raises:
        ZeroDivisionError: if ``Y_dataset`` has no rows.
    """
    loss_score = 0
    acc_score = 0
    items_total = 0
    model.eval()
    # Evaluation never back-propagates, so skip autograd bookkeeping.
    with torch.no_grad():
        for i in range(0, Y_dataset.shape[0], batchSize):
            X = torch.tensor(X_dataset[i:i+batchSize].astype(np.float32))
            Y = torch.tensor(Y_dataset[i:i+batchSize].astype(np.float32)).reshape(-1, 1)
            Y_predictions = model(X)

            acc_score += torch.sum((Y_predictions > 0.5) == Y).item()
            items_total += Y.shape[0]

            # The criterion returns a batch mean; weight it by the batch size
            # so that a shorter final batch is averaged correctly.
            loss_score += criterion(Y_predictions, Y).item() * Y.shape[0]
    return (loss_score / items_total), (acc_score / items_total)
|
||||||
|
|
||||||
|
def trainModel(model, trainX, trainY, devX, devY, optimizer, criterion=torch.nn.BCELoss(), epochs=5, batchSize=256):
    """Train ``model`` with mini-batches and print metrics after every epoch.

    Args:
        model: torch module to train (updated in place).
        trainX, trainY: numpy arrays with training features and labels,
            convertible to float32.
        devX, devY: numpy arrays with validation features and labels; the
            validation report is skipped when either is ``None`` or ``devY``
            is empty.
        optimizer: torch optimizer bound to the parameters of ``model``.
        criterion: loss function called as ``criterion(predictions, targets)``.
        epochs: number of passes over the training data.
        batchSize: number of rows per mini-batch.

    After each epoch the loss and accuracy on the training set, and on the
    dev set when provided, are printed using ``getMetrics``.
    """
    for epoch in range(epochs):
        # getMetrics leaves the model in eval mode, so re-enable training
        # mode at the start of every epoch.
        model.train()
        for i in range(0, trainY.shape[0], batchSize):
            X = torch.tensor(trainX[i:i+batchSize].astype(np.float32))
            Y = torch.tensor(trainY[i:i+batchSize].astype(np.float32)).reshape(-1, 1)

            optimizer.zero_grad()
            loss = criterion(model(X), Y)
            loss.backward()
            optimizer.step()

        print(f'Epoch {epoch+1}/{epochs}')
        loss, accuracy = getMetrics(model, trainX, trainY, criterion, batchSize)
        print(f'Train set\nloss = {loss}, accuracy = {accuracy}')

        # Report held-out metrics as well; the dev arguments were previously
        # accepted but never used.
        if devX is not None and devY is not None and len(devY):
            loss, accuracy = getMetrics(model, devX, devY, criterion, batchSize)
            print(f'Dev set\nloss = {loss}, accuracy = {accuracy}')
|
||||||
|
|
||||||
|
def flatten(t):
    """Flatten a list of lists into one list of integer strings.

    Every item is truncated with ``int`` and then converted with ``str``,
    keeping the original row-major order.
    """
    labels = []
    for row in t:
        labels.extend(str(int(value)) for value in row)
    return labels
|
||||||
|
|
||||||
|
def predict(model, testX):
    """Run ``model`` on ``testX`` and return the rounded outputs as strings.

    ``testX`` is converted to a float32 tensor, the model is called without
    gradient tracking, and the rounded outputs are passed through ``flatten``.
    """
    features = torch.tensor(testX.astype(np.float32))
    with torch.no_grad():
        rounded = model(features).round().tolist()
    return flatten(rounded)
|
5152
test-A/out.tsv
Normal file
5152
test-A/out.tsv
Normal file
File diff suppressed because it is too large
Load Diff
289579
train/out.tsv
Normal file
289579
train/out.tsv
Normal file
File diff suppressed because it is too large
Load Diff
Loading…
Reference in New Issue
Block a user