Compare commits

...

1 Commit

Author SHA1 Message Date
Iwona Christop f8ce7024e4 s443930 2022-06-22 23:34:30 +02:00
6 changed files with 3137 additions and 291117 deletions

File diff suppressed because it is too large


@@ -1,53 +0,0 @@
import numpy as np
import torch
from gensim.models import Word2Vec

import inout as io
from nnModel import NeuralNetworkModel, trainModel, predict


def getX(train, dev, test):
    # Read each split and tokenize the text column on whitespace.
    Xs = []
    for file in [train, dev, test]:
        X = io.read(file)
        Xs.append([x[0].split() for x in X])
    return Xs


def getY(files):
    # Read the expected labels for the given splits.
    return [np.array(io.read(file)) for file in files]


def vectorize(word2vec, documents):
    # Represent a document as the mean of its word vectors;
    # out-of-vocabulary words fall back to a zero vector.
    vectorized = []
    for d in documents:
        vectorized.append(np.mean(
            [word2vec.wv[word] if word in word2vec.wv else np.zeros(100, dtype=float) for word in d],
            axis=0))
    return np.array(vectorized)


if __name__ == '__main__':
    trainX, devX, testX = getX('train/in.tsv.xz', 'dev-0/in.tsv.xz', 'test-A/in.tsv.xz')
    trainY, devY = getY(['train/expected.tsv', 'dev-0/expected.tsv'])

    word2vec = Word2Vec(trainX, vector_size=100, min_count=2)
    trainX = vectorize(word2vec, trainX)
    devX = vectorize(word2vec, devX)
    testX = vectorize(word2vec, testX)

    nnModel = NeuralNetworkModel()
    optimizer = torch.optim.SGD(nnModel.parameters(), lr=0.1)
    trainModel(nnModel, trainX, trainY, devX, devY, optimizer)

    io.write(predict(nnModel, trainX), 'train/out.tsv')
    io.write(predict(nnModel, devX), 'dev-0/out.tsv')
    io.write(predict(nnModel, testX), 'test-A/out.tsv')
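
The script above imports a small helper module, inout, which is not part of this diff. A minimal sketch of what its read/write helpers might look like, assuming tab-separated files that may be xz-compressed (only the module name and the read/write calls come from the script; everything else here is an assumption):

# inout.py (assumed helper, not shown in this diff)
import lzma

def read(path):
    # Open .xz files transparently; split each line on tabs.
    opener = lzma.open if path.endswith('.xz') else open
    with opener(path, 'rt', encoding='utf-8') as f:
        return [line.rstrip('\n').split('\t') for line in f]

def write(lines, path):
    # Write one prediction per line.
    with open(path, 'w', encoding='utf-8') as f:
        for line in lines:
            f.write(f'{line}\n')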


@@ -1,67 +0,0 @@
import numpy as np
import torch


class NeuralNetworkModel(torch.nn.Module):
    # Simple feed-forward binary classifier: features -> 500 -> 1 with a sigmoid output.
    def __init__(self, features=100):
        super(NeuralNetworkModel, self).__init__()
        self.fc1 = torch.nn.Linear(features, 500)
        self.fc2 = torch.nn.Linear(500, 1)

    def forward(self, x):
        x = self.fc1(x)
        x = torch.relu(x)
        x = self.fc2(x)
        x = torch.sigmoid(x)
        return x


def getMetrics(model, X_dataset, Y_dataset, criterion, batchSize):
    # Mean loss and accuracy over a dataset, computed in mini-batches.
    loss_score = 0
    acc_score = 0
    items_total = 0
    model.eval()
    with torch.no_grad():
        for i in range(0, Y_dataset.shape[0], batchSize):
            X = torch.tensor(X_dataset[i:i+batchSize].astype(np.float32))
            Y = torch.tensor(Y_dataset[i:i+batchSize].astype(np.float32)).reshape(-1, 1)
            Y_predictions = model(X)
            acc_score += torch.sum((Y_predictions > 0.5) == Y).item()
            items_total += Y.shape[0]
            loss = criterion(Y_predictions, Y)
            loss_score += loss.item() * Y.shape[0]
    return (loss_score / items_total), (acc_score / items_total)


def trainModel(model, trainX, trainY, devX, devY, optimizer, criterion=torch.nn.BCELoss(), epochs=5, batchSize=256):
    for epoch in range(epochs):
        model.train()
        for i in range(0, trainY.shape[0], batchSize):
            X = torch.tensor(trainX[i:i+batchSize].astype(np.float32))
            Y = torch.tensor(trainY[i:i+batchSize].astype(np.float32)).reshape(-1, 1)
            Y_predictions = model(X)
            optimizer.zero_grad()
            loss = criterion(Y_predictions, Y)
            loss.backward()
            optimizer.step()
        print(f'Epoch {epoch+1}/{epochs}')
        loss, accuracy = getMetrics(model, trainX, trainY, criterion, batchSize)
        print(f'Train set\nloss = {loss}, accuracy = {accuracy}')
        loss, accuracy = getMetrics(model, devX, devY, criterion, batchSize)
        print(f'Dev set\nloss = {loss}, accuracy = {accuracy}')


def flatten(t):
    # Flatten [[0.0], [1.0], ...] into a flat list of '0'/'1' strings.
    return [str(int(item)) for sublist in t for item in sublist]


def predict(model, testX):
    # Hard 0/1 predictions as strings, one per input row.
    model.eval()
    testX = torch.tensor(testX.astype(np.float32))
    with torch.no_grad():
        return flatten(model(testX).round().tolist())
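
The deleted nnModel.py can be exercised on its own. A minimal smoke test (assumed, not part of the repository) that relies only on the interfaces shown in this diff (NeuralNetworkModel, trainModel, predict) and substitutes random data for the Word2Vec document vectors:

# smoke_test.py (assumed example, not in the repository)
import numpy as np
import torch
from nnModel import NeuralNetworkModel, trainModel, predict

rng = np.random.default_rng(0)
X = rng.normal(size=(1000, 100)).astype(np.float32)   # 100 features, matching the model default
Y = rng.integers(0, 2, size=1000).astype(np.float32)  # binary labels

model = NeuralNetworkModel()
optimizer = torch.optim.SGD(model.parameters(), lr=0.1)
trainModel(model, X, Y, X, Y, optimizer, epochs=1)
print(predict(model, X)[:10])  # list of '0'/'1' strings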

File diff suppressed because it is too large

train/out.tsv (289579 changed lines)

File diff suppressed because it is too large

transformery.ipynb (1719 changed lines, normal file)

File diff suppressed because it is too large