s443930
This commit is contained in:
parent 587808bce9
commit f8ce7024e4
dev-0/out.tsv (1406 lines changed)
File diff suppressed because it is too large.
neurotic.py (53 lines removed)
@@ -1,53 +0,0 @@
import torch, numpy as np
from gensim.models import Word2Vec

import inout as io
from nnModel import NeuralNetworkModel, trainModel, predict


def getX(train, dev, test):
    # Read each split and tokenize the first TSV column on whitespace.
    Xs = []
    for file in [train, dev, test]:
        X = io.read(file)
        Xs.append([x[0].split() for x in X])
    return Xs


def getY(files):
    # Load the expected labels for each split.
    return [np.array(io.read(file)) for file in files]


def vectorize(word2vec, documents):
    # Mean-pool the word vectors of each document; out-of-vocabulary
    # words contribute a zero vector.
    vectorized = []
    for d in documents:
        vectorized.append(np.mean(
            [word2vec.wv[word] if word in word2vec.wv else np.zeros(100, dtype=float)
             for word in d],
            axis=0))
    return np.array(vectorized)


if __name__ == '__main__':
    trainX, devX, testX = getX('train/in.tsv.xz', 'dev-0/in.tsv.xz', 'test-A/in.tsv.xz')
    trainY, devY = getY(['train/expected.tsv', 'dev-0/expected.tsv'])

    # Train 100-dimensional Word2Vec embeddings on the training documents.
    word2vec = Word2Vec(trainX, vector_size=100, min_count=2)

    trainX = vectorize(word2vec, trainX)
    devX = vectorize(word2vec, devX)
    testX = vectorize(word2vec, testX)

    nnModel = NeuralNetworkModel()
    optimizer = torch.optim.SGD(nnModel.parameters(), lr=0.1)

    trainModel(nnModel, trainX, trainY, devX, devY, optimizer)

    io.write(predict(nnModel, trainX), 'train/out.tsv')
    io.write(predict(nnModel, devX), 'dev-0/out.tsv')
    io.write(predict(nnModel, testX), 'test-A/out.tsv')
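Note: neurotic.py imports a local helper module inout (as io) that does not appear in this diff. A minimal sketch consistent with how read and write are called above, assuming plain or xz-compressed TSV input and one prediction per output line (the actual module may differ):

# Hypothetical reconstruction of the inout helper; the real module is not in this diff.
import lzma

def read(path):
    # Return a list of rows, each a list of tab-separated fields;
    # .xz files are decompressed transparently.
    opener = lzma.open if path.endswith('.xz') else open
    with opener(path, 'rt', encoding='utf-8') as f:
        return [line.rstrip('\n').split('\t') for line in f]

def write(predictions, path):
    # Write one prediction per line.
    with open(path, 'w', encoding='utf-8') as f:
        for p in predictions:
            f.write(f'{p}\n')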
nnModel.py (67 lines removed)
@@ -1,67 +0,0 @@
import torch, numpy as np


class NeuralNetworkModel(torch.nn.Module):
    # Single-hidden-layer binary classifier: features -> 500 -> 1, sigmoid output.
    def __init__(self, features=100):
        super(NeuralNetworkModel, self).__init__()
        self.fc1 = torch.nn.Linear(features, 500)
        self.fc2 = torch.nn.Linear(500, 1)

    def forward(self, x):
        x = self.fc1(x)
        x = torch.relu(x)
        x = self.fc2(x)
        x = torch.sigmoid(x)
        return x


def getMetrics(model, X_dataset, Y_dataset, criterion, batchSize):
    loss_score = 0
    acc_score = 0
    items_total = 0
    model.eval()
    with torch.no_grad():  # no gradients needed during evaluation
        for i in range(0, Y_dataset.shape[0], batchSize):
            X = torch.tensor(X_dataset[i:i+batchSize].astype(np.float32))
            Y = torch.tensor(Y_dataset[i:i+batchSize].astype(np.float32)).reshape(-1, 1)
            Y_predictions = model(X)
            acc_score += torch.sum((Y_predictions > 0.5) == Y).item()
            items_total += Y.shape[0]

            loss = criterion(Y_predictions, Y)
            loss_score += loss.item() * Y.shape[0]
    return (loss_score / items_total), (acc_score / items_total)


def trainModel(model, trainX, trainY, devX, devY, optimizer, criterion=torch.nn.BCELoss(), epochs=5, batchSize=256):
    for epoch in range(epochs):
        model.train()
        for i in range(0, trainY.shape[0], batchSize):
            X = torch.tensor(trainX[i:i+batchSize].astype(np.float32))
            Y = torch.tensor(trainY[i:i+batchSize].astype(np.float32)).reshape(-1, 1)
            Y_predictions = model(X)

            optimizer.zero_grad()
            loss = criterion(Y_predictions, Y)
            loss.backward()
            optimizer.step()

        print(f'Epoch {epoch+1}/{epochs}')
        loss, accuracy = getMetrics(model, trainX, trainY, criterion, batchSize)
        print(f'Train set\nloss = {loss}, accuracy = {accuracy}')
        # Evaluate on the dev split too (devX/devY were otherwise unused).
        loss, accuracy = getMetrics(model, devX, devY, criterion, batchSize)
        print(f'Dev set\nloss = {loss}, accuracy = {accuracy}')


def flatten(t):
    # Flatten a list of single-element prediction lists into '0'/'1' strings.
    return [str(int(item)) for sublist in t for item in sublist]


def predict(model, testX):
    testX = torch.tensor(testX.astype(np.float32))
    with torch.no_grad():
        return flatten(model(testX).round().tolist())
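For reference, nnModel.py is consumed only through NeuralNetworkModel, trainModel, and predict (see neurotic.py above). A self-contained toy usage sketch, with synthetic data standing in for the mean-pooled Word2Vec features (only the shapes are taken from the code; the data is invented):

import numpy as np, torch
from nnModel import NeuralNetworkModel, trainModel, predict

# Synthetic stand-ins for 100-dimensional document vectors and 0/1 labels.
X = np.random.randn(1000, 100)
Y = np.random.randint(0, 2, size=(1000, 1))

model = NeuralNetworkModel()
optimizer = torch.optim.SGD(model.parameters(), lr=0.1)

# Train on the first 800 rows, hold out the remaining 200 as a dev set.
trainModel(model, X[:800], Y[:800], X[800:], Y[800:], optimizer)
print(predict(model, X[800:])[:10])  # e.g. ['0', '1', ...]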
test-A/out.tsv (1430 lines changed)
File diff suppressed because it is too large.

train/out.tsv (289579 lines changed)
File diff suppressed because it is too large.
transformery.ipynb (1719 lines, new file)
File diff suppressed because it is too large.