# paranormal-or-skeptic/neurotic.py
#
# 53 lines
# 1.3 KiB
# Python

import torch, numpy as np
from gensim.models import Word2Vec
import inout as io
from nnModel import NeuralNetworkModel, trainModel, predict
def getX(train, dev, test):
    """Read the three input files and tokenize the first field of every row.

    Each path is handed to ``io.read``; for every row it returns, the
    element at index 0 is split on whitespace into a list of tokens.

    Args:
        train: Path of the training input file.
        dev: Path of the development input file.
        test: Path of the test input file.

    Returns:
        A list with three entries (train, dev, test, in that order), each
        being a list of token lists, one per row read.
    """
    def tokenize(path):
        # Only the first column of each row is used.
        return [row[0].split() for row in io.read(path)]

    return [tokenize(path) for path in (train, dev, test)]
def getY(dir):
    """Load every file listed in *dir* and wrap each result in a NumPy array.

    Despite its name, *dir* is iterated as a collection of file paths; each
    path is read with ``io.read``.

    Args:
        dir: Iterable of file paths.

    Returns:
        A list of ``numpy.ndarray`` objects, one per path, in input order.
    """
    loaded = []
    for path in dir:
        loaded.append(np.array(io.read(path)))
    return loaded
def vectorize(word2vec, documents):
    """Turn each tokenized document into the mean of its word vectors.

    Words missing from ``word2vec.wv`` contribute a zero vector to the mean.
    A document with no tokens at all maps to a zero vector instead of the
    scalar NaN that ``np.mean`` returns for an empty list, so every row of
    the result has the same length.

    Args:
        word2vec: Object exposing a ``wv`` attribute that supports ``in``
            membership tests and ``wv[word]`` lookup returning a vector.
        documents: Iterable of documents, each an iterable of word tokens.

    Returns:
        A ``numpy.ndarray`` with one row per document.
    """
    keyed_vectors = word2vec.wv
    # Size the fallback from the model itself; 100 is kept only as a
    # fallback for objects that do not expose ``vector_size``.
    dim = getattr(keyed_vectors, "vector_size", 100)
    zero_vector = np.zeros(dim, dtype=float)

    vectorized = []
    for d in documents:
        word_vectors = [
            keyed_vectors[word] if word in keyed_vectors else zero_vector
            for word in d
        ]
        if word_vectors:
            vectorized.append(np.mean(word_vectors, axis=0))
        else:
            # Empty document: averaging nothing would yield NaN.
            vectorized.append(zero_vector)
    return np.array(vectorized)
if __name__ == '__main__':
    # Tokenized inputs for the three splits; expected labels are loaded
    # for train and dev-0 only.
    trainTokens, devTokens, testTokens = getX(
        'train/in.tsv.xz', 'dev-0/in.tsv.xz', 'test-A/in.tsv.xz'
    )
    trainY, devY = getY(['train/expected.tsv', 'dev-0/expected.tsv'])

    # The embedding model is built from the training tokens only and then
    # reused to vectorize all three splits.
    embeddings = Word2Vec(trainTokens, vector_size=100, min_count=2)
    trainX, devX, testX = [
        vectorize(embeddings, docs)
        for docs in (trainTokens, devTokens, testTokens)
    ]

    model = NeuralNetworkModel()
    sgd = torch.optim.SGD(model.parameters(), lr=0.1)
    trainModel(model, trainX, trainY, devX, devY, sgd)

    # Write predictions for every split, in the order train, dev-0, test-A.
    outputs = (
        (trainX, 'train/out.tsv'),
        (devX, 'dev-0/out.tsv'),
        (testX, 'test-A/out.tsv'),
    )
    for features, outPath in outputs:
        io.write(predict(model, features), outPath)