53 lines
1.3 KiB
Python
53 lines
1.3 KiB
Python
import torch, numpy as np
|
|
from gensim.models import Word2Vec
|
|
import inout as io
|
|
from nnModel import NeuralNetworkModel, trainModel, predict
|
|
|
|
|
|
def getX(train, dev, test):
    """Read the three input files and tokenize every record on whitespace.

    Each file is loaded with ``io.read``; for every record returned, the
    element at index 0 is split on whitespace.

    Returns:
        A three-element list, in (train, dev, test) order, where each element
        is a list of token lists, one per record of the corresponding file.
    """
    def tokenize_file(path):
        # Only index 0 of each record is used; presumably that is the text
        # column of the TSV -- confirm against ``inout.read``.
        return [record[0].split() for record in io.read(path)]

    return [tokenize_file(path) for path in (train, dev, test)]
|
|
|
|
def getY(dir):
    """Load every file named in *dir* and return the contents as NumPy arrays.

    Despite its name, *dir* is iterated as a collection of file paths; each
    path is read with ``io.read`` and the result is wrapped in ``np.array``.

    Returns:
        A list of arrays, in the same order as the paths in *dir*.
    """
    labels = []
    for path in dir:
        contents = io.read(path)
        labels.append(np.array(contents))
    return labels
|
|
|
|
def vectorize(word2vec, documents):
    """Represent each tokenized document as the mean of its word vectors.

    Args:
        word2vec: Trained model exposing ``vector_size`` and a ``wv`` lookup
            that supports ``word in wv`` and ``wv[word]``.
        documents: Iterable of token sequences.

    Returns:
        ``np.ndarray`` with one row per document. Words missing from
        ``word2vec.wv`` contribute a zero vector to the average, and a
        document without any tokens is represented by a zero vector.
    """
    # Take the dimensionality from the model instead of hard-coding 100, so
    # the fallback vector always matches the embeddings being averaged.
    dim = word2vec.vector_size
    keyed_vectors = word2vec.wv
    zero_vector = np.zeros(dim, dtype=float)

    vectorized = []
    for document in documents:
        word_vectors = [
            keyed_vectors[word] if word in keyed_vectors else zero_vector
            for word in document
        ]
        if not word_vectors:
            # np.mean over an empty list yields a NaN scalar (and a
            # RuntimeWarning), which would make the stacked result ragged.
            vectorized.append(zero_vector.copy())
            continue
        vectorized.append(np.mean(word_vectors, axis=0))
    return np.array(vectorized)
|
|
|
|
|
|
if __name__ == '__main__':
    # Tokenized inputs for all three splits; labels exist only for train and dev.
    train_docs, dev_docs, test_docs = getX('train/in.tsv.xz', 'dev-0/in.tsv.xz', 'test-A/in.tsv.xz')
    train_labels, dev_labels = getY(['train/expected.tsv', 'dev-0/expected.tsv'])

    # The embedding model is fitted on the training documents only.
    embeddings = Word2Vec(train_docs, vector_size=100, min_count=2)

    # Vectorize the splits in train, dev, test order.
    train_vecs, dev_vecs, test_vecs = [
        vectorize(embeddings, docs) for docs in (train_docs, dev_docs, test_docs)
    ]

    classifier = NeuralNetworkModel()
    sgd = torch.optim.SGD(classifier.parameters(), lr=0.1)
    trainModel(classifier, train_vecs, train_labels, dev_vecs, dev_labels, sgd)

    # Write predictions for every split next to its input file.
    outputs = (
        (train_vecs, 'train/out.tsv'),
        (dev_vecs, 'dev-0/out.tsv'),
        (test_vecs, 'test-A/out.tsv'),
    )
    for features, out_path in outputs:
        io.write(predict(classifier, features), out_path)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|