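"""Bag-of-words logistic regression in PyTorch.

Trains a binary classifier on train/in.tsv (tokenized text) and
train/expected.tsv (0/1 labels), then writes one 0/1 prediction per line of
test-A/in.tsv to test-A/out.tsv.
"""
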
import torch
import torch.nn as nn
import torch.optim as optim
from torch.nn import functional as F

# Tokenized training sentences and their binary labels.
data = []
data_out = []

epochs = 200
# Identity mapping from raw labels to class indices.
label_to_ix = {0: 0, 1: 1}

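# Assumed input layout: each line of train/in.tsv is a whitespace-tokenized
# sentence, and the matching line of train/expected.tsv is its 0/1 label.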
# Read the first 1000 training examples.
with open('train/in.tsv', 'r') as f:
    for sent in f.readlines()[:1000]:
        data.append(sent.split())

with open('train/expected.tsv', 'r') as f:
    for sent in f.readlines()[:1000]:
        data_out.append(int(sent))

# Build the vocabulary: map each distinct token to a feature index.
word_to_ix = {}
for sent in data:
    for word in sent:
        if word not in word_to_ix:
            word_to_ix[word] = len(word_to_ix)

NUM_LABELS = 2
vocab_size = len(word_to_ix)

class LogisticRegression(nn.Module):
    def __init__(self, num_labels, vocab_size):
        super().__init__()
        # A single linear layer over the bag-of-words vector: one weight per
        # (word, label) pair, mapping (1, vocab_size) -> (1, num_labels).
        self.linear = nn.Linear(vocab_size, num_labels)

    def forward(self, x):
        # Log-probabilities, as required by NLLLoss below.
        return F.log_softmax(self.linear(x), dim=1)

model = LogisticRegression(NUM_LABELS, vocab_size)
loss_function = nn.NLLLoss()
optimizer = optim.SGD(model.parameters(), lr=0.1)

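# Note: nn.NLLLoss expects log-probabilities, which is why forward() applies
# log_softmax; the pairing is equivalent to nn.CrossEntropyLoss on raw logits.
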
def make_target(label, label_to_ix):
    # Wrap the label's class index in a LongTensor of shape (1,) for NLLLoss.
    return torch.LongTensor([label_to_ix[label]])

def make_bow_vector(sentence, word_to_ix):
    # Bag-of-words count vector of shape (1, vocab_size).
    vec = torch.zeros(len(word_to_ix))
    for word in sentence:
        # Skip out-of-vocabulary words so unseen test tokens don't raise KeyError.
        if word in word_to_ix:
            vec[word_to_ix[word]] += 1
    return vec.view(1, -1)

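# Worked example with a hypothetical two-word vocabulary:
#   word_to_ix = {'hello': 0, 'world': 1}
#   make_bow_vector(['hello', 'hello', 'world'], word_to_ix)
#   -> tensor([[2., 1.]])   # shape (1, vocab_size)
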
# Train with one SGD step per example (batch size 1).
for epoch in range(epochs):
    for instance, label in zip(data, data_out):
        bow_vec = make_bow_vector(instance, word_to_ix)
        target = make_target(label, label_to_ix)
        model.zero_grad()
        log_probs = model(bow_vec)
        loss = loss_function(log_probs, target)
        loss.backward()
        optimizer.step()

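# To monitor training, one could print the running loss inside the loop,
# e.g. print(epoch, loss.item()), and confirm that it decreases over epochs.
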
# Classify the test set and write one 0/1 prediction per line.
with torch.no_grad(), open('test-A/in.tsv', 'r') as inputf, \
        open('test-A/out.tsv', 'w') as outputf:
    for line in inputf:
        bow_vector = make_bow_vector(line.split(), word_to_ix)
        log_probs = model(bow_vector)
        # Pick the label with the higher log-probability.
        if log_probs[0][0] > log_probs[0][1]:
            outputf.write("0\n")
        else:
            outputf.write("1\n")