import torch
import torch.nn as nn
import torch.optim as optim
from torch.nn import functional as F

# Hyperparameters and label mapping (labels in expected.tsv are already 0/1).
epochs = 200
label_to_ix = {0: 0, 1: 1}
NUM_LABELS = 2

# Load the first 1000 training sentences and their labels.
data = []
data_out = []
with open('train/in.tsv', 'r') as f:
    for sent in f.readlines()[:1000]:
        data.append(sent.split())
with open('train/expected.tsv', 'r') as f:
    for sent in f.readlines()[:1000]:
        data_out.append(int(sent))

# Build a vocabulary index over every word seen in the training data.
word_to_ix = {}
for sent in data:
    for word in sent:
        if word not in word_to_ix:
            word_to_ix[word] = len(word_to_ix)

vocab_size = len(word_to_ix)


class LogisticRegression(nn.Module):
    """Bag-of-words logistic regression: a single linear layer + log-softmax."""

    def __init__(self, num_labels, vocab_size):
        super(LogisticRegression, self).__init__()
        self.linear = nn.Linear(vocab_size, num_labels)

    def forward(self, x):
        return F.log_softmax(self.linear(x), dim=1)


model = LogisticRegression(NUM_LABELS, vocab_size)
loss_function = nn.NLLLoss()
optimizer = optim.SGD(model.parameters(), lr=0.1)


def make_target(label, label_to_ix):
    return torch.LongTensor([label_to_ix[label]])


def make_bow_vector(sentence, word_to_ix):
    # Count occurrences of each known word; words outside the training
    # vocabulary are skipped so inference on unseen text does not crash.
    vec = torch.zeros(len(word_to_ix))
    for word in sentence:
        if word in word_to_ix:
            vec[word_to_ix[word]] += 1
    return vec.view(1, -1)


# Plain per-example SGD over the training set.
for epoch in range(epochs):
    for instance, label in zip(data, data_out):
        bow_vec = make_bow_vector(instance, word_to_ix)
        target = make_target(label, label_to_ix)
        model.zero_grad()
        log_probs = model(bow_vec)
        loss = loss_function(log_probs, target)
        loss.backward()
        optimizer.step()

# Predict a label for every test sentence and write one label per line.
with torch.no_grad(), \
        open('test-A/in.tsv', 'r') as inputf, \
        open('test-A/out.tsv', 'w') as outputf:
    for line in inputf:
        bow_vector = make_bow_vector(line.split(), word_to_ix)
        log_probs = model(bow_vector)
        if log_probs[0][0] > log_probs[0][1]:
            outputf.write("0\n")
        else:
            outputf.write("1\n")
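
# Optional sanity check (a minimal sketch, not part of the original pipeline):
# re-score the 1000 training examples and print the fraction the model labels
# correctly. It reuses model / make_bow_vector / data defined above. A held-out
# split (e.g. a dev-0/in.tsv + dev-0/expected.tsv pair, if the challenge
# provides one) would give a less optimistic estimate than training accuracy.
with torch.no_grad():
    correct = 0
    for instance, label in zip(data, data_out):
        log_probs = model(make_bow_vector(instance, word_to_ix))
        pred = int(torch.argmax(log_probs, dim=1).item())
        correct += int(pred == label)
    print("train accuracy: {:.3f}".format(correct / len(data)))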