"""Bag-of-words text classifier: a two-class linear model trained with SGD.

Reads whitespace-tokenised sentences and integer labels from ``train/``,
trains the model, and writes one predicted class index per line of
``dev-0/in.tsv`` to ``dev-0/out.tsv``.
"""
import torch
import torch.nn as nn
import torch.optim as optim
import itertools as IT
import numpy as np
import csv


class LogisticRegression(torch.nn.Module):
    """Linear layer with two outputs followed by a log-softmax."""

    def __init__(self, WORDS_IN_DICTIONARY):
        """Create the model.

        Args:
            WORDS_IN_DICTIONARY: number of input features (vocabulary size).
        """
        super().__init__()
        self.linear = torch.nn.Linear(WORDS_IN_DICTIONARY, 2)

    def forward(self, x):
        """Return per-class log-probabilities for ``x``.

        ``nn.NLLLoss`` (used in ``main``) expects log-probabilities, so the
        scores are normalised with log-softmax over the class dimension
        rather than squashed independently with a sigmoid.
        """
        return torch.log_softmax(self.linear(x), dim=-1)


def make_vector(sentence, dictionary):
    """Build a bag-of-words count vector for ``sentence``.

    Args:
        sentence: iterable of tokens.
        dictionary: mapping from token to column index.

    Returns:
        A float tensor of shape ``(1, len(dictionary))`` holding token counts.

    Raises:
        KeyError: if a token is missing from ``dictionary``.
    """
    vector = torch.zeros(len(dictionary))
    for word in sentence:
        vector[dictionary[word]] += 1
    return vector.view(1, -1)


def _read_lines(path, limit=None):
    """Return at most ``limit`` leading lines of ``path`` (all if ``None``)."""
    with open(path, 'r') as handle:
        # islice stops reading once ``limit`` lines have been consumed.
        return list(IT.islice(handle, limit))


def read_data(path, limit=1000):
    """Read ``path`` and split each line into whitespace-separated tokens.

    Args:
        path: file to read.
        limit: maximum number of leading lines to read; ``None`` reads all.

    Returns:
        A list with one token list per line read.
    """
    return [line.split() for line in _read_lines(path, limit)]


def main():
    """Train on ``train/`` and write predictions for ``dev-0/in.tsv``.

    Raises:
        ValueError: if the numbers of training sentences and labels differ.
    """
    train_data = read_data("train/in.tsv")
    train_data_output = [
        int(line) for line in _read_lines('train/expected.tsv', 1000)
    ]
    if len(train_data) != len(train_data_output):
        raise ValueError(
            "training data and labels differ in length: "
            f"{len(train_data)} != {len(train_data_output)}"
        )

    test_data = read_data('dev-0/in.tsv', limit=None)

    # The vocabulary covers train and test tokens so make_vector never sees
    # an unknown word; indices are assigned in first-seen order.
    dictionary = {}
    for sent in IT.chain(train_data, test_data):
        for word in sent:
            if word not in dictionary:
                dictionary[word] = len(dictionary)

    WORDS_IN_DICTIONARY = len(dictionary)
    model = LogisticRegression(WORDS_IN_DICTIONARY)
    criterion = nn.NLLLoss()
    optimizer = optim.SGD(model.parameters(), lr=0.1)

    epochs = 100
    for epoch in range(epochs):
        if epoch % 10 == 0:
            print(str(epoch/epochs * 100) + "%")
        # One SGD step per training sentence.
        for instance, label in zip(train_data, train_data_output):
            vector = make_vector(instance, dictionary)
            target = torch.LongTensor([label])
            model.zero_grad()
            log_probs = model(vector)
            loss = criterion(log_probs, target)
            loss.backward()
            optimizer.step()

    # Open the output only now, so a failure during loading or training does
    # not leave a truncated predictions file behind.
    with torch.no_grad(), open('dev-0/out.tsv', 'w') as output:
        for instance in test_data:
            log_probs = model(make_vector(instance, dictionary))
            y_pred = torch.argmax(log_probs[0]).item()
            output.write(str(int(y_pred)) + '\n')


if __name__ == '__main__':
    main()