77 lines
2.0 KiB
Python
77 lines
2.0 KiB
Python
|
import torch
|
||
|
import torch.nn as nn
|
||
|
import torch.optim as optim
|
||
|
import itertools as IT
|
||
|
import numpy as np
|
||
|
import csv
|
||
|
|
||
|
class LogisticRegression(torch.nn.Module):
    """Two-class linear classifier over bag-of-words count vectors.

    ``forward`` returns log-probabilities, which is the input format that
    ``nn.NLLLoss`` expects.
    """

    def __init__(self, input_size=None):
        """Create the single linear layer.

        Args:
            input_size: Number of input features (the vocabulary size).
                When omitted, the module-level ``WORDS_IN_DICTIONARY`` is
                looked up instead, so the zero-argument constructor keeps
                working for existing callers.
        """
        super().__init__()
        if input_size is None:
            # Resolved at call time: the global only has to exist by the
            # moment a model is actually constructed.
            input_size = WORDS_IN_DICTIONARY
        self.linear = torch.nn.Linear(input_size, 2)

    def forward(self, x):
        """Return per-class log-probabilities for ``x``.

        Args:
            x: Float tensor whose last dimension has ``input_size`` entries.

        Returns:
            Tensor whose last dimension has size 2 and holds log-softmax
            scores (each row exponentiates to a probability distribution).
        """
        # NLLLoss needs log-probabilities; sigmoid outputs are neither
        # normalised across classes nor in log space.
        return torch.log_softmax(self.linear(x), dim=-1)
|
||
|
def make_vector(sentence, dictionary):
    """Build a bag-of-words count vector for one sentence.

    Args:
        sentence: Iterable of tokens; every token must be a key of
            ``dictionary`` (a missing token raises ``KeyError``).
        dictionary: Mapping from token to its column index.

    Returns:
        A float tensor of shape ``(1, len(dictionary))`` where each column
        holds how many times the corresponding token occurs in ``sentence``.
    """
    counts = torch.zeros(len(dictionary))
    for token in sentence:
        column = dictionary[token]
        counts[column] = counts[column] + 1
    # Add a leading batch dimension of size one.
    return counts.view(1, -1)
|
||
|
|
||
|
def read_data(path, limit=2000):
    """Read a whitespace-tokenised text file, one sentence per line.

    Args:
        path: Path of the file to read.
        limit: Maximum number of leading lines to read; ``None`` reads the
            whole file. Defaults to 2000.

    Returns:
        A list with one entry per line read; each entry is the list of
        tokens produced by ``str.split()`` on that line (an empty list for
        a blank line).
    """
    # The context manager closes the handle even if reading fails, and
    # islice stops after ``limit`` lines instead of loading the whole file.
    with open(path, 'r') as handle:
        return [line.split() for line in IT.islice(handle, limit)]
|
||
|
|
||
|
def main():
    """Train the bag-of-words classifier and write test-set predictions.

    Reads sentences from ``train/in.tsv`` and integer labels from
    ``train/expected.tsv`` (at most the first 2000 lines of each), builds a
    vocabulary over the training and test sentences, runs 100 epochs of
    per-example SGD, and writes one predicted class index per line to
    ``test-A/out.tsv``.

    Raises:
        ValueError: If the number of training sentences and labels differ,
            or if a label line is not an integer.
    """
    # LogisticRegression() reads this module-level name to size its input
    # layer, so it must be assigned as a global rather than a local.
    global WORDS_IN_DICTIONARY

    train_data = read_data("train/in.tsv")
    with open('train/expected.tsv', 'r') as labels_file:
        train_data_output = [int(line) for line in IT.islice(labels_file, 2000)]

    # Fail early with a clear message instead of padding the shorter side
    # with None and crashing somewhere inside the training loop.
    if len(train_data) != len(train_data_output):
        raise ValueError(
            "train/in.tsv has %d sentences but train/expected.tsv has %d labels"
            % (len(train_data), len(train_data_output))
        )

    test_data = read_data('test-A/in.tsv')

    # Index every distinct word in order of first appearance. Test
    # sentences are included so make_vector never meets an unknown word.
    dictionary = {}
    for sent in IT.chain(train_data, test_data):
        for word in sent:
            dictionary.setdefault(word, len(dictionary))

    WORDS_IN_DICTIONARY = len(dictionary)

    model = LogisticRegression()

    criterion = nn.NLLLoss()
    optimizer = optim.SGD(model.parameters(), lr=0.1)

    epochs = 100
    for epoch in range(epochs):
        if epoch % 10 == 0:
            print(str(epoch / epochs * 100) + "%")
        for instance, label in zip(train_data, train_data_output):
            vector = make_vector(instance, dictionary)
            target = torch.LongTensor([label])
            model.zero_grad()
            scores = model(vector)
            loss = criterion(scores, target)
            loss.backward()
            optimizer.step()

    # The output file is opened only after training so a failed run does
    # not leave a truncated file behind; no_grad skips autograd bookkeeping
    # that prediction does not need.
    with torch.no_grad(), open('test-A/out.tsv', 'w') as output:
        for instance in test_data:
            vec = make_vector(instance, dictionary)
            scores = model(vec)
            y_pred = np.argmax(scores[0].detach().numpy())
            output.write(str(int(y_pred)) + '\n')
|
||
|
|
||
|
|
||
|
# Run the training/prediction pipeline only when executed as a script,
# not when this module is imported.
if __name__ == "__main__":
    main()
|