This commit is contained in:
Kasia 2021-01-27 03:17:41 +01:00
commit be9a457cd3
9 changed files with 1016457 additions and 0 deletions

137314
dev-0/expected.tsv Normal file

File diff suppressed because it is too large Load Diff

137314
dev-0/in.tsv Normal file

File diff suppressed because it is too large Load Diff

137314
dev-0/meta.tsv Normal file

File diff suppressed because it is too large Load Diff

0
dev-0/out.tsv Normal file
View File

156606
dev-1/expected.tsv Normal file

File diff suppressed because it is too large Load Diff

156606
dev-1/in.tsv Normal file

File diff suppressed because it is too large Load Diff

156606
dev-1/meta.tsv Normal file

File diff suppressed because it is too large Load Diff

79
main.py Normal file
View File

@@ -0,0 +1,79 @@
import torch
import numpy as np
import torch.nn as nn
import torch.optim as optim
from torch.autograd import Variable
from torch.nn import functional as F
import itertools as itertools
# --- Configuration and training data -------------------------------------
# data      : tokenised training sentences (one list of tokens per line)
# data_out  : integer labels, one per training sentence
data = []
data_out = []
input_dim = 2  # NOTE(review): not referenced anywhere else in the visible script
epochs = 200
label_to_ix = {0: 0, 1: 1}

# Only the first 1000 lines of each file are used.  islice stops reading
# after that many lines instead of loading the whole file and slicing it,
# and the context managers make sure both handles are closed.
with open('train/in.tsv', 'r') as train_in:
    for sent in itertools.islice(train_in, 1000):
        data.append(sent.split())
with open('train/expected.tsv', 'r') as train_expected:
    for sent in itertools.islice(train_expected, 1000):
        data_out.append(int(sent))

# Vocabulary: every distinct token gets the next free index, in order of
# first appearance.  The loop names `x` and `y` stay bound at module level
# after the loops finish, exactly as in the original script.
word_to_ix = {}
for x in data:
    for y in x:
        if y not in word_to_ix:
            word_to_ix[y] = len(word_to_ix)

NUM_LABELS = 2
# Vocabulary size; handed to the model as the width of its input vector.
output_dim = len(word_to_ix)
class LogisticRegression(torch.nn.Module):
    """Linear layer followed by log-softmax (multinomial logistic regression).

    Args:
        NUM_LABELS: number of classes, i.e. the width of the layer's output.
        output_dim: width of the *input* feature vector.  Despite its name,
            this value is used as the linear layer's ``in_features``.
    """

    def __init__(self, NUM_LABELS, output_dim):
        super().__init__()
        self.linear = nn.Linear(in_features=output_dim, out_features=NUM_LABELS)

    def forward(self, x):
        """Return per-class log-probabilities for a batch of feature rows.

        The softmax is taken over dimension 1, so ``x`` must have at least
        two dimensions (rows of features).
        """
        logits = self.linear(x)
        return F.log_softmax(logits, dim=1)
# Negative log-likelihood loss; it consumes the log-probabilities that the
# model's forward pass returns.
loss_function = nn.NLLLoss()
# One classifier over the whole vocabulary, trained with plain SGD.
model = LogisticRegression(NUM_LABELS=NUM_LABELS, output_dim=output_dim)
optimizer = optim.SGD(model.parameters(), lr=0.1)
def make_target(label, label_to_ix):
    """Look up *label* and wrap its class index in a one-element LongTensor.

    Args:
        label: key to look up in ``label_to_ix``.
        label_to_ix: mapping from label to integer class index.

    Returns:
        A 1-D ``torch.LongTensor`` holding the single class index.

    Raises:
        KeyError: if ``label`` is not a key of ``label_to_ix``.
    """
    class_index = label_to_ix[label]
    return torch.LongTensor([class_index])
def make_bow_vector(sentence, word_to_ix):
    """Build a bag-of-words count vector for one tokenised sentence.

    Args:
        sentence: iterable of tokens.
        word_to_ix: mapping from token to its column index; its length sets
            the width of the returned vector.

    Returns:
        A float tensor of shape ``(1, len(word_to_ix))`` where each column
        holds how many times the corresponding token occurs in ``sentence``.

    Tokens that are not in ``word_to_ix`` are ignored rather than raising
    ``KeyError``, so text containing unseen words can still be vectorised.
    """
    vec = torch.zeros(len(word_to_ix))
    for word in sentence:
        index = word_to_ix.get(word)
        if index is not None:
            vec[index] += 1
    return vec.view(1, -1)
# --- Training --------------------------------------------------------------
# Per-example SGD: one parameter update for every (sentence, label) pair,
# repeated for `epochs` passes over the training data.
#
# The two lists must line up one-to-one.  Padding a shorter list with None
# (as zip_longest would) only leads to an obscure failure inside the helper
# functions, so a mismatch is reported up front instead.
if len(data) != len(data_out):
    raise ValueError(
        "training inputs and labels differ in length: {} vs {}".format(
            len(data), len(data_out)
        )
    )

for epoch in range(int(epochs)):
    for instance, label in zip(data, data_out):
        bow_vec = make_bow_vector(instance, word_to_ix)
        target = make_target(label, label_to_ix)
        # Gradients accumulate across backward() calls, so clear them
        # before each example.
        model.zero_grad()
        log_probs = model(bow_vec)
        loss = loss_function(log_probs, target)
        loss.backward()
        optimizer.step()
# --- Inference ---------------------------------------------------------------
# Write one predicted label ("0" or "1") per line of test-A/in.tsv.
data = []  # rebinding drops this script's reference to the training sentences
with open('test-A/in.tsv', 'r') as inputf, open('test-A/out.tsv', 'w') as outputf:
    with torch.no_grad():
        for line in inputf:
            # Vectorise the *current* line.  Words that never occurred in
            # the training vocabulary have no column, so they are dropped
            # before building the vector.
            known_words = [word for word in line.split() if word in word_to_ix]
            bow_vector = make_bow_vector(known_words, word_to_ix)
            log_probs = model(bow_vector)
            # Ties go to class 1, matching the strict comparison.
            if log_probs[0][0] > log_probs[0][1]:
                outputf.write("0\n")
            else:
                outputf.write("1\n")

134618
test-A/out.tsv Normal file

File diff suppressed because it is too large Load Diff