Add solution for full dataset
This commit is contained in:
parent
142f0ca72e
commit
48d4451077
574
dev-0/out.tsv
574
dev-0/out.tsv
File diff suppressed because it is too large
Load Diff
25
solution.py
25
solution.py
@ -20,22 +20,23 @@ model = NeuralNetworkModel()
|
||||
criterion = torch.nn.BCELoss()
|
||||
optimizer = torch.optim.SGD(model.parameters(), lr=0.1)
|
||||
|
||||
|
||||
def train():
|
||||
with open('train/train.tsv') as f:
|
||||
docs = [line.rstrip() for line in f]
|
||||
docs_preprocessed = []
|
||||
y = []
|
||||
for doc in docs[:10000]:
|
||||
for doc in docs:
|
||||
y_with_doc = doc.split('\t')
|
||||
y.append(y_with_doc[0])
|
||||
doc = y_with_doc[1]
|
||||
doc = preprocess(doc)
|
||||
docs_preprocessed.append(doc)
|
||||
docs_preprocessed.append(preprocess(y_with_doc[1]))
|
||||
y = [int(value) for value in y]
|
||||
y = np.reshape(y, (len(y), 1))
|
||||
tagged_documents = [TaggedDocument(doc, [i]) for i, doc in enumerate(docs_preprocessed)]
|
||||
print("Fitting vectorizer...")
|
||||
global vectorizer
|
||||
vectorizer = Doc2Vec(tagged_documents, min_count=10, epochs=300, dm=0, vector_size=n_features)
|
||||
print("DONE!")
|
||||
x = vectorizer.dv.vectors
|
||||
for epoch in range(5):
|
||||
loss_score = 0
|
||||
@ -58,25 +59,21 @@ def train():
|
||||
print(epoch)
|
||||
print(get_loss_acc(model, x, y, criterion, optimizer))
|
||||
|
||||
|
||||
def classify(path):
|
||||
print("Predicting for", path)
|
||||
with open(path + 'in.tsv') as f:
|
||||
docs = [line.rstrip() for line in f]
|
||||
docs = [preprocess(doc) for doc in docs]
|
||||
test_x = [vectorizer.infer_vector(doc) for doc in docs]
|
||||
test_x = np.array(test_x, np.float32)
|
||||
test_x = torch.tensor(test_x)
|
||||
predictions = model(test_x).detach().numpy()[:, 0]
|
||||
p = []
|
||||
for prediction in predictions:
|
||||
if prediction >= 0.5:
|
||||
p.append(1)
|
||||
else:
|
||||
p.append(0)
|
||||
predictions = model(torch.tensor(test_x)).detach().numpy()[:, 0]
|
||||
predictions = [1 if value >= 0.5 else 0 for value in predictions]
|
||||
with open(path + 'out.tsv', 'w') as file:
|
||||
for prediction in p:
|
||||
for prediction in predictions:
|
||||
file.write("%i\n" % prediction)
|
||||
|
||||
|
||||
train()
|
||||
classify('dev-0/')
|
||||
# classify('test-A/')
|
||||
classify('test-A/')
|
5447
test-A/out.tsv
Normal file
5447
test-A/out.tsv
Normal file
File diff suppressed because it is too large
Load Diff
Loading…
Reference in New Issue
Block a user