paranormal-or-skeptic - pytorch NNet

predictions
2022-05-25 23:06:35 +02:00 · 2022-05-25 22:29:49 +02:00 · 2022-05-25 21:41:24 +02:00
4 changed files with 10537 additions and 0 deletions
--- a/Net.py
+++ b/Net.py
@ -0,0 +1,16 @@
 import torch.nn as nn
 from torch import relu, sigmoid
 class NNet(nn.Module):
    """Feed-forward binary classifier with two ReLU hidden layers and a sigmoid output.

    The default sizes reproduce the original fixed layout (100 -> 1000 -> 400 -> 1).
    """

    def __init__(self, input_dim=100, hidden1_dim=1000, hidden2_dim=400):
        """Build the three linear layers.

        Args:
            input_dim: Length of the input feature vector.
            hidden1_dim: Width of the first hidden layer.
            hidden2_dim: Width of the second hidden layer.
        """
        super().__init__()
        # Attribute names are unchanged, so parameter names in state_dict() stay the same.
        self.ll1 = nn.Linear(input_dim, hidden1_dim)
        self.ll2 = nn.Linear(hidden1_dim, hidden2_dim)
        self.ll3 = nn.Linear(hidden2_dim, 1)

    def forward(self, x):
        """Apply ll1 -> ReLU -> ll2 -> ReLU -> ll3 -> sigmoid to ``x``.

        Args:
            x: Tensor whose last dimension equals the input size of ``ll1``.

        Returns:
            Tensor of sigmoid outputs whose last dimension is 1.
        """
        hidden = relu(self.ll1(x))
        hidden = relu(self.ll2(hidden))
        return sigmoid(self.ll3(hidden))
--- a/dev-0/out.tsv
+++ b/dev-0/out.tsv
--- a/run.py
+++ b/run.py
@ -0,0 +1,97 @@
 import gensim.downloader
 import torch.optim as optim
 import torch.nn as nn
 import torch
 import numpy as np
 from Net import NNet
 #from timeit import default_timer as timer
 def read_data(folder_name):
    """Load tokenised inputs and integer labels from ``folder_name``.

    ``in.tsv`` is lower-cased and split on whitespace line by line, and the
    last two tokens of every line are dropped. ``expected.tsv`` supplies one
    label per line, taken from the first whitespace-separated token.

    Args:
        folder_name: Directory containing ``in.tsv`` and ``expected.tsv``.

    Returns:
        A ``(tokens, labels)`` tuple: a list of token lists and a list of ints.
    """
    token_rows = []
    with open(f'{folder_name}/in.tsv', encoding='utf-8') as in_file:
        for raw_line in in_file:
            token_rows.append(raw_line.lower().split()[:-2])

    labels = []
    with open(f'{folder_name}/expected.tsv', encoding='utf-8') as expected_file:
        for raw_line in expected_file:
            first_field = raw_line.split()[0]
            labels.append(int(first_field))

    return token_rows, labels
 def process_data(data, word2vec, dim=None):
    """Represent each tokenised document as the mean of its known word vectors.

    Tokens missing from ``word2vec`` are ignored. A document with no known
    token is represented by a zero vector.

    Args:
        data: Iterable of token lists.
        word2vec: Embedding lookup supporting ``token in word2vec`` and
            ``word2vec[token]``.
        dim: Length of the zero vector used for documents without any known
            token. When omitted, ``word2vec.vector_size`` is used if that
            attribute exists, otherwise 100 (the previously hard-coded value).

    Returns:
        A list with one 1-D numpy array per document.
    """
    if dim is None:
        # Keeps the fallback vector consistent with the embedding size
        # instead of assuming 100 dimensions.
        dim = getattr(word2vec, 'vector_size', 100)

    processed_data = []
    for tokens in data:
        vectors = [word2vec[token] for token in tokens if token in word2vec]
        if vectors:
            processed_data.append(np.mean(vectors, axis=0))
        else:
            processed_data.append(np.zeros(dim))
    return processed_data
 def predict(folder_name, model, word_vec):
    """Classify every line of ``{folder_name}/in.tsv`` with ``model``.

    Lines are tokenised the same way as the training input (lower-cased,
    whitespace-split, last two tokens dropped), turned into feature vectors
    by ``process_data`` and pushed through the model one at a time with
    gradient tracking disabled.

    Args:
        folder_name: Directory containing ``in.tsv``.
        model: Callable model returning a score tensor for one feature vector.
        word_vec: Embedding lookup passed on to ``process_data``.

    Returns:
        A list of boolean tensors, each the model output compared with 0.5.
    """
    with open(f'{folder_name}/in.tsv', encoding='utf-8') as in_file:
        documents = [raw_line.lower().split()[:-2] for raw_line in in_file]

    features = process_data(documents, word_vec)

    y_predictions = []
    with torch.no_grad():
        for feature in features:
            # The module-level ``device`` decides where inference runs.
            sample = torch.tensor(feature.astype(np.float32)).to(device)
            score = model(sample)
            y_predictions.append(score > 0.5)
    return y_predictions
 def save_predictions(folder_name, predicted_labels):
    """Write one integer label per line to ``{folder_name}/out.tsv``.

    Args:
        folder_name: Directory in which ``out.tsv`` is created or overwritten.
        predicted_labels: Iterable of tensors; the first element of each,
            cast to int, is written as the label.
    """
    # Convert everything before opening the file so a bad prediction does not
    # leave a truncated out.tsv behind.
    labels = [pred.int()[0].item() for pred in predicted_labels]
    with open(f"{folder_name}/out.tsv", "w", encoding="UTF-8") as file_out:
        # writelines() expects an iterable of lines; handing it a single str
        # makes it iterate that string character by character.
        file_out.writelines(f"{label}\n" for label in labels)
 # ---------------------------------------------------------------------------
 # Script body: load embeddings and training data, train NNet, then write
 # predictions for the 'dev-0' and 'test-A' folders.
 # ---------------------------------------------------------------------------
 # Run on CUDA when available, otherwise on the CPU. ``device`` is also read
 # as a global by predict().
 device = "cuda" if torch.cuda.is_available() else "cpu"
 print(device)   # author's note: the GPU is a bit faster here
 # Embedding model fetched through gensim's downloader; presumably
 # 100-dimensional, matching NNet's 100-feature input -- TODO confirm.
 word_vectors = gensim.downloader.load("glove-wiki-gigaword-100")
 x_data, y_train = read_data('train')
 # One averaged embedding vector per training document.
 x_train = process_data(x_data, word_vectors)
 model = NNet().to(device)
 # Binary cross-entropy on the model's sigmoid output.
 criterion = nn.BCELoss()
 optimizer = optim.SGD(model.parameters(), lr=0.005) # momentum=0.9 was tried and is currently disabled
 # Two passes over the training set.
 for epoch in range(2):
    running_loss = 0.0  # reset after every report below
    correct = 0.        # accumulated over the whole epoch (never reset inside it)
    total = 0.          # accumulated over the whole epoch (never reset inside it)
    # Each optimisation step uses a single sample (no batching).
    for i, (inputs, label) in enumerate(zip(x_train, y_train)):
        inputs = torch.tensor(inputs.astype(np.float32)).to(device)
        # reshape(1) gives the label one element, like the model's single output.
        label = torch.tensor(np.array(label).astype(np.float32)).reshape(1).to(device)
        # Clear gradients left over from the previous step.
        optimizer.zero_grad()
        # Forward pass, loss, backward pass, parameter update.
        y_predicted = model(inputs)
        loss = criterion(y_predicted, label)
        loss.backward()
        optimizer.step()
        # Update the running statistics.
        running_loss += loss.item()
        # A prediction counts as correct when (output > 0.5) equals the label.
        correct += ((y_predicted > 0.5) == label).type(torch.float).sum().item()
        total += label.size(0)
        if i % 10000 == 9999:  # report every 10000 samples
            # Loss is the mean over the last 10000 samples; accuracy covers
            # the epoch so far.
            print(f'[{epoch + 1}, {i + 1:5d}] loss: {running_loss / 10000:.3f}')
            print(f'Accuracy score: {100 * correct / total} %')
            running_loss = 0.0
 # Write out.tsv for the 'dev-0' and 'test-A' folders with the trained model.
 predicted = predict('dev-0', model, word_vectors)
 save_predictions('dev-0', predicted)
 predicted = predict('test-A', model, word_vectors)
 save_predictions('test-A', predicted)
--- a/test-A/out.tsv
+++ b/test-A/out.tsv
Author	SHA1	Message	Date
Cezary	42ede5e2c7	paranormal-or-skeptic - pytorch NNet	2022-05-25 23:06:35 +02:00
Cezary	2fc8abbc87	predictions	2022-05-25 22:29:49 +02:00
Cezary	328cb684d4	predictions	2022-05-25 21:41:24 +02:00