# paranormal-or-skeptic-ISI-p.../neural-network.py

import os
import numpy as np
import pandas as pd
import tensorflow as tf
import torch
import torch.nn as nn
import torch.optim as optim
from tensorflow.keras.layers.experimental.preprocessing import TextVectorization
from sklearn.feature_extraction.text import HashingVectorizer

# Input dimension of the network: the HashingVectorizer hashes every document
# into a fixed-length vector of this many features.
FEATURES = 20
vectorizer = HashingVectorizer(n_features=FEATURES)
# os.environ['CUDA_VISIBLE_DEVICES'] = '-1'  # uncomment to force CPU-only TensorFlow
print('loading data')
train_df = pd.read_csv('train/in.tsv', header=None, sep='\t')
test_df = pd.read_csv('test-A/in.tsv', header=None, sep='\t')
dev_df = pd.read_csv('dev-0/in.tsv', header=None, sep='\t')
train_expected = pd.read_csv('train/expected.tsv', header=None, sep='\t')
train_text = train_df[0].tolist()
test_text = test_df[0].tolist()
dev_text = dev_df[0].tolist()  # fixed: was reading the test set again
text_data = train_text
# Alternative vectorizer (currently unused downstream): a Keras
# TextVectorization layer adapted on the training text. The HashingVectorizer
# pipeline below is what the script actually trains on.
vectorize_layer = TextVectorization(max_tokens=5, output_mode="int")
text_dataset = tf.data.Dataset.from_tensor_slices(text_data)
vectorize_layer.adapt(text_dataset.batch(64))
inputs = tf.keras.layers.Input(shape=(1,), dtype=tf.string, name="text")
outputs = vectorize_layer(inputs)
tf_model = tf.keras.Model(inputs, outputs)
print('vectorization model built')
# Hash the raw text into sparse feature matrices; calling transform() on the
# whole column at once is much faster than a row-by-row apply().
x_train = vectorizer.transform(train_df[0])
x_test = vectorizer.transform(test_df[0])
x_dev = vectorizer.transform(dev_df[0])
y_train = train_expected[0].astype(np.float32).to_numpy()
print("End of vectorization")
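# Sanity check (a small sketch, not part of the original script): every hashed
# matrix has one row per document and FEATURES columns.
assert x_train.shape == (len(train_df), FEATURES)
assert x_dev.shape == (len(dev_df), FEATURES)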
class FeedforwardNeuralNetModel(nn.Module):
    def __init__(self):
        super(FeedforwardNeuralNetModel, self).__init__()
        # Linear function 1: FEATURES --> 500
        self.fc1 = nn.Linear(FEATURES, 500)
        # Linear function 2 (readout): 500 --> 1
        self.fc2 = nn.Linear(500, 1)

    def forward(self, x):
        # Linear function 1 + non-linearity
        out = torch.relu(self.fc1(x))
        # Readout + sigmoid so the output is a probability, as BCELoss expects
        out = torch.sigmoid(self.fc2(out))
        return out
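# Quick shape check (a minimal sketch, an addition to the original script): a
# random batch with FEATURES inputs should map to one probability per row.
_check = FeedforwardNeuralNetModel()(torch.rand(3, FEATURES))
assert _check.shape == (3, 1)
assert ((_check > 0) & (_check < 1)).all()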
num_epochs = 5
nn_model = FeedforwardNeuralNetModel()
BATCH_SIZE = 5
criterion = torch.nn.BCELoss()
optimizer = optim.SGD(nn_model.parameters(), lr=0.1)
for epoch in range(num_epochs):
    loss_score = 0
    acc_score = 0
    items_total = 0
    nn_model.train()
    for i in range(0, y_train.shape[0], BATCH_SIZE):
        X = x_train[i:i + BATCH_SIZE]
        # Densify only the current mini-batch of the sparse matrix
        X = torch.tensor(X.astype(np.float32).todense())
        Y = y_train[i:i + BATCH_SIZE]
        Y = torch.tensor(Y.astype(np.float32)).reshape(-1, 1)
        Y_predictions = nn_model(X)
        acc_score += torch.sum((Y_predictions > 0.5) == Y).item()
        items_total += Y.shape[0]
        optimizer.zero_grad()
        loss = criterion(Y_predictions, Y)
        loss.backward()
        optimizer.step()
        loss_score += loss.item() * Y.shape[0]
    print(f"epoch {epoch}: loss {loss_score / items_total:.4f}, "
          f"accuracy {acc_score / items_total:.4f}")
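# Inference sketch (an addition, with an assumption about the challenge
# layout: predictions for dev-0 and test-A go one 0/1 label per line into
# out.tsv next to each in.tsv; adjust the paths if the repo expects otherwise).
def predict_labels(model_nn, x_sparse):
    model_nn.eval()
    preds = []
    with torch.no_grad():
        for i in range(0, x_sparse.shape[0], BATCH_SIZE):
            X = torch.tensor(x_sparse[i:i + BATCH_SIZE].astype(np.float32).todense())
            probs = model_nn(X)
            preds.extend((probs > 0.5).long().reshape(-1).tolist())
    return preds

for x_matrix, out_path in ((x_dev, 'dev-0/out.tsv'), (x_test, 'test-A/out.tsv')):
    with open(out_path, 'w') as f:
        for label in predict_labels(nn_model, x_matrix):
            f.write(f"{label}\n")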
# Earlier per-row training and prediction loops, kept for reference. They
# assumed x_train/x_test were DataFrames of per-row vectors, a layout the
# cleaned pipeline above no longer builds.
# for epoch in range(num_epochs):
#     if (epoch + 1) % 25 == 0:
#         print("Epoch completed: " + str(epoch + 1))
#     train_loss = 0
#     for index, row in x_train.iterrows():
#         probs = x_train[0][index]
#         target = y_train[0][index]
#         # Calculate loss: softmax --> cross entropy loss
#         loss = nn.CrossEntropyLoss()(probs, target)
#         train_loss += loss.item()
#         loss.backward()
#
# bow_ff_nn_predictions = []
# with torch.no_grad():
#     for index, row in x_test.iterrows():
#         probs = x_test[0][index]
#         bow_ff_nn_predictions.append(torch.argmax(probs, dim=1).cpu().numpy()[0])
# print(bow_ff_nn_predictions)