import os

import numpy as np
import pandas as pd
import tensorflow as tf
import torch
import torch.nn as nn
from sklearn.feature_extraction.text import HashingVectorizer
from tensorflow.keras.layers.experimental.preprocessing import TextVectorization

FEATURES = 20  # dimensionality of the hashed bag-of-words vectors

vectorizer = HashingVectorizer(n_features=FEATURES)

# os.environ['CUDA_VISIBLE_DEVICES'] = '-1'  # uncomment to force TensorFlow onto the CPU

# The challenge data ships as headerless TSV files.
train_df = pd.read_csv('train/in.tsv', header=None, sep='\t')
test_df = pd.read_csv('test-A/in.tsv', header=None, sep='\t')
dev_df = pd.read_csv('dev-0/in.tsv', header=None, sep='\t')
train_expected = pd.read_csv('train/expected.tsv', header=None, sep='\t')

train_text = train_df[0].tolist()
test_text = test_df[0].tolist()
dev_text = dev_df[0].tolist()  # was reading test_df by mistake

# A Keras TextVectorization pipeline kept from an earlier experiment; the
# classifier below consumes the hashed features instead.
vectorize_layer = TextVectorization(max_tokens=5, output_mode="int")
text_data = tf.data.Dataset.from_tensor_slices(train_text)
vectorize_layer.adapt(text_data.batch(64))
inputs = tf.keras.layers.Input(shape=(1,), dtype=tf.string, name="text")
outputs = vectorize_layer(inputs)
model = tf.keras.Model(inputs, outputs)
print('model loaded')
# print(model.predict(["Murder in the forest!"]))

# Hash each corpus in one pass; the results are scipy sparse matrices of
# shape (n_samples, FEATURES).
x_train = vectorizer.transform(train_df[0])
x_dev = vectorizer.transform(dev_df[0])
x_test = vectorizer.transform(test_df[0])
y_train = train_expected[0].values.astype(np.float32)
print("End of vectorization")


class FeedforwardNeuralNetModel(nn.Module):
    def __init__(self):
        super(FeedforwardNeuralNetModel, self).__init__()
        # Linear function 1: FEATURES --> 500
        self.fc1 = nn.Linear(FEATURES, 500)
        # Linear function 2 (readout): 500 --> 1
        self.fc2 = nn.Linear(500, 1)

    def forward(self, x):
        out = torch.relu(self.fc1(x))       # non-linearity over the hidden layer
        out = torch.sigmoid(self.fc2(out))  # probability of the positive class
        return out


nn_model = FeedforwardNeuralNetModel()
NUM_EPOCHS = 5
BATCH_SIZE = 5
criterion = torch.nn.BCELoss()
optimizer = torch.optim.SGD(nn_model.parameters(), lr=0.1)

for epoch in range(NUM_EPOCHS):
    loss_score = 0
    acc_score = 0
    items_total = 0
    nn_model.train()
    for i in range(0, y_train.shape[0], BATCH_SIZE):
        # Densify only the current mini-batch of sparse rows.
        X = torch.tensor(x_train[i:i + BATCH_SIZE].toarray().astype(np.float32))
        Y = torch.tensor(y_train[i:i + BATCH_SIZE]).reshape(-1, 1)
        Y_predictions = nn_model(X)
        acc_score += torch.sum((Y_predictions > 0.5) == Y).item()
        items_total += Y.shape[0]
        optimizer.zero_grad()
        loss = criterion(Y_predictions, Y)
        loss.backward()
        optimizer.step()
        loss_score += loss.item() * Y.shape[0]
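    # Added progress report: the loop above accumulates loss_score, acc_score
    # and items_total but never printed them.
    print(f'epoch {epoch + 1}: '
          f'loss={loss_score / items_total:.4f}, '
          f'accuracy={acc_score / items_total:.4f}')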

# Inference: run the trained network over the dev and test sets without
# tracking gradients, thresholding the sigmoid outputs at 0.5 to obtain
# hard 0/1 labels.
nn_model.eval()

def predict(x_sparse):
    with torch.no_grad():
        probs = nn_model(torch.tensor(x_sparse.toarray().astype(np.float32)))
    return (probs > 0.5).long().reshape(-1).tolist()

dev_predictions = predict(x_dev)
test_predictions = predict(x_test)
# print(test_predictions)
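# Assumption about the challenge layout: directories named train/, dev-0/ and
# test-A/ usually expect predictions in an out.tsv file next to each in.tsv;
# adjust these paths if your evaluation setup differs.
pd.DataFrame(dev_predictions).to_csv('dev-0/out.tsv', sep='\t', header=False, index=False)
pd.DataFrame(test_predictions).to_csv('test-A/out.tsv', sep='\t', header=False, index=False)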