import pandas as pd
import numpy as np
import torch
from torch.utils.data import TensorDataset, DataLoader, SequentialSampler
from transformers import BertTokenizer, AutoModel
from sklearn.metrics import classification_report, accuracy_score

from model import BERT_Arch

path = 'saved_weights.pt'

tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# BERT_Arch (from model.py) is assumed to wrap the base encoder, so load
# AutoModel here rather than AutoModelForSequenceClassification, whose extra
# classification head would make the state-dict keys disagree with the
# checkpoint in saved_weights.pt; whatever backbone was used at training time
# must be loaded here.
bert = AutoModel.from_pretrained('bert-base-uncased')
model = BERT_Arch(bert)
model.load_state_dict(torch.load(path, map_location=device))
model.to(device)
model.eval()

# The dev-0 TSVs ship without a header row; reading them with pandas'
# default header=0 silently consumes the first example as column names
# (which is why the label column previously had to be indexed as '0').
test_data = pd.read_csv("dev-0/in.tsv", sep="\t", header=None, names=["text", "d"])
test_target = pd.read_csv("dev-0/expected.tsv", sep="\t", header=None, names=["label"])

# max_length must match the value used when the model was trained
tokens_test = tokenizer.batch_encode_plus(
    test_data["text"].tolist(),
    max_length=25,
    padding='max_length',
    truncation=True
)

test_seq = torch.tensor(tokens_test['input_ids'])
test_mask = torch.tensor(tokens_test['attention_mask'])

# define a batch size
batch_size = 32

# wrap tensors
test_dataset = TensorDataset(test_seq, test_mask)

# evaluate in file order so predictions stay aligned with the labels
# (a RandomSampler would shuffle them out of correspondence)
test_sampler = SequentialSampler(test_dataset)

# dataLoader for the test set
test_dataloader = DataLoader(test_dataset, sampler=test_sampler, batch_size=batch_size)

# run inference batch by batch instead of pushing the whole set through the
# model in one forward pass, which can exhaust GPU memory on larger files
all_preds = []
with torch.no_grad():
    for seq, mask in test_dataloader:
        out = model(seq.to(device), mask.to(device))
        all_preds.append(out.detach().cpu().numpy())

preds = np.argmax(np.concatenate(all_preds), axis=1)

print(classification_report(test_target["label"], preds))
print(accuracy_score(test_target["label"], preds))
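
# --- Assumed shape of model.py ---
# model.py is not shown in this script, so the class below is only a minimal
# sketch of the interface the code above relies on: a module whose
# forward(sent_id, mask) returns per-class scores (here log-probabilities)
# that np.argmax can decode. The name BERT_Arch_sketch, the layer sizes
# (768 -> 512 -> 2), the dropout rate, and the LogSoftmax output are
# assumptions in the style of common BERT fine-tuning wrappers, not the
# author's confirmed architecture; the real model.py must match the
# checkpoint stored in saved_weights.pt.
import torch.nn as nn

class BERT_Arch_sketch(nn.Module):
    def __init__(self, bert):
        super().__init__()
        self.bert = bert                    # base BERT encoder (AutoModel)
        self.dropout = nn.Dropout(0.1)
        self.relu = nn.ReLU()
        self.fc1 = nn.Linear(768, 512)      # pooled [CLS] vector -> hidden
        self.fc2 = nn.Linear(512, 2)        # hidden -> 2 classes
        self.softmax = nn.LogSoftmax(dim=1)

    def forward(self, sent_id, mask):
        # pooled output of the [CLS] token from the base encoder
        _, cls_hs = self.bert(sent_id, attention_mask=mask, return_dict=False)
        x = self.relu(self.fc1(cls_hs))
        x = self.dropout(x)
        x = self.fc2(x)
        return self.softmax(x)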