# paranormal-or-skeptic-ISI-p.../generate.py

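"""Evaluate a fine-tuned BERT classifier (loaded from saved_weights.pt) on the
dev-0 split and print a classification report and accuracy score."""
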
import numpy as np
import pandas as pd
import torch
from torch.utils.data import TensorDataset, DataLoader, SequentialSampler
from transformers import AutoModelForSequenceClassification, BertTokenizer
from sklearn.metrics import accuracy_score, classification_report

from model import BERT_Arch

path = 'saved_weights.pt'
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
# fall back to CPU when no GPU is available
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
bert = AutoModelForSequenceClassification.from_pretrained('bert-base-uncased')
model = BERT_Arch(bert)
model.load_state_dict(torch.load(path, map_location=device))
model.to(device)
model.eval()  # disable dropout for inference
# the dev-0 files are headerless TSVs, so read them with header=None;
# otherwise the first row is silently consumed as column names
test_data = pd.read_csv("dev-0/in.tsv", sep="\t", header=None, names=["text", "d"])  # second column is unused here
test_target = pd.read_csv("dev-0/expected.tsv", sep="\t", header=None, names=["label"])
# tokenize and encode the test texts
tokens_test = tokenizer.batch_encode_plus(
    test_data["text"].tolist(),
    max_length=25,
    padding='max_length',
    truncation=True
)
test_seq = torch.tensor(tokens_test['input_ids'])
test_mask = torch.tensor(tokens_test['attention_mask'])
# define a batch size
batch_size = 32
# wrap tensors
test_dataset = TensorDataset(test_seq, test_mask)
# a sequential sampler keeps predictions aligned with the expected labels
test_sampler = SequentialSampler(test_dataset)
# dataLoader for the test set
test_dataloader = DataLoader(test_dataset, sampler=test_sampler, batch_size=batch_size)
# predict in batches to keep GPU memory bounded
preds = []
with torch.no_grad():
    for seq, mask in test_dataloader:
        out = model(seq.to(device), mask.to(device))
        preds.append(out.detach().cpu().numpy())
preds = np.argmax(np.concatenate(preds), axis=1)
print(classification_report(test_target["label"], preds))
print(accuracy_score(test_target["label"], preds))
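
# model.py (which defines BERT_Arch) is not part of this file view. For
# reference only, a minimal sketch of a wrapper compatible with the calls
# above, i.e. forward(input_ids, attention_mask) returning per-class scores,
# could look like the following; this is an assumption, not the repository's
# actual class:
#
#   import torch.nn as nn
#
#   class BERT_Arch(nn.Module):
#       def __init__(self, bert):
#           super().__init__()
#           self.bert = bert  # wrapped AutoModelForSequenceClassification
#
#       def forward(self, sent_id, mask):
#           # the wrapped model returns an output object; expose raw logits
#           return self.bert(sent_id, attention_mask=mask).logits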