# paranormal-or-skeptic-ISI-p.../generate.py

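"""Evaluate a fine-tuned BERT classifier on the dev-0 split.

Loads saved weights, tokenizes dev-0/in.tsv, runs batched inference, and
prints a classification report and accuracy against dev-0/expected.tsv.
"""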
import numpy as np
import pandas as pd
import torch
from torch.utils.data import TensorDataset, DataLoader, SequentialSampler
from transformers import BertTokenizer, AutoModelForSequenceClassification
from sklearn.metrics import classification_report, accuracy_score

from model import BERT_Arch
path = 'saved_weights.pt'
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
bert = AutoModelForSequenceClassification.from_pretrained('bert-base-uncased')
model = BERT_Arch(bert)
model.load_state_dict(torch.load(path, map_location=device))
model.to(device)
model.eval()  # disable dropout for deterministic evaluation
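# `BERT_Arch` lives in the repo's model.py, which is not shown here. A minimal
# sketch of what such a wrapper typically looks like (an assumption, not the
# repo's actual definition):
#
#     class BERT_Arch(torch.nn.Module):
#         def __init__(self, bert):
#             super().__init__()
#             self.bert = bert  # wrapped HF sequence-classification model
#
#         def forward(self, input_ids, attention_mask):
#             out = self.bert(input_ids, attention_mask=attention_mask)
#             return out.logits  # raw class scores, argmax'd below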
# The TSV files have no header row, so read them with header=None; otherwise
# the first example is silently consumed as the header.
test_data = pd.read_csv("dev-0/in.tsv", sep="\t", header=None)
test_data.columns = ["text", "d"]  # second column is unused here
test_target = pd.read_csv("dev-0/expected.tsv", sep="\t", header=None)
tokens_test = tokenizer.batch_encode_plus(
    test_data["text"].tolist(),
    max_length=25,  # short sequence cap; must match the length used at training time
    padding='max_length',
    truncation=True
)
test_seq = torch.tensor(tokens_test['input_ids'])
test_mask = torch.tensor(tokens_test['attention_mask'])
# define a batch size
batch_size = 32
# wrap tensors
test_dataset = TensorDataset(test_seq, test_mask)
# evaluation should see every example exactly once, in order
test_sampler = SequentialSampler(test_dataset)
# dataloader for the dev set
test_dataloader = DataLoader(test_dataset, sampler=test_sampler, batch_size=batch_size)
# run batched inference instead of pushing the whole dev set through at once
all_preds = []
with torch.no_grad():
    for seq, mask in test_dataloader:
        logits = model(seq.to(device), mask.to(device))
        all_preds.append(logits.detach().cpu().numpy())
preds = np.argmax(np.concatenate(all_preds), axis=1)
print(classification_report(test_target[0], preds))
print(accuracy_score(test_target[0], preds))
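# Since the script is called generate.py, the predictions would typically also
# be written out for submission; a minimal sketch (the output path and integer
# label format are assumptions):
#
#     np.savetxt("dev-0/out.tsv", preds, fmt="%d")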