import pandas as pd
import numpy as np
import torch
from torch.utils.data import TensorDataset, DataLoader, SequentialSampler
from transformers import BertTokenizer, AutoModelForSequenceClassification
from sklearn.metrics import classification_report, accuracy_score

from model import BERT_Arch

path = 'saved_weights.pt'

tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')

# Use the GPU when available; fall back to the CPU otherwise.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

bert = AutoModelForSequenceClassification.from_pretrained('bert-base-uncased')

# Rebuild the fine-tuned model, restore its weights, and switch to
# evaluation mode so dropout is disabled during inference.
model = BERT_Arch(bert)
model.load_state_dict(torch.load(path, map_location=device))
model.to(device)
model.eval()

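# NOTE: `model.py` is not part of this file. Judging by how the model is
# called below -- logits = model(input_ids, attention_mask) -- BERT_Arch is
# assumed to be a thin nn.Module wrapper over the pretrained encoder that
# returns one score per class, roughly along these lines (hypothetical
# sketch only, not the actual file):
#
#     class BERT_Arch(nn.Module):
#         def __init__(self, bert):
#             super().__init__()
#             self.bert = bert
#
#         def forward(self, sent_id, mask):
#             return self.bert(sent_id, attention_mask=mask).logits
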
# The dev-0 files are assumed to be tab-separated with no header row
# (Gonito-style layout), hence header=None; otherwise the first example
# would be consumed as column names.
test_data = pd.read_csv("dev-0/in.tsv", sep="\t", header=None)
test_data.columns = ["text", "d"]

test_target = pd.read_csv("dev-0/expected.tsv", sep="\t", header=None, names=["label"])

# Tokenize the dev set; sequences are padded/truncated to 25 tokens,
# the same length used during training.
tokens_test = tokenizer.batch_encode_plus(
    test_data["text"].tolist(),
    max_length=25,
    padding='max_length',
    truncation=True
)

test_seq = torch.tensor(tokens_test['input_ids'])
test_mask = torch.tensor(tokens_test['attention_mask'])

# Batch size for inference.
batch_size = 32

# Wrap the tensors in a dataset, kept under a separate name so the
# original DataFrame `test_data` is not shadowed.
test_dataset = TensorDataset(test_seq, test_mask)

# Iterate in file order so predictions stay aligned with the gold labels;
# random sampling is only useful during training.
test_sampler = SequentialSampler(test_dataset)

# DataLoader for the dev set.
test_dataloader = DataLoader(test_dataset, sampler=test_sampler, batch_size=batch_size)

# Run inference batch by batch instead of pushing the whole dev set
# through the model in one call, which can exhaust GPU memory.
all_preds = []
with torch.no_grad():
    for seq, mask in test_dataloader:
        logits = model(seq.to(device), mask.to(device))
        all_preds.append(logits.cpu().numpy())

preds = np.argmax(np.concatenate(all_preds), axis=1)

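# Sanity check: the number of predictions must match the number of gold
# labels (assumes dev-0/expected.tsv has one label per line).
assert len(preds) == len(test_target), "prediction/label count mismatch"
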
print(classification_report(test_target["label"], preds))
print(accuracy_score(test_target["label"], preds))