29 lines
1.0 KiB
Python
29 lines
1.0 KiB
Python
|
import pickle
|
||
|
from config import LABELS_LIST, MODEL
|
||
|
from transformers import AutoTokenizer
|
||
|
from tqdm import tqdm
|
||
|
|
||
|
# Device that both the model and every tokenized batch are moved to.
device = 'cuda'
# Directory holding the fine-tuned sequence-classification checkpoint.
model_path = './roberta-ireland'

from transformers import AutoModelForSequenceClassification

# Place the model via `device` (instead of a hard-coded .cuda()) so the model
# and the inputs created later with `.to(device)` always end up on the same
# device when `device` is changed.
model = AutoModelForSequenceClassification.from_pretrained(model_path).to(device)
# The tokenizer is loaded from MODEL (imported from config), not from
# model_path.
tokenizer = AutoTokenizer.from_pretrained(MODEL)
|
||
|
|
||
|
import torch  # only needed here, to disable autograd during inference

# For each dataset split, read the input file line by line, classify the text
# in the first tab-separated column, and write one predicted label per line
# to ../<dataset>/out.tsv.
for dataset in ('dev-0', 'test-A', 'test-B'):
    in_path = f'../{dataset}/huggingface_format_year_as_text.csv'
    out_path = f'../{dataset}/out.tsv'
    with open(in_path) as f_in, open(out_path, 'w') as f_out:
        # The first line is a header: skip it (no-op on an empty file).
        next(f_in, None)
        # Inference only: without no_grad() every forward pass would record
        # an autograd graph that is never used, wasting GPU memory and time.
        with torch.no_grad():
            # NOTE(review): total=150_000 is a hard-coded progress-bar total,
            # presumably an approximate row count -- confirm per split.
            for line_in in tqdm(f_in, total=150_000):
                # Only the first tab-separated field is classified.
                text = line_in.split('\t')[0]
                text = text.rstrip('\n')
                inputs = tokenizer(text, padding=True, truncation=True, return_tensors="pt").to(device)
                outputs = model(**inputs)
                probs = outputs.logits.softmax(1)
                # One text per call, so argmax(1) holds a single class index;
                # .item() turns it into a plain int for list indexing.
                prediction = LABELS_LIST[probs.argmax(1).item()]
                f_out.write(prediction + '\n')
|
||
|
|