update roberta script

This commit is contained in:
s440058 2021-06-22 14:25:22 +02:00
parent 99eb245d6c
commit 8cf99e9f55
3 changed files with 10427 additions and 3 deletions

5272
dev-0/out.tsv Normal file

File diff suppressed because it is too large Load Diff

View File

@@ -3,7 +3,7 @@ import torch
 PATHS = ['train/in.tsv', 'train/expected.tsv', 'dev-0/in.tsv', 'test-A/in.tsv', './dev-0/out.tsv', './test-A/out.tsv']
 OUTPUT_PATHS = ['dev-0/out.tsv', 'test-A/out.tsv']
-PRE_TRAINED = 'roberta-base'
+PRE_TRAINED = ['roberta-base']
 def get_data(path):
     data = []
@@ -32,8 +32,8 @@ class IMDbDataset(torch.utils.data.Dataset):
         return len(self.labels)
 def prepare(data_train_X, data_train_Y):
-    tokenizer = AutoTokenizer.from_pretrained(PRE_TRAINED)
-    model = AutoModelForSequenceClassification.from_pretrained(PRE_TRAINED, num_labels=2)
+    tokenizer = AutoTokenizer.from_pretrained(PRE_TRAINED[0])
+    model = AutoModelForSequenceClassification.from_pretrained(PRE_TRAINED[0], num_labels=2)
     device = torch.device("cpu")
     model.to(device)
     encoded_input = tokenizer([text[0] for text in list(zip(data_train_X, data_train_Y))], truncation=True, padding=True)

5152
test-A/out.tsv Normal file

File diff suppressed because it is too large Load Diff