update roberta script
This commit is contained in:
parent
99eb245d6c
commit
8cf99e9f55
5272
dev-0/out.tsv
Normal file
5272
dev-0/out.tsv
Normal file
File diff suppressed because it is too large
Load Diff
@ -3,7 +3,7 @@ import torch
|
||||
|
||||
PATHS = ['train/in.tsv', 'train/expected.tsv', 'dev-0/in.tsv', 'test-A/in.tsv', './dev-0/out.tsv', './test-A/out.tsv']
|
||||
OUTPUT_PATHS = ['dev-0/out.tsv', 'test-A/out.tsv']
|
||||
PRE_TRAINED = 'roberta-base'
|
||||
PRE_TRAINED = ['roberta-base']
|
||||
|
||||
def get_data(path):
|
||||
data = []
|
||||
@ -32,8 +32,8 @@ class IMDbDataset(torch.utils.data.Dataset):
|
||||
return len(self.labels)
|
||||
|
||||
def prepare(data_train_X, data_train_Y):
|
||||
tokenizer = AutoTokenizer.from_pretrained(PRE_TRAINED)
|
||||
model = AutoModelForSequenceClassification.from_pretrained(PRE_TRAINED, num_labels=2)
|
||||
tokenizer = AutoTokenizer.from_pretrained(PRE_TRAINED[0])
|
||||
model = AutoModelForSequenceClassification.from_pretrained(PRE_TRAINED[0], num_labels=2)
|
||||
device = torch.device("cpu")
|
||||
model.to(device)
|
||||
encoded_input = tokenizer([text[0] for text in list(zip(data_train_X, data_train_Y))], truncation=True, padding=True)
|
||||
|
5152
test-A/out.tsv
Normal file
5152
test-A/out.tsv
Normal file
File diff suppressed because it is too large
Load Diff
Loading…
Reference in New Issue
Block a user