update roberta script
This commit is contained in:
parent
99eb245d6c
commit
8cf99e9f55
5272
dev-0/out.tsv
Normal file
5272
dev-0/out.tsv
Normal file
File diff suppressed because it is too large
Load Diff
@@ -3,7 +3,7 @@ import torch
|
|||||||
|
|
||||||
PATHS = ['train/in.tsv', 'train/expected.tsv', 'dev-0/in.tsv', 'test-A/in.tsv', './dev-0/out.tsv', './test-A/out.tsv']
|
PATHS = ['train/in.tsv', 'train/expected.tsv', 'dev-0/in.tsv', 'test-A/in.tsv', './dev-0/out.tsv', './test-A/out.tsv']
|
||||||
OUTPUT_PATHS = ['dev-0/out.tsv', 'test-A/out.tsv']
|
OUTPUT_PATHS = ['dev-0/out.tsv', 'test-A/out.tsv']
|
||||||
PRE_TRAINED = 'roberta-base'
|
PRE_TRAINED = ['roberta-base']
|
||||||
|
|
||||||
def get_data(path):
|
def get_data(path):
|
||||||
data = []
|
data = []
|
||||||
@@ -32,8 +32,8 @@ class IMDbDataset(torch.utils.data.Dataset):
|
|||||||
return len(self.labels)
|
return len(self.labels)
|
||||||
|
|
||||||
def prepare(data_train_X, data_train_Y):
|
def prepare(data_train_X, data_train_Y):
|
||||||
tokenizer = AutoTokenizer.from_pretrained(PRE_TRAINED)
|
tokenizer = AutoTokenizer.from_pretrained(PRE_TRAINED[0])
|
||||||
model = AutoModelForSequenceClassification.from_pretrained(PRE_TRAINED, num_labels=2)
|
model = AutoModelForSequenceClassification.from_pretrained(PRE_TRAINED[0], num_labels=2)
|
||||||
device = torch.device("cpu")
|
device = torch.device("cpu")
|
||||||
model.to(device)
|
model.to(device)
|
||||||
encoded_input = tokenizer([text[0] for text in list(zip(data_train_X, data_train_Y))], truncation=True, padding=True)
|
encoded_input = tokenizer([text[0] for text in list(zip(data_train_X, data_train_Y))], truncation=True, padding=True)
|
||||||
|
5152
test-A/out.tsv
Normal file
5152
test-A/out.tsv
Normal file
File diff suppressed because it is too large
Load Diff
Loading…
Reference in New Issue
Block a user