update roberta script

This commit is contained in:
s440058 2021-06-22 14:22:05 +02:00
parent fb4b0d95e3
commit e348d16dde
3 changed files with 1413 additions and 1411 deletions

File diff suppressed because it is too large Load Diff

View File

@@ -34,6 +34,8 @@ class IMDbDataset(torch.utils.data.Dataset):
def prepare(data_train_X, data_train_Y): def prepare(data_train_X, data_train_Y):
tokenizer = AutoTokenizer.from_pretrained(PRE_TRAINED) tokenizer = AutoTokenizer.from_pretrained(PRE_TRAINED)
model = AutoModelForSequenceClassification.from_pretrained(PRE_TRAINED, num_labels=2) model = AutoModelForSequenceClassification.from_pretrained(PRE_TRAINED, num_labels=2)
device = torch.device("cpu")
model.to(device)
encoded_input = tokenizer([text[0] for text in list(zip(data_train_X, data_train_Y))], truncation=True, padding=True) encoded_input = tokenizer([text[0] for text in list(zip(data_train_X, data_train_Y))], truncation=True, padding=True)
train_dataset = IMDbDataset(encoded_input , [int(text[1]) for text in list(zip(data_train_X, data_train_Y))]) train_dataset = IMDbDataset(encoded_input , [int(text[1]) for text in list(zip(data_train_X, data_train_Y))])

File diff suppressed because it is too large Load Diff