This commit is contained in:
Wojciech Jarmosz 2021-06-22 00:28:06 +02:00
parent 6939244314
commit 6ad8ba24ba

View File

@@ -29,7 +29,7 @@ class CustomDataset(torch.utils.data.Dataset):
return len(self.encodings["input_ids"]) return len(self.encodings["input_ids"])
data_train = list(zip(data_train_X, data_train_Y)) data_train = list(zip(data_train_X, data_train_Y))
data_train = random.sample(data_train, 180000) data_train = random.sample(data_train, 50000)
tokenizer = BertTokenizer.from_pretrained("bert-base-uncased") tokenizer = BertTokenizer.from_pretrained("bert-base-uncased")
train_X = tokenizer([text[0] for text in data_train], truncation=True, padding=True) train_X = tokenizer([text[0] for text in data_train], truncation=True, padding=True)