diff --git a/fine_tuning.py b/fine_tuning.py
index c12b7b9..93dcfca 100644
--- a/fine_tuning.py
+++ b/fine_tuning.py
@@ -29,7 +29,7 @@ class CustomDataset(torch.utils.data.Dataset):
         return len(self.encodings["input_ids"])
 
 data_train = list(zip(data_train_X, data_train_Y))
-data_train = random.sample(data_train, 5000)
+data_train = random.sample(data_train, 180000)
 
 tokenizer = BertTokenizer.from_pretrained("bert-base-uncased")
 train_X = tokenizer([text[0] for text in data_train], truncation=True, padding=True)