RobertaForSequenceClassification training for sentence classification on SST-2
Colab link (with better-looking output):
https://colab.research.google.com/drive/1pLOmBL5MJGA_BzY3pGoYSXf_bivsoJlX?usp=sharing
Links:
- Tensorboard training: https://tensorboard.dev/experiment/Hq95VFzqTQ2CyBb1S4SOpw/#scalars
- Huggingface dataset edited: https://huggingface.co/datasets/Zombely/sst2-project-dataset
- Huggingface Trained model: https://huggingface.co/Zombely/RobertaForSequenceClassification-sst2
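For reference, the edited dataset linked above can presumably be loaded straight from the Hub (a minimal sketch, assuming the repo mirrors the splits produced below):
from datasets import load_dataset
ds = load_dataset("Zombely/sst2-project-dataset")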
!pip install -q datasets transformers
from datasets import load_dataset
import torch
from transformers import AutoTokenizer, RobertaForSequenceClassification, RobertaTokenizerFast, TrainingArguments, Trainer
from sklearn.metrics import accuracy_score, precision_recall_fscore_support
from transformers.integrations import TensorBoardCallback
def load_and_process_dataset():
    dataset = load_dataset("sst2")
    # remove_columns returns a new DatasetDict rather than mutating in place
    dataset = dataset.remove_columns('idx')
    # SST-2's official test split is unlabeled, so drop it and promote the
    # labeled validation split to serve as the test set
    del dataset['test']
    dataset['test'] = dataset['validation']
    del dataset['validation']
    # carve a fresh 1600-example validation set out of the training data
    split_dataset = dataset['train'].train_test_split(test_size=1600)
    dataset['train'] = split_dataset['train']
    dataset['validation'] = split_dataset['test']
    return dataset
dataset = load_and_process_dataset()
dataset
Downloading and preparing dataset sst2/default to /root/.cache/huggingface/datasets/sst2/default/2.0.0/9896208a8d85db057ac50c72282bcb8fe755accc671a57dd8059d4e130961ed5...
Dataset sst2 downloaded and prepared. Subsequent calls will reuse this data.
DatasetDict({
    train: Dataset({
        features: ['idx', 'sentence', 'label'],
        num_rows: 65749
    })
    test: Dataset({
        features: ['idx', 'sentence', 'label'],
        num_rows: 872
    })
    validation: Dataset({
        features: ['idx', 'sentence', 'label'],
        num_rows: 1600
    })
})
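As a quick sanity check (an illustrative addition, not part of the original run), the label balance of the resampled splits can be inspected:
from collections import Counter
# count negative (0) / positive (1) examples in the new training split
Counter(dataset['train']['label'])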
train = dataset['train']
validation = dataset['validation']
test = dataset['test']
model = RobertaForSequenceClassification.from_pretrained('roberta-base')
# model_max_length (rather than max_length) is the tokenizer kwarg that caps sequence length
tokenizer = RobertaTokenizerFast.from_pretrained('roberta-base', model_max_length=512)
Some weights of the model checkpoint at roberta-base were not used when initializing RobertaForSequenceClassification: ['lm_head.layer_norm.bias', 'roberta.pooler.dense.weight', 'lm_head.bias', 'roberta.pooler.dense.bias', 'lm_head.dense.weight', 'lm_head.decoder.weight', 'lm_head.layer_norm.weight', 'lm_head.dense.bias']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.weight', 'classifier.dense.bias', 'classifier.out_proj.weight', 'classifier.out_proj.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
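Note that from_pretrained above relies on the config default of two labels, which happens to match SST-2's binary task; for a task with more classes, num_labels would need to be passed explicitly (a hypothetical sketch, not part of this run):
# e.g. a three-class variant:
# model = RobertaForSequenceClassification.from_pretrained('roberta-base', num_labels=3)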
# define a function that tokenizes a batch of sentences and returns the inputs the model expects
def tokenization(batched_text):
    return tokenizer(batched_text['sentence'], padding=True, truncation=True)
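For reference, RoBERTa tokenization yields input_ids and attention_mask (no token_type_ids); a quick illustrative check on a made-up one-sentence batch:
tokenization({'sentence': ['a quick sanity check']}).keys()
# dict_keys(['input_ids', 'attention_mask'])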
train_data = train.map(tokenization, batched = True, batch_size = len(train))
val_data = validation.map(tokenization, batched = True, batch_size = len(validation))
test_data = test.map(tokenization, batched = True, batch_size = len(test))
# expose the tensor columns the model consumes; attention_mask is needed so the
# padded positions are masked out ('sentence' is a raw string and is not needed by the model)
train_data.set_format('torch', columns=['input_ids', 'attention_mask', 'label'])
val_data.set_format('torch', columns=['input_ids', 'attention_mask', 'label'])
test_data.set_format('torch', columns=['input_ids', 'attention_mask', 'label'])
# define evaluation metrics: accuracy, precision, recall and F1
def compute_metrics(pred):
    labels = pred.label_ids
    preds = pred.predictions.argmax(-1)
    precision, recall, f1, _ = precision_recall_fscore_support(labels, preds, average='binary')
    acc = accuracy_score(labels, preds)
    return {
        'accuracy': acc,
        'f1': f1,
        'precision': precision,
        'recall': recall
    }
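A quick illustrative check of the metric function on made-up logits (EvalPrediction is the container the Trainer passes in):
from transformers import EvalPrediction
import numpy as np
# two fake examples, one per class, both predicted correctly
fake = EvalPrediction(predictions=np.array([[0.1, 0.9], [0.8, 0.2]]), label_ids=np.array([1, 0]))
compute_metrics(fake)  # all four metrics should come out as 1.0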
# define the training arguments
training_args = TrainingArguments(
    output_dir='./results',
    num_train_epochs=3,
    per_device_train_batch_size=4,
    gradient_accumulation_steps=16,
    per_device_eval_batch_size=8,
    evaluation_strategy="epoch",
    disable_tqdm=False,
    load_best_model_at_end=False,
    warmup_steps=500,
    weight_decay=0.01,
    logging_steps=8,
    fp16=True,
    logging_dir='./logs',
    dataloader_num_workers=2,
    run_name='roberta-classification',
    optim="adamw_torch"
)
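With these settings the effective batch size is per_device_train_batch_size * gradient_accumulation_steps = 4 * 16 = 64; 65749 training examples make 16438 device batches per epoch, 16438 // 16 = 1027 optimizer updates per epoch, and 3 * 1027 = 3081 total optimization steps, matching the figures reported in the training log below.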
trainer = Trainer(
    model=model,
    args=training_args,
    compute_metrics=compute_metrics,
    train_dataset=train_data,
    eval_dataset=val_data,
    # a TensorBoardCallback is already registered by default when tensorboard
    # is installed, hence the duplicate-callback warning below
    callbacks=[TensorBoardCallback]
)
You are adding a <class 'transformers.integrations.TensorBoardCallback'> to the callbacks of this Trainer, but there is already one. The current list of callbacks is: DefaultFlowCallback, TensorBoardCallback
Using cuda_amp half precision backend
trainer.train()
The following columns in the training set don't have a corresponding argument in `RobertaForSequenceClassification.forward` and have been ignored: idx, sentence. If idx, sentence are not expected by `RobertaForSequenceClassification.forward`, you can safely ignore this message.
***** Running training *****
  Num examples = 65749
  Num Epochs = 3
  Instantaneous batch size per device = 4
  Total train batch size (w. parallel, distributed & accumulation) = 64
  Gradient Accumulation steps = 16
  Total optimization steps = 3081
  Number of trainable parameters = 124647170
[3081/3081 42:34, Epoch 2/3]
| Epoch | Training Loss | Validation Loss | Accuracy | F1 | Precision | Recall |
|---|---|---|---|---|---|---|
| 0 | 0.195700 | 0.158939 | 0.936250 | 0.942761 | 0.944882 | 0.940649 |
| 1 | 0.132900 | 0.146519 | 0.955000 | 0.959551 | 0.962796 | 0.956327 |
| 2 | 0.039700 | 0.150718 | 0.955625 | 0.960357 | 0.957684 | 0.963046 |
Saving model checkpoint to ./results/checkpoint-500
Configuration saved in ./results/checkpoint-500/config.json
Model weights saved in ./results/checkpoint-500/pytorch_model.bin
(the same save messages repeat for checkpoints 1000, 1500, 2000, 2500 and 3000)
The following columns in the evaluation set don't have a corresponding argument in `RobertaForSequenceClassification.forward` and have been ignored: idx, sentence. If idx, sentence are not expected by `RobertaForSequenceClassification.forward`, you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 1600
  Batch size = 8
(evaluation runs after each of the three epochs)
Training completed. Do not forget to share your model on huggingface.co/models =)
TrainOutput(global_step=3081, training_loss=0.19893329894531087, metrics={'train_runtime': 2559.2258, 'train_samples_per_second': 77.073, 'train_steps_per_second': 1.204, 'total_flos': 6790599311126760.0, 'train_loss': 0.19893329894531087, 'epoch': 3.0})
trainer.evaluate()
The following columns in the evaluation set don't have a corresponding argument in `RobertaForSequenceClassification.forward` and have been ignored: idx, sentence. If idx, sentence are not expected by `RobertaForSequenceClassification.forward`, you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 1600
  Batch size = 8
[200/200 00:03]
{'eval_loss': 0.15071792900562286, 'eval_accuracy': 0.955625, 'eval_f1': 0.96035734226689, 'eval_precision': 0.9576837416481069, 'eval_recall': 0.9630459126539753, 'eval_runtime': 3.7924, 'eval_samples_per_second': 421.898, 'eval_steps_per_second': 52.737, 'epoch': 3.0}
trainer.evaluate(test_data)
The following columns in the evaluation set don't have a corresponding argument in `RobertaForSequenceClassification.forward` and have been ignored: idx, sentence. If idx, sentence are not expected by `RobertaForSequenceClassification.forward`, you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 872
  Batch size = 8
[200/200 00:05]
{'eval_loss': 0.20586328208446503, 'eval_accuracy': 0.9392201834862385, 'eval_f1': 0.9407821229050279, 'eval_precision': 0.9334811529933481, 'eval_recall': 0.9481981981981982, 'eval_runtime': 2.3748, 'eval_samples_per_second': 367.184, 'eval_steps_per_second': 45.898, 'epoch': 3.0}
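To spot-check the fine-tuned model on a new sentence (an illustrative example, not part of the original run; in SST-2, label 1 is positive):
inputs = tokenizer("an utterly delightful film", return_tensors="pt").to(model.device)
with torch.no_grad():
    logits = model(**inputs).logits
logits.argmax(-1).item()  # 1 -> positive, 0 -> negative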
!tensorboard dev upload --logdir logs --name RobertaForSequenceClassification
***** TensorBoard Uploader *****
This will upload your TensorBoard logs to https://tensorboard.dev/ from the following directory: logs
This TensorBoard will be visible to everyone. Do not upload sensitive data.
Continue? (yes/NO) yes
Upload started and will continue reading any new data as it's added to the logdir. To stop uploading, press Ctrl-C.
New experiment created. View your TensorBoard at: https://tensorboard.dev/experiment/Hq95VFzqTQ2CyBb1S4SOpw/
[2023-02-13T13:07:05] Started scanning logdir.
[2023-02-13T13:07:06] Total uploaded: 2412 scalars, 10 tensors (7.1 kB), 0 binary objects
Interrupted. View your TensorBoard at https://tensorboard.dev/experiment/Hq95VFzqTQ2CyBb1S4SOpw/
model.save_pretrained("./model")
Configuration saved in ./model/config.json Model weights saved in ./model/pytorch_model.bin
!huggingface-cli login
To login, `huggingface_hub` requires a token generated from https://huggingface.co/settings/tokens .
Token:
Add token as git credential? (Y/n) y
Token is valid.
Cannot authenticate through git-credential as no helper is defined on your machine. You might have to re-authenticate when pushing to the Hugging Face Hub. Run the following command in your terminal in case you want to set the 'store' credential helper as default.
git config --global credential.helper store
Token has not been saved to git credential helper.
Your token has been saved to /root/.cache/huggingface/token
Login successful
model.push_to_hub("Zombely/RobertaForSequenceClassification-sst2")
Configuration saved in /tmp/tmpga7eb38a/config.json
Model weights saved in /tmp/tmpga7eb38a/pytorch_model.bin
Uploading the following files to Zombely/RobertaForSequenceClassification-sst2: config.json, pytorch_model.bin
CommitInfo(commit_url='https://huggingface.co/Zombely/RobertaForSequenceClassification-sst2/commit/b9c78c4bdd91c2077b01e2109c77c30495c5a9e9', commit_message='Upload RobertaForSequenceClassification', commit_description='', oid='b9c78c4bdd91c2077b01e2109c77c30495c5a9e9', pr_url=None, pr_revision=None, pr_num=None)
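Only the model weights were pushed above, so loading the repo in a pipeline presumably needs the tokenizer supplied separately (a minimal sketch under that assumption; the example sentence is made up):
from transformers import pipeline
# pass the base tokenizer explicitly, since only the model was pushed to the repo
clf = pipeline("text-classification", model="Zombely/RobertaForSequenceClassification-sst2", tokenizer="roberta-base")
clf("a gorgeous, witty, seductive movie")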
!zip model -r model
adding: model/ (stored 0%)
adding: model/config.json (deflated 51%)
adding: model/pytorch_model.bin (deflated 12%)