Andrzej Preibisz a5fb14928c GPT-2
2023-02-12 20:03:40 +01:00

232 KiB
Raw Blame History

! pip install datasets transformers torch scikit-learn evaluate
!wget '' -O ''
!wget '' -O ''
!wget '' -O ''
import json
from pathlib import Path
from typing import Dict, List
from datasets import load_dataset

loaded_data = load_dataset('emotion')

!mkdir -v -p data

train_path = Path('data/train.json')
valid_path = Path('data/valid.json')
test_path = Path('data/test.json')
data_train, data_valid, data_test = [], [], []

# Copy the raw dataset splits into plain lists of {'label', 'text'} dicts.
# NOTE(review): the closing `]:`, the `break`, the dict's closing brace and the
# append were missing from the exported notebook. They are restored here and
# agree with the printed sizes (Train: 16000, Valid: 2000) — confirm against
# the original notebook.
for source_data, dataset, max_size in [
  (loaded_data['train'], data_train, None),
  (loaded_data['test'], data_valid, None),
]:
  for i, data in enumerate(source_data):
    # Optional cap on how many examples are taken from a split.
    if max_size is not None and i >= max_size:
      break
    data_line = {
      'label': int(data['label']),
      'text': data['text'],
    }
    dataset.append(data_line)

print(f'Train: {len(data_train):6d}')
print(f'Valid: {len(data_valid):6d}')

data_class_1, data_class_2 = [], []

# Bucket the examples by label: label 0 goes to data_class_1, label 1 to
# data_class_2; every other label is left out.
# NOTE(review): both branch bodies were missing from the exported notebook.
# The appends are restored from how the two lists are counted and sliced right
# after this loop — confirm against the original notebook.
for data in data_valid:
  label = data['label']
  if label == 0:
    data_class_1.append(data)
  elif label == 1:
    data_class_2.append(data)

print(f'Label 1: {len(data_class_1):6d}')
print(f'Label 2: {len(data_class_2):6d}')

# Cut each class bucket in the middle: the leading half of every class forms
# the validation set, the remainder forms the test set.
size_half_class_1 = len(data_class_1) // 2
size_half_class_2 = len(data_class_2) // 2

data_valid = [
  *data_class_1[:size_half_class_1],
  *data_class_2[:size_half_class_2],
]
data_test = [
  *data_class_1[size_half_class_1:],
  *data_class_2[size_half_class_2:],
]

print(f'Valid: {len(data_valid):6d}')
print(f'Test : {len(data_test):6d}')

    0: 'sadness',
    1: 'joy',
    2: 'love',
    3: 'anger',
    4: 'fear',
    5: 'surprise',

def save_as_translations(original_save_path: Path, data_to_save: List[Dict]) -> None:
    """Write ``data_to_save`` as JSON lines with textual instead of integer labels.

    The output file is placed next to ``original_save_path`` and uses the same
    file name prefixed with ``s2s-`` (``data/train.json`` ->
    ``data/s2s-train.json``). Each record's ``label`` is looked up in the
    module-level ``MAP_LABEL_TRANSLATION``.

    Args:
        original_save_path: Path of the integer-label file; only its parent
            directory and file name are used.
        data_to_save: Records containing at least a ``label`` key.

    Raises:
        KeyError: If a record's label is not a key of ``MAP_LABEL_TRANSLATION``.
    """
    # NOTE(review): the right-hand operand below and the final write were
    # missing from the exported notebook. They are restored to match the logged
    # "Saving into: data/s2s-train.json" output — confirm against the original.
    file_name = 's2s-' + original_save_path.name
    file_path = original_save_path.parent / file_name

    print(f'Saving into: {file_path}')
    with open(file_path, 'wt') as f_write:
        for data_line in data_to_save:
            # Translate on a copy so the caller's records keep their integer
            # labels; overwriting them in place made a second call fail.
            translated = {**data_line, 'label': MAP_LABEL_TRANSLATION[data_line['label']]}
            f_write.write(json.dumps(translated) + '\n')

# Persist every split twice: once as JSON lines with integer labels, and once
# with textual labels through save_as_translations.
# NOTE(review): the write call was missing from the exported notebook, so the
# serialized line was never written. It is restored as one JSON object per
# line — confirm against the original notebook.
for file_path, data_to_save in [(train_path, data_train), (valid_path, data_valid), (test_path, data_test)]:
  print(f'Saving into: {file_path}')
  with open(file_path, 'wt') as f_write:
    for data_line in data_to_save:
      data_line_str = json.dumps(data_line)
      f_write.write(f'{data_line_str}\n')
  save_as_translations(file_path, data_to_save)

mkdir: created directory 'data'
Train:  16000
Valid:   2000
Label 1:    581
Label 2:    695
Valid:    637
Test :    639
Saving into: data/train.json
Saving into: data/s2s-train.json
Saving into: data/valid.json
Saving into: data/s2s-valid.json
Saving into: data/test.json
Saving into: data/s2s-test.json

!head -n 2500 data/train.json > data/train-5k.json
!tail -n 2500 data/train.json >> data/train-5k.json
!wc -l data/train-5k.json
5000 data/train-5k.json
from pathlib import Path

# Build the "-5k" variant of every split: at most the first 2500 and the last
# 2500 records of each file.
for file_name in ["train", "valid", "test", "s2s-train", "s2s-valid", "s2s-test"]:
  print(f"=== {file_name} ===")
  # splitlines() does not produce the empty trailing element that split('\n')
  # yields for a newline-terminated file; that element took the place of one
  # real record (the training log reports 4999 examples, not 5000).
  all_text = Path(f"data/{file_name}.json").read_text().splitlines()
  if len(all_text) <= 5000:
    # Head and tail slices would overlap and duplicate records (the 637-record
    # validation file was evaluated as 1274 examples), so keep short files whole.
    text = all_text
  else:
    text = all_text[:2500] + all_text[-2500:]
  # NOTE(review): the write-out was missing from the exported notebook. The
  # target name follows the data/*-5k.json files passed to the training
  # command — confirm against the original notebook.
  Path(f"data/{file_name}-5k.json").write_text("".join(f"{line}\n" for line in text))
=== train ===
=== valid ===
=== test ===
=== s2s-train ===
=== s2s-valid ===
=== s2s-test ===
import os

os.environ['TOKENIZERS_PARALLELISM'] = 'true'
!python \
--cache_dir .cache_training \
--model_name_or_path gpt2 \
--custom_model gpt2_hidden \
--freeze_weights \
--train_file data/train-5k.json  \
--validation_file data/valid-5k.json \
--test_file data/test-5k.json \
--per_device_train_batch_size 24 \
--per_device_eval_batch_size 24 \
--do_train \
--do_eval \
--do_predict \
--max_seq_length 128 \
--learning_rate 2e-5 \
--num_train_epochs 5 \
--output_dir out/imdb-5k/gpt2
WARNING:__main__:Process rank: -1, device: cuda:0, n_gpu: 1distributed training: False, 16-bits training: False
INFO:__main__:Training/evaluation parameters TrainingArguments(
INFO:__main__:load a local file for train: data/train-5k.json
INFO:__main__:load a local file for validation: data/valid-5k.json
INFO:__main__:load a local file for test: data/test-5k.json
Downloading (…)lve/main/config.json: 100% 665/665 [00:00<00:00, 91.3kB/s]
[INFO|] 2023-02-12 18:48:12,394 >> loading configuration file config.json from cache at .cache_training/models--gpt2/snapshots/e7da7f221d5bf496a48136c0cd264e630fe9fcc8/config.json
[INFO|] 2023-02-12 18:48:12,395 >> Model config GPT2Config {
  "_name_or_path": "gpt2",
  "activation_function": "gelu_new",
  "architectures": [
  "attn_pdrop": 0.1,
  "bos_token_id": 50256,
  "embd_pdrop": 0.1,
  "eos_token_id": 50256,
  "id2label": {
    "0": "LABEL_0",
    "1": "LABEL_1",
    "2": "LABEL_2",
    "3": "LABEL_3",
    "4": "LABEL_4",
    "5": "LABEL_5"
  "initializer_range": 0.02,
  "label2id": {
    "LABEL_0": 0,
    "LABEL_1": 1,
    "LABEL_2": 2,
    "LABEL_3": 3,
    "LABEL_4": 4,
    "LABEL_5": 5
  "layer_norm_epsilon": 1e-05,
  "model_type": "gpt2",
  "n_ctx": 1024,
  "n_embd": 768,
  "n_head": 12,
  "n_inner": null,
  "n_layer": 12,
  "n_positions": 1024,
  "reorder_and_upcast_attn": false,
  "resid_pdrop": 0.1,
  "scale_attn_by_inverse_layer_idx": false,
  "scale_attn_weights": true,
  "summary_activation": null,
  "summary_first_dropout": 0.1,
  "summary_proj_to_labels": true,
  "summary_type": "cls_index",
  "summary_use_proj": true,
  "task_specific_params": {
    "text-generation": {
      "do_sample": true,
      "max_length": 50
  "transformers_version": "4.26.1",
  "use_cache": true,
  "vocab_size": 50257

[INFO|] 2023-02-12 18:48:12,427 >> Could not locate the tokenizer configuration file, will try to use the model config instead.
[INFO|] 2023-02-12 18:48:12,460 >> loading configuration file config.json from cache at .cache_training/models--gpt2/snapshots/e7da7f221d5bf496a48136c0cd264e630fe9fcc8/config.json
[INFO|] 2023-02-12 18:48:12,461 >> Model config GPT2Config {
  "_name_or_path": "gpt2",
  "activation_function": "gelu_new",
  "architectures": [
  "attn_pdrop": 0.1,
  "bos_token_id": 50256,
  "embd_pdrop": 0.1,
  "eos_token_id": 50256,
  "initializer_range": 0.02,
  "layer_norm_epsilon": 1e-05,
  "model_type": "gpt2",
  "n_ctx": 1024,
  "n_embd": 768,
  "n_head": 12,
  "n_inner": null,
  "n_layer": 12,
  "n_positions": 1024,
  "reorder_and_upcast_attn": false,
  "resid_pdrop": 0.1,
  "scale_attn_by_inverse_layer_idx": false,
  "scale_attn_weights": true,
  "summary_activation": null,
  "summary_first_dropout": 0.1,
  "summary_proj_to_labels": true,
  "summary_type": "cls_index",
  "summary_use_proj": true,
  "task_specific_params": {
    "text-generation": {
      "do_sample": true,
      "max_length": 50
  "transformers_version": "4.26.1",
  "use_cache": true,
  "vocab_size": 50257

[INFO|] 2023-02-12 18:48:12,933 >> loading file vocab.json from cache at .cache_training/models--gpt2/snapshots/e7da7f221d5bf496a48136c0cd264e630fe9fcc8/vocab.json
[INFO|] 2023-02-12 18:48:12,933 >> loading file merges.txt from cache at .cache_training/models--gpt2/snapshots/e7da7f221d5bf496a48136c0cd264e630fe9fcc8/merges.txt
[INFO|] 2023-02-12 18:48:12,933 >> loading file tokenizer.json from cache at .cache_training/models--gpt2/snapshots/e7da7f221d5bf496a48136c0cd264e630fe9fcc8/tokenizer.json
[INFO|] 2023-02-12 18:48:12,933 >> loading file added_tokens.json from cache at None
[INFO|] 2023-02-12 18:48:12,933 >> loading file special_tokens_map.json from cache at None
[INFO|] 2023-02-12 18:48:12,933 >> loading file tokenizer_config.json from cache at None
[INFO|] 2023-02-12 18:48:12,934 >> loading configuration file config.json from cache at .cache_training/models--gpt2/snapshots/e7da7f221d5bf496a48136c0cd264e630fe9fcc8/config.json
[INFO|] 2023-02-12 18:48:12,935 >> Model config GPT2Config {
  "_name_or_path": "gpt2",
  "activation_function": "gelu_new",
  "architectures": [
  "attn_pdrop": 0.1,
  "bos_token_id": 50256,
  "embd_pdrop": 0.1,
  "eos_token_id": 50256,
  "initializer_range": 0.02,
  "layer_norm_epsilon": 1e-05,
  "model_type": "gpt2",
  "n_ctx": 1024,
  "n_embd": 768,
  "n_head": 12,
  "n_inner": null,
  "n_layer": 12,
  "n_positions": 1024,
  "reorder_and_upcast_attn": false,
  "resid_pdrop": 0.1,
  "scale_attn_by_inverse_layer_idx": false,
  "scale_attn_weights": true,
  "summary_activation": null,
  "summary_first_dropout": 0.1,
  "summary_proj_to_labels": true,
  "summary_type": "cls_index",
  "summary_use_proj": true,
  "task_specific_params": {
    "text-generation": {
      "do_sample": true,
      "max_length": 50
  "transformers_version": "4.26.1",
  "use_cache": true,
  "vocab_size": 50257

INFO:__main__:Using hidden states in model: True
INFO:__main__:Using implementation from class: GPT2ForSequenceClassificationCustom
[INFO|] 2023-02-12 18:48:15,229 >> loading weights file pytorch_model.bin from cache at .cache_training/models--gpt2/snapshots/e7da7f221d5bf496a48136c0cd264e630fe9fcc8/pytorch_model.bin
[INFO|] 2023-02-12 18:48:20,237 >> All model checkpoint weights were used when initializing GPT2ForSequenceClassificationCustom.

[WARNING|] 2023-02-12 18:48:20,237 >> Some weights of GPT2ForSequenceClassificationCustom were not initialized from the model checkpoint at gpt2 and are newly initialized: ['score.dense_2.bias', 'score.out_proj.weight', 'score.dense_2.weight', 'score.dense_1_hidden.bias', 'score.dense_1_input.weight', 'score.dense_1_input.bias', 'score.dense_1_hidden.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
INFO:__main__:Freezing encoder weights
INFO:__main__:Freezing layer 1
INFO:__main__:Freezing layer 2
INFO:__main__:Freezing layer 3
INFO:__main__:Freezing layer 4
INFO:__main__:Freezing layer 5
INFO:__main__:Freezing layer 6
INFO:__main__:Freezing layer 7
INFO:__main__:Freezing layer 8
INFO:__main__:Freezing layer 9
INFO:__main__:Freezing layer 10
INFO:__main__:Freezing layer 11
INFO:__main__:Freezing layer 12
INFO:__main__:Freezing layer 13
INFO:__main__:Freezing layer 14
INFO:__main__:Freezing layer 15
INFO:__main__:Freezing layer 16
INFO:__main__:Freezing layer 17
INFO:__main__:Freezing layer 18
INFO:__main__:Freezing layer 19
INFO:__main__:Freezing layer 20
INFO:__main__:Freezing layer 21
INFO:__main__:Freezing layer 22
INFO:__main__:Freezing layer 23
INFO:__main__:Freezing layer 24
INFO:__main__:Freezing layer 25
INFO:__main__:Freezing layer 26
INFO:__main__:Freezing layer 27
INFO:__main__:Freezing layer 28
INFO:__main__:Freezing layer 29
INFO:__main__:Freezing layer 30
INFO:__main__:Freezing layer 31
INFO:__main__:Freezing layer 32
INFO:__main__:Freezing layer 33
INFO:__main__:Freezing layer 34
INFO:__main__:Freezing layer 35
INFO:__main__:Freezing layer 36
INFO:__main__:Freezing layer 37
INFO:__main__:Freezing layer 38
INFO:__main__:Freezing layer 39
INFO:__main__:Freezing layer 40
INFO:__main__:Set PAD token to EOS: <|endoftext|>
Running tokenizer on dataset: 100% 5/5 [00:01<00:00,  3.34ba/s]
Running tokenizer on dataset: 100% 2/2 [00:00<00:00,  2.35ba/s]
Running tokenizer on dataset: 100% 2/2 [00:00<00:00,  4.98ba/s]
INFO:__main__:Sample 912 of the training set: {'label': 2, 'text': 'i feel we need a little romantic boost in the relationship', 'input_ids': [72, 1254, 356, 761, 257, 1310, 14348, 5750, 287, 262, 2776, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256], 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]}.
INFO:__main__:Sample 204 of the training set: {'label': 1, 'text': 'i feel pretty mellow so far about whatever healing wounding process may be getting underway', 'input_ids': [72, 1254, 2495, 33748, 322, 523, 1290, 546, 4232, 11516, 40942, 1429, 743, 307, 1972, 17715, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256], 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]}.
INFO:__main__:Sample 2253 of the training set: {'label': 1, 'text': 'i feel ive answered those questions for her and shes pretty trusting for the most part', 'input_ids': [72, 1254, 220, 425, 9373, 883, 2683, 329, 607, 290, 673, 82, 2495, 33914, 329, 262, 749, 636, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256], 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]}.
[INFO|] 2023-02-12 18:48:30,921 >> The following columns in the training set don't have a corresponding argument in `GPT2ForSequenceClassificationCustom.forward` and have been ignored: text. If text are not expected by `GPT2ForSequenceClassificationCustom.forward`,  you can safely ignore this message.
/usr/local/lib/python3.8/dist-packages/transformers/ FutureWarning: This implementation of AdamW is deprecated and will be removed in a future version. Use the PyTorch implementation torch.optim.AdamW instead, or set `no_deprecation_warning=True` to disable this warning
[INFO|] 2023-02-12 18:48:30,927 >> ***** Running training *****
[INFO|] 2023-02-12 18:48:30,927 >>   Num examples = 4999
[INFO|] 2023-02-12 18:48:30,927 >>   Num Epochs = 5
[INFO|] 2023-02-12 18:48:30,927 >>   Instantaneous batch size per device = 24
[INFO|] 2023-02-12 18:48:30,927 >>   Total train batch size (w. parallel, distributed & accumulation) = 24
[INFO|] 2023-02-12 18:48:30,927 >>   Gradient Accumulation steps = 1
[INFO|] 2023-02-12 18:48:30,927 >>   Total optimization steps = 1045
[INFO|] 2023-02-12 18:48:30,928 >>   Number of trainable parameters = 68517888
{'loss': 1.0247, 'learning_rate': 1.0430622009569378e-05, 'epoch': 2.39}
 48% 500/1045 [03:49<04:14,  2.14it/s][INFO|] 2023-02-12 18:52:20,075 >> Saving model checkpoint to out/imdb-5k/gpt2/checkpoint-500
[INFO|] 2023-02-12 18:52:20,076 >> Configuration saved in out/imdb-5k/gpt2/checkpoint-500/config.json
[INFO|] 2023-02-12 18:52:21,822 >> Model weights saved in out/imdb-5k/gpt2/checkpoint-500/pytorch_model.bin
[INFO|] 2023-02-12 18:52:21,823 >> tokenizer config file saved in out/imdb-5k/gpt2/checkpoint-500/tokenizer_config.json
[INFO|] 2023-02-12 18:52:21,823 >> Special tokens file saved in out/imdb-5k/gpt2/checkpoint-500/special_tokens_map.json
{'loss': 0.3843, 'learning_rate': 8.612440191387561e-07, 'epoch': 4.78}
 96% 1000/1045 [07:46<00:20,  2.15it/s][INFO|] 2023-02-12 18:56:17,122 >> Saving model checkpoint to out/imdb-5k/gpt2/checkpoint-1000
[INFO|] 2023-02-12 18:56:17,123 >> Configuration saved in out/imdb-5k/gpt2/checkpoint-1000/config.json
[INFO|] 2023-02-12 18:56:18,817 >> Model weights saved in out/imdb-5k/gpt2/checkpoint-1000/pytorch_model.bin
[INFO|] 2023-02-12 18:56:18,817 >> tokenizer config file saved in out/imdb-5k/gpt2/checkpoint-1000/tokenizer_config.json
[INFO|] 2023-02-12 18:56:18,818 >> Special tokens file saved in out/imdb-5k/gpt2/checkpoint-1000/special_tokens_map.json
100% 1045/1045 [08:10<00:00,  2.65it/s][INFO|] 2023-02-12 18:56:41,796 >> 

Training completed. Do not forget to share your model on huggingface.co/models =)

{'train_runtime': 490.8844, 'train_samples_per_second': 50.918, 'train_steps_per_second': 2.129, 'train_loss': 0.689463275015069, 'epoch': 5.0}
100% 1045/1045 [08:10<00:00,  2.13it/s]
[INFO|] 2023-02-12 18:56:41,814 >> Saving model checkpoint to out/imdb-5k/gpt2
[INFO|] 2023-02-12 18:56:41,815 >> Configuration saved in out/imdb-5k/gpt2/config.json
[INFO|] 2023-02-12 18:56:43,512 >> Model weights saved in out/imdb-5k/gpt2/pytorch_model.bin
[INFO|] 2023-02-12 18:56:43,513 >> tokenizer config file saved in out/imdb-5k/gpt2/tokenizer_config.json
[INFO|] 2023-02-12 18:56:43,513 >> Special tokens file saved in out/imdb-5k/gpt2/special_tokens_map.json
***** train metrics *****
  epoch                    =        5.0
  train_loss               =     0.6895
  train_runtime            = 0:08:10.88
  train_samples            =       4999
  train_samples_per_second =     50.918
  train_steps_per_second   =      2.129
INFO:__main__:*** Evaluate ***
[INFO|] 2023-02-12 18:56:43,641 >> The following columns in the evaluation set don't have a corresponding argument in `GPT2ForSequenceClassificationCustom.forward` and have been ignored: text. If text are not expected by `GPT2ForSequenceClassificationCustom.forward`,  you can safely ignore this message.
[INFO|] 2023-02-12 18:56:43,642 >> ***** Running Evaluation *****
[INFO|] 2023-02-12 18:56:43,642 >>   Num examples = 1274
[INFO|] 2023-02-12 18:56:43,642 >>   Batch size = 24
100% 54/54 [00:09<00:00,  5.50it/s]
***** eval metrics *****
  epoch                   =        5.0
  eval_accuracy           =     0.9231
  eval_loss               =     0.2178
  eval_runtime            = 0:00:10.05
  eval_samples            =       1274
  eval_samples_per_second =    126.717
  eval_steps_per_second   =      5.371
INFO:__main__:*** Predict ***
[INFO|] 2023-02-12 18:56:53,699 >> The following columns in the test set don't have a corresponding argument in `GPT2ForSequenceClassificationCustom.forward` and have been ignored: text. If text are not expected by `GPT2ForSequenceClassificationCustom.forward`,  you can safely ignore this message.
[INFO|] 2023-02-12 18:56:53,701 >> ***** Running Prediction *****
[INFO|] 2023-02-12 18:56:53,701 >>   Num examples = 1278
[INFO|] 2023-02-12 18:56:53,701 >>   Batch size = 24
100% 54/54 [00:09<00:00,  5.49it/s]
INFO:__main__:***** Predict results None *****
[INFO|] 2023-02-12 18:57:03,752 >> Dropping the following result as it does not have all the necessary fields:
{'task': {'name': 'Text Classification', 'type': 'text-classification'}, 'metrics': [{'name': 'Accuracy', 'type': 'accuracy', 'value': 0.9230769276618958}]}
from transformers import GPT2Config, GPT2Tokenizer, GPT2Model, Trainer, TrainingArguments, GPT2ForSequenceClassification
from datasets import load_dataset
import torch
from sklearn.metrics import accuracy_score, precision_recall_fscore_support

# NOTE(review): `config` does not shape the model built below, because
# from_pretrained loads the checkpoint's own configuration. It is kept in case
# other cells reference it.
config=GPT2Config(vocab_size=2048, num_labels=6)
tokenizer = GPT2Tokenizer.from_pretrained('gpt2')
tokenizer.add_special_tokens({'pad_token': '[PAD]'})

# from_pretrained is a classmethod: call it on the class instead of first
# building, and immediately discarding, a randomly initialised model.
model = GPT2ForSequenceClassification.from_pretrained('gpt2', num_labels=6)
# [PAD] is a new vocabulary entry, so its id lies past the checkpoint's
# embedding table. Grow the table and tell the model which id is padding, which
# the classification head needs to find the last real token of each sequence.
# Harmless if another cell already does this.
model.resize_token_embeddings(len(tokenizer))
model.config.pad_token_id = tokenizer.pad_token_id

def tokenization(batched_text):
    """Tokenize the ``'text'`` entries of a batch with the module-level tokenizer.

    Sequences are padded to the longest one in the batch and the encoding is
    returned as PyTorch tensors.
    """
    texts = batched_text['text']
    encoded = tokenizer(texts, padding=True, return_tensors='pt')
    return encoded

def compute_metrics(pred):
    """Compute accuracy, precision, recall and F1 for a set of predictions.

    Args:
        pred: Object exposing ``label_ids`` (the true labels) and
            ``predictions`` (per-class scores; the argmax over the last axis
            is taken as the predicted class).

    Returns:
        Dict with the keys ``accuracy``, ``f1``, ``precision`` and ``recall``.
    """
    labels = pred.label_ids
    preds = pred.predictions.argmax(-1)
    # NOTE(review): with average='micro' on single-label multiclass data,
    # precision, recall and F1 all coincide with accuracy; consider 'macro'
    # if per-class behaviour matters.
    precision, recall, f1, _ = precision_recall_fscore_support(labels, preds, average='micro')
    acc = accuracy_score(labels, preds)
    # The closing brace of this dict was missing from the exported notebook.
    return {
        'accuracy': acc,
        'f1': f1,
        'precision': precision,
        'recall': recall,
    }

dataset = load_dataset("emotion")

train_data= dataset["train"]
test_data = dataset["test"]
eval_data = dataset["validation"]

# Tokenize each split as one single batch, so that padding=True inside
# tokenization pads every example of the split to the same length.
# NOTE(review): the `<dataset>.map(tokenization` part of both calls was missing
# from the exported notebook. It is restored from the surviving keyword
# arguments — confirm against the original notebook.
train_data = train_data.map(tokenization, batched=True, batch_size=len(train_data))
eval_data = eval_data.map(tokenization, batched=True, batch_size=len(eval_data))

# Expose only the model inputs and the label, as torch tensors.
train_data.set_format('torch', columns=['input_ids', 'attention_mask', 'label'])
eval_data.set_format('torch', columns=['input_ids', 'attention_mask', 'label'])

training_args = TrainingArguments(
    per_device_train_batch_size = 8,
    gradient_accumulation_steps = 16,    
    per_device_eval_batch_size= 8,
    evaluation_strategy = "epoch",
    save_strategy = "epoch",
    disable_tqdm = False, 
    logging_steps = 4,
    fp16 = True,
    dataloader_num_workers = 2,
    run_name = 'gpt-2-classification'

trainer = Trainer(


# Score the model on the test split, one example at a time, on the CPU.
# NOTE(review): `model = model.to('cpu')` and the extraction of the predicted
# class were damaged in the exported notebook and are restored here — confirm
# against the original notebook.
i = 0
sum_preds = 0
model = model.to('cpu')
model.eval()  # inference mode: dropout off
with torch.no_grad():  # no gradients are needed for scoring
  for line in test_data:

    inputs = tokenizer(line.get('text'), return_tensors="pt")
    # No labels are passed: only the logits are used, and the constant dummy
    # label of the original contributed nothing to the prediction.
    outputs = model(**inputs)
    _, predictions = torch.max(outputs.logits, 1)
    a = int(predictions.item())
    b = line.get('label')
    i += 1
    sum_preds += int(a == b)

# Avoid a ZeroDivisionError when the test split is empty.
accuracy = sum_preds / i * 100 if i else 0.0
print(f"ACCURACY: {accuracy}")
Assigning [PAD] to the pad_token key of the tokenizer
Adding [PAD] to the vocabulary
