GPT-2 NEW

s444501 2023-02-13 00:49:10 +01:00
parent 24409ffb1b
commit 7640b12c35
10 changed files with 2393 additions and 1508 deletions

File diff suppressed because it is too large


@@ -4,8 +4,8 @@
 # Transformer Decoder - GPT-2
 ## Modifications
-1. Freezing the first 40 layers
-2. Changing the classification head by adding two dropout and two relu() layers
+1. Adding an extra Linear layer to the head
+2. Using the hidden states from the last t layers
 # Transformer Encoder-Decoder - T5
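The diff above only names the two new modifications; their implementation is not visible in this commit view. Below is a minimal PyTorch sketch of what "an extra Linear layer in the head" combined with "hidden states from the last t layers" could look like; the class name, the value of `t`, and the last-token pooling are illustrative assumptions, not the repository's actual code.

```python
import torch
import torch.nn as nn
from transformers import GPT2Model


class GPT2LastLayersClassifier(nn.Module):
    """Sketch: classify using hidden states from the last t GPT-2 layers,
    with an extra Linear layer before the final classification layer."""

    def __init__(self, num_labels: int = 2, t: int = 4, hidden_size: int = 768):
        super().__init__()
        self.t = t
        self.gpt2 = GPT2Model.from_pretrained("gpt2")
        # Extra Linear layer added to the head (assumption: it fuses the t layers).
        self.pre_head = nn.Linear(t * hidden_size, hidden_size)
        self.classifier = nn.Linear(hidden_size, num_labels)

    def forward(self, input_ids, attention_mask=None):
        outputs = self.gpt2(
            input_ids,
            attention_mask=attention_mask,
            output_hidden_states=True,  # expose all intermediate hidden states
        )
        # Last-token representation from each of the last t layers,
        # concatenated into one feature vector (padding handling omitted).
        last_t = outputs.hidden_states[-self.t:]
        features = torch.cat([h[:, -1, :] for h in last_t], dim=-1)
        return self.classifier(torch.relu(self.pre_head(features)))
```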


@@ -16,8 +16,8 @@ should probably proofread and complete it, then remove this comment. -->
 This model is a fine-tuned version of [gpt2](https://huggingface.co/gpt2) on an unknown dataset.
 It achieves the following results on the evaluation set:
-- Loss: 0.2178
-- Accuracy: 0.9231
+- Loss: 0.1925
+- Accuracy: 0.9355
 ## Model description
@@ -36,13 +36,13 @@ More information needed
 ### Training hyperparameters
 The following hyperparameters were used during training:
-- learning_rate: 2e-05
-- train_batch_size: 24
-- eval_batch_size: 24
+- learning_rate: 5e-05
+- train_batch_size: 8
+- eval_batch_size: 8
 - seed: 42
 - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
 - lr_scheduler_type: linear
-- num_epochs: 5.0
+- num_epochs: 1.0
 ### Training results
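The new hyperparameters map one-to-one onto a standard Hugging Face `TrainingArguments` configuration (the Adam betas and epsilon listed are the Trainer defaults). A sketch under that assumption; the output directory is a placeholder:

```python
from transformers import TrainingArguments

training_args = TrainingArguments(
    output_dir="gpt2-finetuned",        # placeholder path, not from the repo
    learning_rate=5e-05,
    per_device_train_batch_size=8,
    per_device_eval_batch_size=8,
    seed=42,
    lr_scheduler_type="linear",         # linear decay, no warmup
    num_train_epochs=1.0,
    # adam_beta1=0.9, adam_beta2=0.999, adam_epsilon=1e-08 are the defaults
)
```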


@@ -1,14 +1,14 @@
 {
-    "epoch": 5.0,
-    "eval_accuracy": 0.9230769276618958,
-    "eval_loss": 0.2177695333957672,
-    "eval_runtime": 10.0539,
-    "eval_samples": 1274,
-    "eval_samples_per_second": 126.717,
-    "eval_steps_per_second": 5.371,
-    "train_loss": 0.689463275015069,
-    "train_runtime": 490.8844,
-    "train_samples": 4999,
-    "train_samples_per_second": 50.918,
-    "train_steps_per_second": 2.129
+    "epoch": 1.0,
+    "eval_accuracy": 0.9355000257492065,
+    "eval_loss": 0.19254431128501892,
+    "eval_runtime": 17.1165,
+    "eval_samples": 2000,
+    "eval_samples_per_second": 116.846,
+    "eval_steps_per_second": 14.606,
+    "train_loss": 0.4504347610473633,
+    "train_runtime": 524.6759,
+    "train_samples": 16000,
+    "train_samples_per_second": 30.495,
+    "train_steps_per_second": 3.812
 }
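The throughput figures in these result files follow from the sample counts, runtimes, and the batch size of 8; a quick sanity check on the new values:

```python
import math

eval_samples, eval_runtime = 2000, 17.1165
train_samples, train_runtime, train_steps = 16000, 524.6759, 2000

print(eval_samples / eval_runtime)                 # ~116.85 eval samples/s
print(math.ceil(eval_samples / 8) / eval_runtime)  # ~14.61 eval steps/s (batch size 8)
print(train_samples / train_runtime)               # ~30.50 train samples/s
print(train_steps / train_runtime)                 # ~3.81 train steps/s
```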


@@ -1,9 +1,9 @@
 {
-    "epoch": 5.0,
-    "eval_accuracy": 0.9230769276618958,
-    "eval_loss": 0.2177695333957672,
-    "eval_runtime": 10.0539,
-    "eval_samples": 1274,
-    "eval_samples_per_second": 126.717,
-    "eval_steps_per_second": 5.371
+    "epoch": 1.0,
+    "eval_accuracy": 0.9355000257492065,
+    "eval_loss": 0.19254431128501892,
+    "eval_runtime": 17.1165,
+    "eval_samples": 2000,
+    "eval_samples_per_second": 116.846,
+    "eval_steps_per_second": 14.606
 }

File diff suppressed because it is too large

Binary file not shown.


@@ -1,8 +1,8 @@
 {
-    "epoch": 5.0,
-    "train_loss": 0.689463275015069,
-    "train_runtime": 490.8844,
-    "train_samples": 4999,
-    "train_samples_per_second": 50.918,
-    "train_steps_per_second": 2.129
+    "epoch": 1.0,
+    "train_loss": 0.4504347610473633,
+    "train_runtime": 524.6759,
+    "train_samples": 16000,
+    "train_samples_per_second": 30.495,
+    "train_steps_per_second": 3.812
 }


@@ -1,37 +1,49 @@
 {
     "best_metric": null,
     "best_model_checkpoint": null,
-    "epoch": 5.0,
-    "global_step": 1045,
+    "epoch": 1.0,
+    "global_step": 2000,
     "is_hyper_param_search": false,
     "is_local_process_zero": true,
     "is_world_process_zero": true,
     "log_history": [
         {
-            "epoch": 2.39,
-            "learning_rate": 1.0430622009569378e-05,
-            "loss": 1.0247,
+            "epoch": 0.25,
+            "learning_rate": 3.7500000000000003e-05,
+            "loss": 0.9449,
             "step": 500
         },
         {
-            "epoch": 4.78,
-            "learning_rate": 8.612440191387561e-07,
-            "loss": 0.3843,
+            "epoch": 0.5,
+            "learning_rate": 2.5e-05,
+            "loss": 0.3705,
             "step": 1000
         },
         {
-            "epoch": 5.0,
-            "step": 1045,
-            "total_flos": 1723489601126400.0,
-            "train_loss": 0.689463275015069,
-            "train_runtime": 490.8844,
-            "train_samples_per_second": 50.918,
-            "train_steps_per_second": 2.129
+            "epoch": 0.75,
+            "learning_rate": 1.25e-05,
+            "loss": 0.264,
+            "step": 1500
+        },
+        {
+            "epoch": 1.0,
+            "learning_rate": 0.0,
+            "loss": 0.2223,
+            "step": 2000
+        },
+        {
+            "epoch": 1.0,
+            "step": 2000,
+            "total_flos": 1204741472256000.0,
+            "train_loss": 0.4504347610473633,
+            "train_runtime": 524.6759,
+            "train_samples_per_second": 30.495,
+            "train_steps_per_second": 3.812
         }
     ],
-    "max_steps": 1045,
-    "num_train_epochs": 5,
-    "total_flos": 1723489601126400.0,
+    "max_steps": 2000,
+    "num_train_epochs": 1,
+    "total_flos": 1204741472256000.0,
     "trial_name": null,
     "trial_params": null
 }
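The logged learning rates are consistent with a warmup-free linear decay from the base rate of 5e-05 to 0 over the 2000 training steps; a minimal check of that schedule:

```python
base_lr, total_steps = 5e-05, 2000

def linear_lr(step: int) -> float:
    # Linear decay to zero with no warmup steps.
    return base_lr * (1 - step / total_steps)

for step in (500, 1000, 1500, 2000):
    print(step, linear_lr(step))  # 3.75e-05, 2.5e-05, 1.25e-05, 0.0
```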

Binary file not shown.