GPT-2 NEW
This commit is contained in:
parent
24409ffb1b
commit
7640b12c35
1303
GPT_2.ipynb
1303
GPT_2.ipynb
File diff suppressed because it is too large
Load Diff
@ -4,8 +4,8 @@
|
|||||||
|
|
||||||
# Transformer Decoder - GPT-2
|
# Transformer Decoder - GPT-2
|
||||||
## Modyfikacje
|
## Modyfikacje
|
||||||
1. Zamrożenie pierwszych 40 warstw
|
1. Dodanie dodatkowej warstwy Linear do głowy
|
||||||
2. Zmiana głowy klasyfikacyjnej poprzez dodanie po 2 warstwy dropout i relu()
|
2. Wykorzystanie ukrytych stanów z t ostatnich warstw
|
||||||
|
|
||||||
|
|
||||||
# Transformer Encoder-Decoder - T5
|
# Transformer Encoder-Decoder - T5
|
||||||
|
@ -16,8 +16,8 @@ should probably proofread and complete it, then remove this comment. -->
|
|||||||
|
|
||||||
This model is a fine-tuned version of [gpt2](https://huggingface.co/gpt2) on an unknown dataset.
|
This model is a fine-tuned version of [gpt2](https://huggingface.co/gpt2) on an unknown dataset.
|
||||||
It achieves the following results on the evaluation set:
|
It achieves the following results on the evaluation set:
|
||||||
- Loss: 0.2178
|
- Loss: 0.1925
|
||||||
- Accuracy: 0.9231
|
- Accuracy: 0.9355
|
||||||
|
|
||||||
## Model description
|
## Model description
|
||||||
|
|
||||||
@ -36,13 +36,13 @@ More information needed
|
|||||||
### Training hyperparameters
|
### Training hyperparameters
|
||||||
|
|
||||||
The following hyperparameters were used during training:
|
The following hyperparameters were used during training:
|
||||||
- learning_rate: 2e-05
|
- learning_rate: 5e-05
|
||||||
- train_batch_size: 24
|
- train_batch_size: 8
|
||||||
- eval_batch_size: 24
|
- eval_batch_size: 8
|
||||||
- seed: 42
|
- seed: 42
|
||||||
- optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
|
- optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
|
||||||
- lr_scheduler_type: linear
|
- lr_scheduler_type: linear
|
||||||
- num_epochs: 5.0
|
- num_epochs: 1.0
|
||||||
|
|
||||||
### Training results
|
### Training results
|
||||||
|
|
||||||
|
@ -1,14 +1,14 @@
|
|||||||
{
|
{
|
||||||
"epoch": 5.0,
|
"epoch": 1.0,
|
||||||
"eval_accuracy": 0.9230769276618958,
|
"eval_accuracy": 0.9355000257492065,
|
||||||
"eval_loss": 0.2177695333957672,
|
"eval_loss": 0.19254431128501892,
|
||||||
"eval_runtime": 10.0539,
|
"eval_runtime": 17.1165,
|
||||||
"eval_samples": 1274,
|
"eval_samples": 2000,
|
||||||
"eval_samples_per_second": 126.717,
|
"eval_samples_per_second": 116.846,
|
||||||
"eval_steps_per_second": 5.371,
|
"eval_steps_per_second": 14.606,
|
||||||
"train_loss": 0.689463275015069,
|
"train_loss": 0.4504347610473633,
|
||||||
"train_runtime": 490.8844,
|
"train_runtime": 524.6759,
|
||||||
"train_samples": 4999,
|
"train_samples": 16000,
|
||||||
"train_samples_per_second": 50.918,
|
"train_samples_per_second": 30.495,
|
||||||
"train_steps_per_second": 2.129
|
"train_steps_per_second": 3.812
|
||||||
}
|
}
|
@ -1,9 +1,9 @@
|
|||||||
{
|
{
|
||||||
"epoch": 5.0,
|
"epoch": 1.0,
|
||||||
"eval_accuracy": 0.9230769276618958,
|
"eval_accuracy": 0.9355000257492065,
|
||||||
"eval_loss": 0.2177695333957672,
|
"eval_loss": 0.19254431128501892,
|
||||||
"eval_runtime": 10.0539,
|
"eval_runtime": 17.1165,
|
||||||
"eval_samples": 1274,
|
"eval_samples": 2000,
|
||||||
"eval_samples_per_second": 126.717,
|
"eval_samples_per_second": 116.846,
|
||||||
"eval_steps_per_second": 5.371
|
"eval_steps_per_second": 14.606
|
||||||
}
|
}
|
File diff suppressed because it is too large
Load Diff
BIN
models/gpt2/pytorch_model.bin
Normal file
BIN
models/gpt2/pytorch_model.bin
Normal file
Binary file not shown.
@ -1,8 +1,8 @@
|
|||||||
{
|
{
|
||||||
"epoch": 5.0,
|
"epoch": 1.0,
|
||||||
"train_loss": 0.689463275015069,
|
"train_loss": 0.4504347610473633,
|
||||||
"train_runtime": 490.8844,
|
"train_runtime": 524.6759,
|
||||||
"train_samples": 4999,
|
"train_samples": 16000,
|
||||||
"train_samples_per_second": 50.918,
|
"train_samples_per_second": 30.495,
|
||||||
"train_steps_per_second": 2.129
|
"train_steps_per_second": 3.812
|
||||||
}
|
}
|
@ -1,37 +1,49 @@
|
|||||||
{
|
{
|
||||||
"best_metric": null,
|
"best_metric": null,
|
||||||
"best_model_checkpoint": null,
|
"best_model_checkpoint": null,
|
||||||
"epoch": 5.0,
|
"epoch": 1.0,
|
||||||
"global_step": 1045,
|
"global_step": 2000,
|
||||||
"is_hyper_param_search": false,
|
"is_hyper_param_search": false,
|
||||||
"is_local_process_zero": true,
|
"is_local_process_zero": true,
|
||||||
"is_world_process_zero": true,
|
"is_world_process_zero": true,
|
||||||
"log_history": [
|
"log_history": [
|
||||||
{
|
{
|
||||||
"epoch": 2.39,
|
"epoch": 0.25,
|
||||||
"learning_rate": 1.0430622009569378e-05,
|
"learning_rate": 3.7500000000000003e-05,
|
||||||
"loss": 1.0247,
|
"loss": 0.9449,
|
||||||
"step": 500
|
"step": 500
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"epoch": 4.78,
|
"epoch": 0.5,
|
||||||
"learning_rate": 8.612440191387561e-07,
|
"learning_rate": 2.5e-05,
|
||||||
"loss": 0.3843,
|
"loss": 0.3705,
|
||||||
"step": 1000
|
"step": 1000
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"epoch": 5.0,
|
"epoch": 0.75,
|
||||||
"step": 1045,
|
"learning_rate": 1.25e-05,
|
||||||
"total_flos": 1723489601126400.0,
|
"loss": 0.264,
|
||||||
"train_loss": 0.689463275015069,
|
"step": 1500
|
||||||
"train_runtime": 490.8844,
|
},
|
||||||
"train_samples_per_second": 50.918,
|
{
|
||||||
"train_steps_per_second": 2.129
|
"epoch": 1.0,
|
||||||
|
"learning_rate": 0.0,
|
||||||
|
"loss": 0.2223,
|
||||||
|
"step": 2000
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"epoch": 1.0,
|
||||||
|
"step": 2000,
|
||||||
|
"total_flos": 1204741472256000.0,
|
||||||
|
"train_loss": 0.4504347610473633,
|
||||||
|
"train_runtime": 524.6759,
|
||||||
|
"train_samples_per_second": 30.495,
|
||||||
|
"train_steps_per_second": 3.812
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"max_steps": 1045,
|
"max_steps": 2000,
|
||||||
"num_train_epochs": 5,
|
"num_train_epochs": 1,
|
||||||
"total_flos": 1723489601126400.0,
|
"total_flos": 1204741472256000.0,
|
||||||
"trial_name": null,
|
"trial_name": null,
|
||||||
"trial_params": null
|
"trial_params": null
|
||||||
}
|
}
|
||||||
|
Binary file not shown.
Loading…
Reference in New Issue
Block a user