38 lines
836 B
JSON
38 lines
836 B
JSON
|
{
|
||
|
"best_metric": null,
|
||
|
"best_model_checkpoint": null,
|
||
|
"epoch": 5.0,
|
||
|
"global_step": 1045,
|
||
|
"is_hyper_param_search": false,
|
||
|
"is_local_process_zero": true,
|
||
|
"is_world_process_zero": true,
|
||
|
"log_history": [
|
||
|
{
|
||
|
"epoch": 2.39,
|
||
|
"learning_rate": 1.0430622009569378e-05,
|
||
|
"loss": 1.0247,
|
||
|
"step": 500
|
||
|
},
|
||
|
{
|
||
|
"epoch": 4.78,
|
||
|
"learning_rate": 8.612440191387561e-07,
|
||
|
"loss": 0.3843,
|
||
|
"step": 1000
|
||
|
},
|
||
|
{
|
||
|
"epoch": 5.0,
|
||
|
"step": 1045,
|
||
|
"total_flos": 1723489601126400.0,
|
||
|
"train_loss": 0.689463275015069,
|
||
|
"train_runtime": 490.8844,
|
||
|
"train_samples_per_second": 50.918,
|
||
|
"train_steps_per_second": 2.129
|
||
|
}
|
||
|
],
|
||
|
"max_steps": 1045,
|
||
|
"num_train_epochs": 5,
|
||
|
"total_flos": 1723489601126400.0,
|
||
|
"trial_name": null,
|
||
|
"trial_params": null
|
||
|
}
|