cafebert-ViISA / last-checkpoint /trainer_state.json
HiAmNear's picture
Training in progress, epoch 10, checkpoint
4345e72 verified
{
"best_metric": 0.7774524158125915,
"best_model_checkpoint": "modelParams/TrainingArguments_output\\checkpoint-796",
"epoch": 10.0,
"eval_steps": 500,
"global_step": 1990,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 1.0,
"eval_accuracy": 0.7635431918008785,
"eval_loss": 0.5437253713607788,
"eval_runtime": 119.9317,
"eval_samples_per_second": 11.39,
"eval_steps_per_second": 5.695,
"step": 199
},
{
"epoch": 2.0,
"eval_accuracy": 0.773792093704246,
"eval_loss": 0.5396685004234314,
"eval_runtime": 124.413,
"eval_samples_per_second": 10.98,
"eval_steps_per_second": 5.49,
"step": 398
},
{
"epoch": 2.5125628140703515,
"grad_norm": 592.5914916992188,
"learning_rate": 7.487437185929649e-06,
"loss": 0.5014,
"step": 500
},
{
"epoch": 3.0,
"eval_accuracy": 0.7730600292825769,
"eval_loss": 0.5995632410049438,
"eval_runtime": 59.0331,
"eval_samples_per_second": 23.14,
"eval_steps_per_second": 11.57,
"step": 597
},
{
"epoch": 4.0,
"eval_accuracy": 0.7774524158125915,
"eval_loss": 0.7029575109481812,
"eval_runtime": 123.6878,
"eval_samples_per_second": 11.044,
"eval_steps_per_second": 5.522,
"step": 796
},
{
"epoch": 5.0,
"eval_accuracy": 0.7693997071742313,
"eval_loss": 0.8647988438606262,
"eval_runtime": 112.0996,
"eval_samples_per_second": 12.186,
"eval_steps_per_second": 6.093,
"step": 995
},
{
"epoch": 5.025125628140704,
"grad_norm": 2496.794189453125,
"learning_rate": 4.974874371859297e-06,
"loss": 0.2005,
"step": 1000
},
{
"epoch": 6.0,
"eval_accuracy": 0.7642752562225475,
"eval_loss": 1.0910539627075195,
"eval_runtime": 111.8459,
"eval_samples_per_second": 12.213,
"eval_steps_per_second": 6.107,
"step": 1194
},
{
"epoch": 7.0,
"eval_accuracy": 0.7723279648609077,
"eval_loss": 1.2653732299804688,
"eval_runtime": 110.9037,
"eval_samples_per_second": 12.317,
"eval_steps_per_second": 6.158,
"step": 1393
},
{
"epoch": 7.5376884422110555,
"grad_norm": 1611.986083984375,
"learning_rate": 2.462311557788945e-06,
"loss": 0.0881,
"step": 1500
},
{
"epoch": 8.0,
"eval_accuracy": 0.773792093704246,
"eval_loss": 1.5597944259643555,
"eval_runtime": 115.9058,
"eval_samples_per_second": 11.785,
"eval_steps_per_second": 5.893,
"step": 1592
},
{
"epoch": 9.0,
"eval_accuracy": 0.7693997071742313,
"eval_loss": 1.7513818740844727,
"eval_runtime": 113.5331,
"eval_samples_per_second": 12.032,
"eval_steps_per_second": 6.016,
"step": 1791
},
{
"epoch": 10.0,
"eval_accuracy": 0.7679355783308931,
"eval_loss": 1.8216043710708618,
"eval_runtime": 170.3389,
"eval_samples_per_second": 8.019,
"eval_steps_per_second": 4.01,
"step": 1990
}
],
"logging_steps": 500,
"max_steps": 1990,
"num_input_tokens_seen": 0,
"num_train_epochs": 10,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 2.95568855112096e+16,
"train_batch_size": 2,
"trial_name": null,
"trial_params": null
}