Invalid JSON: the file fails to parse with

Unexpected token 'I', ..."ad_norm": Infinity, "... is not valid JSON

The offending token is the "grad_norm": Infinity entry logged at step 16500. A bare Infinity literal is not part of the JSON specification, so strict parsers such as JavaScript's JSON.parse reject the whole file, even though Python's json module (which most likely wrote the file, since this looks like a Hugging Face Trainer trainer_state.json) both emits and accepts it by default.
{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 1.0,
  "eval_steps": 0,
  "global_step": 20000,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.025,
      "grad_norm": 1.52798593044281,
      "learning_rate": 9.756378189094549e-06,
      "loss": 0.1999,
      "step": 500
    },
    {
      "epoch": 0.05,
      "grad_norm": 2.1572341918945312,
      "learning_rate": 9.506253126563282e-06,
      "loss": 0.0279,
      "step": 1000
    },
    {
      "epoch": 0.075,
      "grad_norm": 1.0278618335723877,
      "learning_rate": 9.256128064032017e-06,
      "loss": 0.0234,
      "step": 1500
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.9378705024719238,
      "learning_rate": 9.006003001500752e-06,
      "loss": 0.0203,
      "step": 2000
    },
    {
      "epoch": 0.125,
      "grad_norm": 1.0253329277038574,
      "learning_rate": 8.755877938969486e-06,
      "loss": 0.0179,
      "step": 2500
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.588853359222412,
      "learning_rate": 8.50575287643822e-06,
      "loss": 0.0171,
      "step": 3000
    },
    {
      "epoch": 0.175,
      "grad_norm": 2.365065336227417,
      "learning_rate": 8.255627813906954e-06,
      "loss": 0.0153,
      "step": 3500
    },
    {
      "epoch": 0.2,
      "grad_norm": 0.9309738874435425,
      "learning_rate": 8.005502751375689e-06,
      "loss": 0.015,
      "step": 4000
    },
    {
      "epoch": 0.225,
      "grad_norm": 0.4729306697845459,
      "learning_rate": 7.755377688844424e-06,
      "loss": 0.0143,
      "step": 4500
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.5990219116210938,
      "learning_rate": 7.505252626313157e-06,
      "loss": 0.014,
      "step": 5000
    },
    {
      "epoch": 0.275,
      "grad_norm": 0.32973700761795044,
      "learning_rate": 7.255627813906953e-06,
      "loss": 0.0128,
      "step": 5500
    },
    {
      "epoch": 0.3,
      "grad_norm": 0.7276681661605835,
      "learning_rate": 7.0055027513756875e-06,
      "loss": 0.013,
      "step": 6000
    },
    {
      "epoch": 0.325,
      "grad_norm": 1.137271761894226,
      "learning_rate": 6.7553776888444225e-06,
      "loss": 0.0129,
      "step": 6500
    },
    {
      "epoch": 0.35,
      "grad_norm": 0.2594708800315857,
      "learning_rate": 6.505252626313157e-06,
      "loss": 0.0124,
      "step": 7000
    },
    {
      "epoch": 0.375,
      "grad_norm": 0.2336331158876419,
      "learning_rate": 6.255127563781892e-06,
      "loss": 0.012,
      "step": 7500
    },
    {
      "epoch": 0.4,
      "grad_norm": 0.19649972021579742,
      "learning_rate": 6.005002501250627e-06,
      "loss": 0.0121,
      "step": 8000
    },
    {
      "epoch": 0.425,
      "grad_norm": 0.5051653981208801,
      "learning_rate": 5.754877438719361e-06,
      "loss": 0.0115,
      "step": 8500
    },
    {
      "epoch": 0.45,
      "grad_norm": 0.3455007076263428,
      "learning_rate": 5.504752376188095e-06,
      "loss": 0.0113,
      "step": 9000
    },
    {
      "epoch": 0.475,
      "grad_norm": 0.4421294331550598,
      "learning_rate": 5.254627313656829e-06,
      "loss": 0.0106,
      "step": 9500
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.2041834592819214,
      "learning_rate": 5.004502251125563e-06,
      "loss": 0.0107,
      "step": 10000
    },
    {
      "epoch": 0.525,
      "grad_norm": 1.119016408920288,
      "learning_rate": 4.754377188594297e-06,
      "loss": 0.011,
      "step": 10500
    },
    {
      "epoch": 0.55,
      "grad_norm": 0.9011972546577454,
      "learning_rate": 4.5042521260630315e-06,
      "loss": 0.0108,
      "step": 11000
    },
    {
      "epoch": 0.575,
      "grad_norm": 0.4205041229724884,
      "learning_rate": 4.2546273136568285e-06,
      "loss": 0.0103,
      "step": 11500
    },
    {
      "epoch": 0.6,
      "grad_norm": 0.22211983799934387,
      "learning_rate": 4.0045022511255635e-06,
      "loss": 0.0097,
      "step": 12000
    },
    {
      "epoch": 0.625,
      "grad_norm": 0.26337993144989014,
      "learning_rate": 3.7543771885942976e-06,
      "loss": 0.01,
      "step": 12500
    },
    {
      "epoch": 0.65,
      "grad_norm": 0.526462197303772,
      "learning_rate": 3.5042521260630318e-06,
      "loss": 0.0104,
      "step": 13000
    },
    {
      "epoch": 0.675,
      "grad_norm": 0.12982240319252014,
      "learning_rate": 3.2541270635317664e-06,
      "loss": 0.0096,
      "step": 13500
    },
    {
      "epoch": 0.7,
      "grad_norm": 0.8299207091331482,
      "learning_rate": 3.0040020010005005e-06,
      "loss": 0.0096,
      "step": 14000
    },
    {
      "epoch": 0.725,
      "grad_norm": 0.18750137090682983,
      "learning_rate": 2.753876938469235e-06,
      "loss": 0.0097,
      "step": 14500
    },
    {
      "epoch": 0.75,
      "grad_norm": 0.37416380643844604,
      "learning_rate": 2.5037518759379692e-06,
      "loss": 0.0097,
      "step": 15000
    },
    {
      "epoch": 0.775,
      "grad_norm": 0.47138354182243347,
      "learning_rate": 2.2536268134067034e-06,
      "loss": 0.0089,
      "step": 15500
    },
    {
      "epoch": 0.8,
      "grad_norm": 0.859859824180603,
      "learning_rate": 2.003501750875438e-06,
      "loss": 0.0089,
      "step": 16000
    },
    {
      "epoch": 0.825,
      "grad_norm": Infinity,
      "learning_rate": 1.7538769384692347e-06,
      "loss": 0.0091,
      "step": 16500
    },
    {
      "epoch": 0.85,
      "grad_norm": 0.4002706706523895,
      "learning_rate": 1.503751875937969e-06,
      "loss": 0.0085,
      "step": 17000
    },
    {
      "epoch": 0.875,
      "grad_norm": 0.29968053102493286,
      "learning_rate": 1.2536268134067034e-06,
      "loss": 0.0084,
      "step": 17500
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.422002911567688,
      "learning_rate": 1.0035017508754378e-06,
      "loss": 0.0089,
      "step": 18000
    },
    {
      "epoch": 0.925,
      "grad_norm": 0.272173672914505,
      "learning_rate": 7.533766883441721e-07,
      "loss": 0.0087,
      "step": 18500
    },
    {
      "epoch": 0.95,
      "grad_norm": 0.2766880691051483,
      "learning_rate": 5.03751875937969e-07,
      "loss": 0.0087,
      "step": 19000
    },
    {
      "epoch": 0.975,
      "grad_norm": 0.17756783962249756,
      "learning_rate": 2.5362681340670335e-07,
      "loss": 0.0088,
      "step": 19500
    },
    {
      "epoch": 1.0,
      "grad_norm": 0.5026212334632874,
      "learning_rate": 3.501750875437719e-09,
      "loss": 0.0089,
      "step": 20000
    }
  ],
  "logging_steps": 500,
  "max_steps": 20000,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 5000,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 0.0,
  "train_batch_size": 128,
  "trial_name": null,
  "trial_params": null
}