| { |
| "best_metric": 85.87243015287297, |
| "best_model_checkpoint": "./Shukv4/checkpoint-500", |
| "epoch": 5.376344086021505, |
| "eval_steps": 500, |
| "global_step": 1000, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.13440860215053763, |
| "grad_norm": 349994.25, |
| "learning_rate": 3.125e-06, |
| "loss": 0.5555, |
| "step": 25 |
| }, |
| { |
| "epoch": 0.26881720430107525, |
| "grad_norm": 296199.8125, |
| "learning_rate": 6.25e-06, |
| "loss": 0.3776, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.4032258064516129, |
| "grad_norm": 218550.265625, |
| "learning_rate": 9.375000000000001e-06, |
| "loss": 0.286, |
| "step": 75 |
| }, |
| { |
| "epoch": 0.5376344086021505, |
| "grad_norm": 241839.0, |
| "learning_rate": 1.25e-05, |
| "loss": 0.2629, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.6720430107526881, |
| "grad_norm": 224651.953125, |
| "learning_rate": 1.2152777777777779e-05, |
| "loss": 0.2473, |
| "step": 125 |
| }, |
| { |
| "epoch": 0.8064516129032258, |
| "grad_norm": 247373.3125, |
| "learning_rate": 1.1805555555555555e-05, |
| "loss": 0.2225, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.9408602150537635, |
| "grad_norm": 232539.5, |
| "learning_rate": 1.1458333333333333e-05, |
| "loss": 0.1814, |
| "step": 175 |
| }, |
| { |
| "epoch": 1.075268817204301, |
| "grad_norm": 208122.359375, |
| "learning_rate": 1.1111111111111112e-05, |
| "loss": 0.1379, |
| "step": 200 |
| }, |
| { |
| "epoch": 1.2096774193548387, |
| "grad_norm": 203154.734375, |
| "learning_rate": 1.076388888888889e-05, |
| "loss": 0.0926, |
| "step": 225 |
| }, |
| { |
| "epoch": 1.3440860215053765, |
| "grad_norm": 193545.453125, |
| "learning_rate": 1.0416666666666668e-05, |
| "loss": 0.0877, |
| "step": 250 |
| }, |
| { |
| "epoch": 1.478494623655914, |
| "grad_norm": 159916.4375, |
| "learning_rate": 1.0069444444444445e-05, |
| "loss": 0.0759, |
| "step": 275 |
| }, |
| { |
| "epoch": 1.6129032258064515, |
| "grad_norm": 203974.796875, |
| "learning_rate": 9.722222222222223e-06, |
| "loss": 0.0886, |
| "step": 300 |
| }, |
| { |
| "epoch": 1.7473118279569892, |
| "grad_norm": 176038.296875, |
| "learning_rate": 9.375000000000001e-06, |
| "loss": 0.0809, |
| "step": 325 |
| }, |
| { |
| "epoch": 1.881720430107527, |
| "grad_norm": 156732.640625, |
| "learning_rate": 9.027777777777777e-06, |
| "loss": 0.0598, |
| "step": 350 |
| }, |
| { |
| "epoch": 2.0161290322580645, |
| "grad_norm": 116201.7734375, |
| "learning_rate": 8.680555555555556e-06, |
| "loss": 0.0546, |
| "step": 375 |
| }, |
| { |
| "epoch": 2.150537634408602, |
| "grad_norm": 81153.625, |
| "learning_rate": 8.333333333333334e-06, |
| "loss": 0.0281, |
| "step": 400 |
| }, |
| { |
| "epoch": 2.28494623655914, |
| "grad_norm": 95937.9765625, |
| "learning_rate": 7.98611111111111e-06, |
| "loss": 0.0328, |
| "step": 425 |
| }, |
| { |
| "epoch": 2.4193548387096775, |
| "grad_norm": 105021.359375, |
| "learning_rate": 7.63888888888889e-06, |
| "loss": 0.0271, |
| "step": 450 |
| }, |
| { |
| "epoch": 2.553763440860215, |
| "grad_norm": 73798.6796875, |
| "learning_rate": 7.2916666666666674e-06, |
| "loss": 0.0317, |
| "step": 475 |
| }, |
| { |
| "epoch": 2.688172043010753, |
| "grad_norm": 111273.5546875, |
| "learning_rate": 6.944444444444445e-06, |
| "loss": 0.0286, |
| "step": 500 |
| }, |
| { |
| "epoch": 2.688172043010753, |
| "eval_loss": 0.032379038631916046, |
| "eval_runtime": 903.1767, |
| "eval_samples_per_second": 0.227, |
| "eval_steps_per_second": 0.029, |
| "eval_wer": 85.87243015287297, |
| "step": 500 |
| }, |
| { |
| "epoch": 2.8225806451612905, |
| "grad_norm": 115137.6015625, |
| "learning_rate": 6.597222222222223e-06, |
| "loss": 0.0293, |
| "step": 525 |
| }, |
| { |
| "epoch": 2.956989247311828, |
| "grad_norm": 148418.078125, |
| "learning_rate": 6.25e-06, |
| "loss": 0.0264, |
| "step": 550 |
| }, |
| { |
| "epoch": 3.0913978494623655, |
| "grad_norm": 44248.91796875, |
| "learning_rate": 5.902777777777778e-06, |
| "loss": 0.0157, |
| "step": 575 |
| }, |
| { |
| "epoch": 3.225806451612903, |
| "grad_norm": 86631.046875, |
| "learning_rate": 5.555555555555556e-06, |
| "loss": 0.0101, |
| "step": 600 |
| }, |
| { |
| "epoch": 3.360215053763441, |
| "grad_norm": 69759.2421875, |
| "learning_rate": 5.208333333333334e-06, |
| "loss": 0.0175, |
| "step": 625 |
| }, |
| { |
| "epoch": 3.4946236559139785, |
| "grad_norm": 79651.625, |
| "learning_rate": 4.861111111111111e-06, |
| "loss": 0.0121, |
| "step": 650 |
| }, |
| { |
| "epoch": 3.629032258064516, |
| "grad_norm": 78669.6171875, |
| "learning_rate": 4.513888888888889e-06, |
| "loss": 0.0137, |
| "step": 675 |
| }, |
| { |
| "epoch": 3.763440860215054, |
| "grad_norm": 104462.546875, |
| "learning_rate": 4.166666666666667e-06, |
| "loss": 0.0114, |
| "step": 700 |
| }, |
| { |
| "epoch": 3.8978494623655915, |
| "grad_norm": 79927.2109375, |
| "learning_rate": 3.819444444444445e-06, |
| "loss": 0.0105, |
| "step": 725 |
| }, |
| { |
| "epoch": 4.032258064516129, |
| "grad_norm": 69011.8984375, |
| "learning_rate": 3.4722222222222224e-06, |
| "loss": 0.0074, |
| "step": 750 |
| }, |
| { |
| "epoch": 4.166666666666667, |
| "grad_norm": 43131.57421875, |
| "learning_rate": 3.125e-06, |
| "loss": 0.0036, |
| "step": 775 |
| }, |
| { |
| "epoch": 4.301075268817204, |
| "grad_norm": 15227.64453125, |
| "learning_rate": 2.777777777777778e-06, |
| "loss": 0.0038, |
| "step": 800 |
| }, |
| { |
| "epoch": 4.435483870967742, |
| "grad_norm": 27650.37890625, |
| "learning_rate": 2.4305555555555557e-06, |
| "loss": 0.0082, |
| "step": 825 |
| }, |
| { |
| "epoch": 4.56989247311828, |
| "grad_norm": 146017.515625, |
| "learning_rate": 2.0833333333333334e-06, |
| "loss": 0.0066, |
| "step": 850 |
| }, |
| { |
| "epoch": 4.704301075268817, |
| "grad_norm": 41058.7421875, |
| "learning_rate": 1.7361111111111112e-06, |
| "loss": 0.0047, |
| "step": 875 |
| }, |
| { |
| "epoch": 4.838709677419355, |
| "grad_norm": 6614.59765625, |
| "learning_rate": 1.388888888888889e-06, |
| "loss": 0.0051, |
| "step": 900 |
| }, |
| { |
| "epoch": 4.973118279569892, |
| "grad_norm": 24049.263671875, |
| "learning_rate": 1.0416666666666667e-06, |
| "loss": 0.0026, |
| "step": 925 |
| }, |
| { |
| "epoch": 5.10752688172043, |
| "grad_norm": 42294.18359375, |
| "learning_rate": 6.944444444444445e-07, |
| "loss": 0.0047, |
| "step": 950 |
| }, |
| { |
| "epoch": 5.241935483870968, |
| "grad_norm": 9962.9453125, |
| "learning_rate": 3.4722222222222224e-07, |
| "loss": 0.0013, |
| "step": 975 |
| }, |
| { |
| "epoch": 5.376344086021505, |
| "grad_norm": 18759.73828125, |
| "learning_rate": 0.0, |
| "loss": 0.0047, |
| "step": 1000 |
| }, |
| { |
| "epoch": 5.376344086021505, |
| "eval_loss": 0.0025792771484702826, |
| "eval_runtime": 847.1283, |
| "eval_samples_per_second": 0.242, |
| "eval_steps_per_second": 0.031, |
| "eval_wer": 96.36267791249341, |
| "step": 1000 |
| } |
| ], |
| "logging_steps": 25, |
| "max_steps": 1000, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 6, |
| "save_steps": 500, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 3.4543672418304e+18, |
| "train_batch_size": 12, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|