| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 19.934292462958986, | |
| "eval_steps": 500, | |
| "global_step": 2900, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 1.0, | |
| "learning_rate": 0.0001, | |
| "loss": 2.732, | |
| "step": 145 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "eval_bleu": 12.4473, | |
| "eval_bp": 0.7805, | |
| "eval_counts_1": 7779, | |
| "eval_counts_2": 2893, | |
| "eval_counts_3": 1393, | |
| "eval_counts_4": 685, | |
| "eval_exact_match": 0.0168, | |
| "eval_f1": 0.3393, | |
| "eval_gen_len": 12.2523, | |
| "eval_loss": 1.2989141941070557, | |
| "eval_precisions_1": 45.6809, | |
| "eval_precisions_2": 19.5143, | |
| "eval_precisions_3": 11.0372, | |
| "eval_precisions_4": 6.5758, | |
| "eval_ref_len": 21250, | |
| "eval_rouge1": 0.3487, | |
| "eval_rouge2": 0.1796, | |
| "eval_rougeL": 0.3329, | |
| "eval_rougeLsum": 0.3327, | |
| "eval_runtime": 2048.1193, | |
| "eval_samples_per_second": 1.076, | |
| "eval_steps_per_second": 1.076, | |
| "eval_sys_len": 17029, | |
| "eval_totals_1": 17029, | |
| "eval_totals_2": 14825, | |
| "eval_totals_3": 12621, | |
| "eval_totals_4": 10417, | |
| "step": 145 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "learning_rate": 0.0001, | |
| "loss": 1.5514, | |
| "step": 291 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "eval_bleu": 14.7663, | |
| "eval_bp": 0.7871, | |
| "eval_counts_1": 8297, | |
| "eval_counts_2": 3336, | |
| "eval_counts_3": 1711, | |
| "eval_counts_4": 899, | |
| "eval_exact_match": 0.025, | |
| "eval_f1": 0.3743, | |
| "eval_gen_len": 12.441, | |
| "eval_loss": 1.20997154712677, | |
| "eval_precisions_1": 48.3931, | |
| "eval_precisions_2": 22.3278, | |
| "eval_precisions_3": 13.4333, | |
| "eval_precisions_4": 8.5351, | |
| "eval_ref_len": 21250, | |
| "eval_rouge1": 0.3839, | |
| "eval_rouge2": 0.2089, | |
| "eval_rougeL": 0.3688, | |
| "eval_rougeLsum": 0.369, | |
| "eval_runtime": 2570.171, | |
| "eval_samples_per_second": 0.858, | |
| "eval_steps_per_second": 0.858, | |
| "eval_sys_len": 17145, | |
| "eval_totals_1": 17145, | |
| "eval_totals_2": 14941, | |
| "eval_totals_3": 12737, | |
| "eval_totals_4": 10533, | |
| "step": 291 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "learning_rate": 0.0001, | |
| "loss": 1.3546, | |
| "step": 435 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "eval_bleu": 16.3903, | |
| "eval_bp": 0.7798, | |
| "eval_counts_1": 8930, | |
| "eval_counts_2": 3713, | |
| "eval_counts_3": 1905, | |
| "eval_counts_4": 1022, | |
| "eval_exact_match": 0.034, | |
| "eval_f1": 0.4155, | |
| "eval_gen_len": 12.6021, | |
| "eval_loss": 1.142831563949585, | |
| "eval_precisions_1": 52.4739, | |
| "eval_precisions_2": 25.0641, | |
| "eval_precisions_3": 15.1071, | |
| "eval_precisions_4": 9.8213, | |
| "eval_ref_len": 21250, | |
| "eval_rouge1": 0.4225, | |
| "eval_rouge2": 0.2345, | |
| "eval_rougeL": 0.4075, | |
| "eval_rougeLsum": 0.4074, | |
| "eval_runtime": 2923.7087, | |
| "eval_samples_per_second": 0.754, | |
| "eval_steps_per_second": 0.754, | |
| "eval_sys_len": 17018, | |
| "eval_totals_1": 17018, | |
| "eval_totals_2": 14814, | |
| "eval_totals_3": 12610, | |
| "eval_totals_4": 10406, | |
| "step": 435 | |
| }, | |
| { | |
| "epoch": 4.0, | |
| "learning_rate": 0.0001, | |
| "loss": 1.1969, | |
| "step": 581 | |
| }, | |
| { | |
| "epoch": 4.0, | |
| "eval_bleu": 17.8161, | |
| "eval_bp": 0.8441, | |
| "eval_counts_1": 9456, | |
| "eval_counts_2": 3994, | |
| "eval_counts_3": 2096, | |
| "eval_counts_4": 1157, | |
| "eval_exact_match": 0.0386, | |
| "eval_f1": 0.4334, | |
| "eval_gen_len": 13.4061, | |
| "eval_loss": 1.1113450527191162, | |
| "eval_precisions_1": 52.039, | |
| "eval_precisions_2": 25.0141, | |
| "eval_precisions_3": 15.2292, | |
| "eval_precisions_4": 10.0095, | |
| "eval_ref_len": 21250, | |
| "eval_rouge1": 0.4409, | |
| "eval_rouge2": 0.246, | |
| "eval_rougeL": 0.4251, | |
| "eval_rougeLsum": 0.4251, | |
| "eval_runtime": 2741.9646, | |
| "eval_samples_per_second": 0.804, | |
| "eval_steps_per_second": 0.804, | |
| "eval_sys_len": 18171, | |
| "eval_totals_1": 18171, | |
| "eval_totals_2": 15967, | |
| "eval_totals_3": 13763, | |
| "eval_totals_4": 11559, | |
| "step": 581 | |
| }, | |
| { | |
| "epoch": 5.0, | |
| "learning_rate": 0.0001, | |
| "loss": 1.0876, | |
| "step": 726 | |
| }, | |
| { | |
| "epoch": 5.0, | |
| "eval_bleu": 18.6911, | |
| "eval_bp": 0.8446, | |
| "eval_counts_1": 9606, | |
| "eval_counts_2": 4162, | |
| "eval_counts_3": 2233, | |
| "eval_counts_4": 1243, | |
| "eval_exact_match": 0.0377, | |
| "eval_f1": 0.443, | |
| "eval_gen_len": 13.5599, | |
| "eval_loss": 1.1031831502914429, | |
| "eval_precisions_1": 52.8412, | |
| "eval_precisions_2": 26.0532, | |
| "eval_precisions_3": 16.2152, | |
| "eval_precisions_4": 10.7461, | |
| "eval_ref_len": 21250, | |
| "eval_rouge1": 0.4504, | |
| "eval_rouge2": 0.2571, | |
| "eval_rougeL": 0.4356, | |
| "eval_rougeLsum": 0.4357, | |
| "eval_runtime": 3812.6899, | |
| "eval_samples_per_second": 0.578, | |
| "eval_steps_per_second": 0.578, | |
| "eval_sys_len": 18179, | |
| "eval_totals_1": 18179, | |
| "eval_totals_2": 15975, | |
| "eval_totals_3": 13771, | |
| "eval_totals_4": 11567, | |
| "step": 726 | |
| }, | |
| { | |
| "epoch": 6.0, | |
| "learning_rate": 0.0001, | |
| "loss": 0.9881, | |
| "step": 872 | |
| }, | |
| { | |
| "epoch": 6.0, | |
| "eval_bleu": 18.7071, | |
| "eval_bp": 0.8481, | |
| "eval_counts_1": 9608, | |
| "eval_counts_2": 4167, | |
| "eval_counts_3": 2235, | |
| "eval_counts_4": 1246, | |
| "eval_exact_match": 0.044, | |
| "eval_f1": 0.4429, | |
| "eval_gen_len": 13.6978, | |
| "eval_loss": 1.1118519306182861, | |
| "eval_precisions_1": 52.661, | |
| "eval_precisions_2": 25.9772, | |
| "eval_precisions_3": 16.1523, | |
| "eval_precisions_4": 10.7109, | |
| "eval_ref_len": 21250, | |
| "eval_rouge1": 0.4505, | |
| "eval_rouge2": 0.2567, | |
| "eval_rougeL": 0.4348, | |
| "eval_rougeLsum": 0.4349, | |
| "eval_runtime": 2020.0708, | |
| "eval_samples_per_second": 1.091, | |
| "eval_steps_per_second": 1.091, | |
| "eval_sys_len": 18245, | |
| "eval_totals_1": 18245, | |
| "eval_totals_2": 16041, | |
| "eval_totals_3": 13837, | |
| "eval_totals_4": 11633, | |
| "step": 872 | |
| }, | |
| { | |
| "epoch": 7.0, | |
| "learning_rate": 0.0001, | |
| "loss": 0.9142, | |
| "step": 1017 | |
| }, | |
| { | |
| "epoch": 7.0, | |
| "eval_bleu": 19.3053, | |
| "eval_bp": 0.8506, | |
| "eval_counts_1": 9757, | |
| "eval_counts_2": 4285, | |
| "eval_counts_3": 2311, | |
| "eval_counts_4": 1310, | |
| "eval_exact_match": 0.0495, | |
| "eval_f1": 0.451, | |
| "eval_gen_len": 13.5826, | |
| "eval_loss": 1.1105936765670776, | |
| "eval_precisions_1": 53.3432, | |
| "eval_precisions_2": 26.6364, | |
| "eval_precisions_3": 16.6463, | |
| "eval_precisions_4": 11.2167, | |
| "eval_ref_len": 21250, | |
| "eval_rouge1": 0.4587, | |
| "eval_rouge2": 0.2641, | |
| "eval_rougeL": 0.4427, | |
| "eval_rougeLsum": 0.443, | |
| "eval_runtime": 1991.0459, | |
| "eval_samples_per_second": 1.107, | |
| "eval_steps_per_second": 1.107, | |
| "eval_sys_len": 18291, | |
| "eval_totals_1": 18291, | |
| "eval_totals_2": 16087, | |
| "eval_totals_3": 13883, | |
| "eval_totals_4": 11679, | |
| "step": 1017 | |
| }, | |
| { | |
| "epoch": 8.0, | |
| "learning_rate": 0.0001, | |
| "loss": 0.8323, | |
| "step": 1163 | |
| }, | |
| { | |
| "epoch": 8.0, | |
| "eval_bleu": 19.4102, | |
| "eval_bp": 0.8507, | |
| "eval_counts_1": 9757, | |
| "eval_counts_2": 4300, | |
| "eval_counts_3": 2341, | |
| "eval_counts_4": 1317, | |
| "eval_exact_match": 0.0472, | |
| "eval_f1": 0.4513, | |
| "eval_gen_len": 13.6239, | |
| "eval_loss": 1.1327157020568848, | |
| "eval_precisions_1": 53.3373, | |
| "eval_precisions_2": 26.7263, | |
| "eval_precisions_3": 16.8599, | |
| "eval_precisions_4": 11.2747, | |
| "eval_ref_len": 21250, | |
| "eval_rouge1": 0.4587, | |
| "eval_rouge2": 0.2662, | |
| "eval_rougeL": 0.4429, | |
| "eval_rougeLsum": 0.4426, | |
| "eval_runtime": 1972.0648, | |
| "eval_samples_per_second": 1.118, | |
| "eval_steps_per_second": 1.118, | |
| "eval_sys_len": 18293, | |
| "eval_totals_1": 18293, | |
| "eval_totals_2": 16089, | |
| "eval_totals_3": 13885, | |
| "eval_totals_4": 11681, | |
| "step": 1163 | |
| }, | |
| { | |
| "epoch": 9.0, | |
| "learning_rate": 0.0001, | |
| "loss": 0.7742, | |
| "step": 1308 | |
| }, | |
| { | |
| "epoch": 9.0, | |
| "eval_bleu": 19.3574, | |
| "eval_bp": 0.8497, | |
| "eval_counts_1": 9757, | |
| "eval_counts_2": 4273, | |
| "eval_counts_3": 2324, | |
| "eval_counts_4": 1320, | |
| "eval_exact_match": 0.049, | |
| "eval_f1": 0.451, | |
| "eval_gen_len": 13.5944, | |
| "eval_loss": 1.1574428081512451, | |
| "eval_precisions_1": 53.3957, | |
| "eval_precisions_2": 26.5916, | |
| "eval_precisions_3": 16.7616, | |
| "eval_precisions_4": 11.3198, | |
| "eval_ref_len": 21250, | |
| "eval_rouge1": 0.4585, | |
| "eval_rouge2": 0.2653, | |
| "eval_rougeL": 0.4431, | |
| "eval_rougeLsum": 0.443, | |
| "eval_runtime": 1991.8737, | |
| "eval_samples_per_second": 1.106, | |
| "eval_steps_per_second": 1.106, | |
| "eval_sys_len": 18273, | |
| "eval_totals_1": 18273, | |
| "eval_totals_2": 16069, | |
| "eval_totals_3": 13865, | |
| "eval_totals_4": 11661, | |
| "step": 1308 | |
| }, | |
| { | |
| "epoch": 10.0, | |
| "learning_rate": 0.0001, | |
| "loss": 0.7101, | |
| "step": 1454 | |
| }, | |
| { | |
| "epoch": 10.0, | |
| "eval_bleu": 20.1003, | |
| "eval_bp": 0.8694, | |
| "eval_counts_1": 9861, | |
| "eval_counts_2": 4403, | |
| "eval_counts_3": 2438, | |
| "eval_counts_4": 1416, | |
| "eval_exact_match": 0.0531, | |
| "eval_f1": 0.4525, | |
| "eval_gen_len": 13.9133, | |
| "eval_loss": 1.167409896850586, | |
| "eval_precisions_1": 52.8995, | |
| "eval_precisions_2": 26.7871, | |
| "eval_precisions_3": 17.1292, | |
| "eval_precisions_4": 11.7716, | |
| "eval_ref_len": 21250, | |
| "eval_rouge1": 0.4594, | |
| "eval_rouge2": 0.2689, | |
| "eval_rougeL": 0.444, | |
| "eval_rougeLsum": 0.4435, | |
| "eval_runtime": 2025.3437, | |
| "eval_samples_per_second": 1.088, | |
| "eval_steps_per_second": 1.088, | |
| "eval_sys_len": 18641, | |
| "eval_totals_1": 18641, | |
| "eval_totals_2": 16437, | |
| "eval_totals_3": 14233, | |
| "eval_totals_4": 12029, | |
| "step": 1454 | |
| }, | |
| { | |
| "epoch": 10.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.6642, | |
| "step": 1599 | |
| }, | |
| { | |
| "epoch": 10.99, | |
| "eval_bleu": 19.655, | |
| "eval_bp": 0.8558, | |
| "eval_counts_1": 9868, | |
| "eval_counts_2": 4380, | |
| "eval_counts_3": 2358, | |
| "eval_counts_4": 1337, | |
| "eval_exact_match": 0.0476, | |
| "eval_f1": 0.4551, | |
| "eval_gen_len": 13.9142, | |
| "eval_loss": 1.1888612508773804, | |
| "eval_precisions_1": 53.6713, | |
| "eval_precisions_2": 27.0671, | |
| "eval_precisions_3": 16.8694, | |
| "eval_precisions_4": 11.3555, | |
| "eval_ref_len": 21250, | |
| "eval_rouge1": 0.4622, | |
| "eval_rouge2": 0.2694, | |
| "eval_rougeL": 0.4469, | |
| "eval_rougeLsum": 0.4466, | |
| "eval_runtime": 2020.9205, | |
| "eval_samples_per_second": 1.091, | |
| "eval_steps_per_second": 1.091, | |
| "eval_sys_len": 18386, | |
| "eval_totals_1": 18386, | |
| "eval_totals_2": 16182, | |
| "eval_totals_3": 13978, | |
| "eval_totals_4": 11774, | |
| "step": 1599 | |
| }, | |
| { | |
| "epoch": 12.0, | |
| "learning_rate": 0.0001, | |
| "loss": 0.6067, | |
| "step": 1745 | |
| }, | |
| { | |
| "epoch": 12.0, | |
| "eval_bleu": 19.9169, | |
| "eval_bp": 0.8828, | |
| "eval_counts_1": 9872, | |
| "eval_counts_2": 4384, | |
| "eval_counts_3": 2408, | |
| "eval_counts_4": 1395, | |
| "eval_exact_match": 0.0472, | |
| "eval_f1": 0.4489, | |
| "eval_gen_len": 14.2482, | |
| "eval_loss": 1.2207266092300415, | |
| "eval_precisions_1": 52.2494, | |
| "eval_precisions_2": 26.2672, | |
| "eval_precisions_3": 16.6229, | |
| "eval_precisions_4": 11.3581, | |
| "eval_ref_len": 21250, | |
| "eval_rouge1": 0.4569, | |
| "eval_rouge2": 0.2667, | |
| "eval_rougeL": 0.441, | |
| "eval_rougeLsum": 0.4408, | |
| "eval_runtime": 2047.2616, | |
| "eval_samples_per_second": 1.077, | |
| "eval_steps_per_second": 1.077, | |
| "eval_sys_len": 18894, | |
| "eval_totals_1": 18894, | |
| "eval_totals_2": 16690, | |
| "eval_totals_3": 14486, | |
| "eval_totals_4": 12282, | |
| "step": 1745 | |
| }, | |
| { | |
| "epoch": 12.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5684, | |
| "step": 1890 | |
| }, | |
| { | |
| "epoch": 12.99, | |
| "eval_bleu": 19.5451, | |
| "eval_bp": 0.8831, | |
| "eval_counts_1": 9870, | |
| "eval_counts_2": 4356, | |
| "eval_counts_3": 2360, | |
| "eval_counts_4": 1329, | |
| "eval_exact_match": 0.0485, | |
| "eval_f1": 0.4506, | |
| "eval_gen_len": 14.2432, | |
| "eval_loss": 1.2586854696273804, | |
| "eval_precisions_1": 52.2195, | |
| "eval_precisions_2": 26.0885, | |
| "eval_precisions_3": 16.2837, | |
| "eval_precisions_4": 10.8145, | |
| "eval_ref_len": 21250, | |
| "eval_rouge1": 0.4581, | |
| "eval_rouge2": 0.2651, | |
| "eval_rougeL": 0.4414, | |
| "eval_rougeLsum": 0.4409, | |
| "eval_runtime": 2126.3316, | |
| "eval_samples_per_second": 1.037, | |
| "eval_steps_per_second": 1.037, | |
| "eval_sys_len": 18901, | |
| "eval_totals_1": 18901, | |
| "eval_totals_2": 16697, | |
| "eval_totals_3": 14493, | |
| "eval_totals_4": 12289, | |
| "step": 1890 | |
| }, | |
| { | |
| "epoch": 14.0, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5288, | |
| "step": 2036 | |
| }, | |
| { | |
| "epoch": 14.0, | |
| "eval_bleu": 19.6648, | |
| "eval_bp": 0.8547, | |
| "eval_counts_1": 9815, | |
| "eval_counts_2": 4360, | |
| "eval_counts_3": 2389, | |
| "eval_counts_4": 1335, | |
| "eval_exact_match": 0.0454, | |
| "eval_f1": 0.4504, | |
| "eval_gen_len": 13.7432, | |
| "eval_loss": 1.2803738117218018, | |
| "eval_precisions_1": 53.4382, | |
| "eval_precisions_2": 26.9752, | |
| "eval_precisions_3": 17.1144, | |
| "eval_precisions_4": 11.3569, | |
| "eval_ref_len": 21250, | |
| "eval_rouge1": 0.4592, | |
| "eval_rouge2": 0.2671, | |
| "eval_rougeL": 0.4443, | |
| "eval_rougeLsum": 0.4436, | |
| "eval_runtime": 3989.4275, | |
| "eval_samples_per_second": 0.552, | |
| "eval_steps_per_second": 0.552, | |
| "eval_sys_len": 18367, | |
| "eval_totals_1": 18367, | |
| "eval_totals_2": 16163, | |
| "eval_totals_3": 13959, | |
| "eval_totals_4": 11755, | |
| "step": 2036 | |
| }, | |
| { | |
| "epoch": 14.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.4902, | |
| "step": 2181 | |
| }, | |
| { | |
| "epoch": 14.99, | |
| "eval_bleu": 19.8138, | |
| "eval_bp": 0.8766, | |
| "eval_counts_1": 9886, | |
| "eval_counts_2": 4407, | |
| "eval_counts_3": 2398, | |
| "eval_counts_4": 1359, | |
| "eval_exact_match": 0.0495, | |
| "eval_f1": 0.451, | |
| "eval_gen_len": 14.1225, | |
| "eval_loss": 1.321104884147644, | |
| "eval_precisions_1": 52.6495, | |
| "eval_precisions_2": 26.5914, | |
| "eval_precisions_3": 16.6887, | |
| "eval_precisions_4": 11.1714, | |
| "eval_ref_len": 21250, | |
| "eval_rouge1": 0.4582, | |
| "eval_rouge2": 0.2674, | |
| "eval_rougeL": 0.4426, | |
| "eval_rougeLsum": 0.4421, | |
| "eval_runtime": 2190.6068, | |
| "eval_samples_per_second": 1.006, | |
| "eval_steps_per_second": 1.006, | |
| "eval_sys_len": 18777, | |
| "eval_totals_1": 18777, | |
| "eval_totals_2": 16573, | |
| "eval_totals_3": 14369, | |
| "eval_totals_4": 12165, | |
| "step": 2181 | |
| }, | |
| { | |
| "epoch": 16.0, | |
| "learning_rate": 0.0001, | |
| "loss": 0.4498, | |
| "step": 2327 | |
| }, | |
| { | |
| "epoch": 16.0, | |
| "eval_bleu": 20.0703, | |
| "eval_bp": 0.909, | |
| "eval_counts_1": 10008, | |
| "eval_counts_2": 4477, | |
| "eval_counts_3": 2456, | |
| "eval_counts_4": 1381, | |
| "eval_exact_match": 0.0476, | |
| "eval_f1": 0.4491, | |
| "eval_gen_len": 14.3725, | |
| "eval_loss": 1.3620938062667847, | |
| "eval_precisions_1": 51.5903, | |
| "eval_precisions_2": 26.0366, | |
| "eval_precisions_3": 16.3832, | |
| "eval_precisions_4": 10.8, | |
| "eval_ref_len": 21250, | |
| "eval_rouge1": 0.4569, | |
| "eval_rouge2": 0.2679, | |
| "eval_rougeL": 0.4415, | |
| "eval_rougeLsum": 0.4412, | |
| "eval_runtime": 4080.8757, | |
| "eval_samples_per_second": 0.54, | |
| "eval_steps_per_second": 0.54, | |
| "eval_sys_len": 19399, | |
| "eval_totals_1": 19399, | |
| "eval_totals_2": 17195, | |
| "eval_totals_3": 14991, | |
| "eval_totals_4": 12787, | |
| "step": 2327 | |
| }, | |
| { | |
| "epoch": 16.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.4216, | |
| "step": 2472 | |
| }, | |
| { | |
| "epoch": 16.99, | |
| "eval_bleu": 20.1319, | |
| "eval_bp": 0.8948, | |
| "eval_counts_1": 10016, | |
| "eval_counts_2": 4483, | |
| "eval_counts_3": 2455, | |
| "eval_counts_4": 1385, | |
| "eval_exact_match": 0.0481, | |
| "eval_f1": 0.4531, | |
| "eval_gen_len": 14.3008, | |
| "eval_loss": 1.3966974020004272, | |
| "eval_precisions_1": 52.3712, | |
| "eval_precisions_2": 26.4937, | |
| "eval_precisions_3": 16.6814, | |
| "eval_precisions_4": 11.0685, | |
| "eval_ref_len": 21250, | |
| "eval_rouge1": 0.4615, | |
| "eval_rouge2": 0.2705, | |
| "eval_rougeL": 0.4457, | |
| "eval_rougeLsum": 0.4451, | |
| "eval_runtime": 3311.0939, | |
| "eval_samples_per_second": 0.666, | |
| "eval_steps_per_second": 0.666, | |
| "eval_sys_len": 19125, | |
| "eval_totals_1": 19125, | |
| "eval_totals_2": 16921, | |
| "eval_totals_3": 14717, | |
| "eval_totals_4": 12513, | |
| "step": 2472 | |
| }, | |
| { | |
| "epoch": 18.0, | |
| "learning_rate": 0.0001, | |
| "loss": 0.3829, | |
| "step": 2618 | |
| }, | |
| { | |
| "epoch": 18.0, | |
| "eval_bleu": 19.8508, | |
| "eval_bp": 0.9123, | |
| "eval_counts_1": 9976, | |
| "eval_counts_2": 4407, | |
| "eval_counts_3": 2412, | |
| "eval_counts_4": 1374, | |
| "eval_exact_match": 0.0476, | |
| "eval_f1": 0.4479, | |
| "eval_gen_len": 14.7046, | |
| "eval_loss": 1.4460452795028687, | |
| "eval_precisions_1": 51.2536, | |
| "eval_precisions_2": 25.533, | |
| "eval_precisions_3": 16.0202, | |
| "eval_precisions_4": 10.6909, | |
| "eval_ref_len": 21250, | |
| "eval_rouge1": 0.4556, | |
| "eval_rouge2": 0.2627, | |
| "eval_rougeL": 0.4387, | |
| "eval_rougeLsum": 0.4385, | |
| "eval_runtime": 3748.4463, | |
| "eval_samples_per_second": 0.588, | |
| "eval_steps_per_second": 0.588, | |
| "eval_sys_len": 19464, | |
| "eval_totals_1": 19464, | |
| "eval_totals_2": 17260, | |
| "eval_totals_3": 15056, | |
| "eval_totals_4": 12852, | |
| "step": 2618 | |
| }, | |
| { | |
| "epoch": 19.0, | |
| "learning_rate": 0.0001, | |
| "loss": 0.3551, | |
| "step": 2764 | |
| }, | |
| { | |
| "epoch": 19.0, | |
| "eval_bleu": 20.0572, | |
| "eval_bp": 0.8952, | |
| "eval_counts_1": 10010, | |
| "eval_counts_2": 4451, | |
| "eval_counts_3": 2438, | |
| "eval_counts_4": 1385, | |
| "eval_exact_match": 0.0463, | |
| "eval_f1": 0.4523, | |
| "eval_gen_len": 14.3807, | |
| "eval_loss": 1.4725110530853271, | |
| "eval_precisions_1": 52.3235, | |
| "eval_precisions_2": 26.2953, | |
| "eval_precisions_3": 16.5591, | |
| "eval_precisions_4": 11.0632, | |
| "eval_ref_len": 21250, | |
| "eval_rouge1": 0.4606, | |
| "eval_rouge2": 0.2672, | |
| "eval_rougeL": 0.4438, | |
| "eval_rougeLsum": 0.4434, | |
| "eval_runtime": 2215.2029, | |
| "eval_samples_per_second": 0.995, | |
| "eval_steps_per_second": 0.995, | |
| "eval_sys_len": 19131, | |
| "eval_totals_1": 19131, | |
| "eval_totals_2": 16927, | |
| "eval_totals_3": 14723, | |
| "eval_totals_4": 12519, | |
| "step": 2764 | |
| }, | |
| { | |
| "epoch": 19.93, | |
| "learning_rate": 0.0001, | |
| "loss": 0.3301, | |
| "step": 2900 | |
| }, | |
| { | |
| "epoch": 19.93, | |
| "eval_bleu": 19.8047, | |
| "eval_bp": 0.8816, | |
| "eval_counts_1": 9858, | |
| "eval_counts_2": 4378, | |
| "eval_counts_3": 2406, | |
| "eval_counts_4": 1368, | |
| "eval_exact_match": 0.0495, | |
| "eval_f1": 0.4483, | |
| "eval_gen_len": 14.2795, | |
| "eval_loss": 1.5030488967895508, | |
| "eval_precisions_1": 52.2361, | |
| "eval_precisions_2": 26.2659, | |
| "eval_precisions_3": 16.6344, | |
| "eval_precisions_4": 11.1582, | |
| "eval_ref_len": 21250, | |
| "eval_rouge1": 0.4569, | |
| "eval_rouge2": 0.2644, | |
| "eval_rougeL": 0.4412, | |
| "eval_rougeLsum": 0.4405, | |
| "eval_runtime": 2181.7432, | |
| "eval_samples_per_second": 1.01, | |
| "eval_steps_per_second": 1.01, | |
| "eval_sys_len": 18872, | |
| "eval_totals_1": 18872, | |
| "eval_totals_2": 16668, | |
| "eval_totals_3": 14464, | |
| "eval_totals_4": 12260, | |
| "step": 2900 | |
| }, | |
| { | |
| "epoch": 19.93, | |
| "step": 2900, | |
| "total_flos": 1.1100924470624256e+18, | |
| "train_loss": 0.6333936349276839, | |
| "train_runtime": 110329.1002, | |
| "train_samples_per_second": 1.688, | |
| "train_steps_per_second": 0.026 | |
| } | |
| ], | |
| "logging_steps": 500, | |
| "max_steps": 2900, | |
| "num_train_epochs": 20, | |
| "save_steps": 500, | |
| "total_flos": 1.1100924470624256e+18, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |