{
  "best_global_step": null,
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 1.1231567006968077,
  "eval_steps": 200,
  "global_step": 2600,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.0021606438718738182,
      "grad_norm": 0.6293283104896545,
      "learning_rate": 8.000000000000001e-06,
      "loss": 2.762,
      "num_input_tokens_seen": 43456,
      "step": 5,
      "train_runtime": 22.3128,
      "train_tokens_per_second": 1947.58
    },
    {
      "epoch": 0.0043212877437476364,
      "grad_norm": 0.709394633769989,
      "learning_rate": 1.8e-05,
      "loss": 2.7429,
      "num_input_tokens_seen": 86704,
      "step": 10,
      "train_runtime": 44.8748,
      "train_tokens_per_second": 1932.131
    },
    {
      "epoch": 0.0064819316156214555,
      "grad_norm": 0.6257256269454956,
      "learning_rate": 2.8000000000000003e-05,
      "loss": 2.7046,
      "num_input_tokens_seen": 130448,
      "step": 15,
      "train_runtime": 66.2724,
      "train_tokens_per_second": 1968.361
    },
    {
      "epoch": 0.008642575487495273,
      "grad_norm": 0.819546103477478,
      "learning_rate": 3.8e-05,
      "loss": 2.6496,
      "num_input_tokens_seen": 173536,
      "step": 20,
      "train_runtime": 87.627,
      "train_tokens_per_second": 1980.394
    },
    {
      "epoch": 0.010803219359369093,
      "grad_norm": 1.0976862907409668,
      "learning_rate": 4.8e-05,
      "loss": 2.485,
      "num_input_tokens_seen": 217520,
      "step": 25,
      "train_runtime": 109.5133,
      "train_tokens_per_second": 1986.243
    },
    {
      "epoch": 0.012963863231242911,
      "grad_norm": 1.3042502403259277,
      "learning_rate": 5.8e-05,
      "loss": 2.2924,
      "num_input_tokens_seen": 260592,
      "step": 30,
      "train_runtime": 130.6452,
      "train_tokens_per_second": 1994.654
    },
    {
      "epoch": 0.01512450710311673,
      "grad_norm": 1.3307358026504517,
      "learning_rate": 6.800000000000001e-05,
      "loss": 1.9451,
      "num_input_tokens_seen": 304000,
      "step": 35,
      "train_runtime": 152.3406,
      "train_tokens_per_second": 1995.529
    },
    {
      "epoch": 0.017285150974990546,
      "grad_norm": 5.1953444480896,
      "learning_rate": 7.800000000000001e-05,
      "loss": 1.5215,
      "num_input_tokens_seen": 347632,
      "step": 40,
      "train_runtime": 175.2126,
      "train_tokens_per_second": 1984.059
    },
    {
      "epoch": 0.019445794846864366,
      "grad_norm": 2.9334633350372314,
      "learning_rate": 8.800000000000001e-05,
      "loss": 0.9511,
      "num_input_tokens_seen": 391584,
      "step": 45,
      "train_runtime": 196.5345,
      "train_tokens_per_second": 1992.444
    },
    {
      "epoch": 0.021606438718738186,
      "grad_norm": 1.869616150856018,
      "learning_rate": 9.8e-05,
      "loss": 0.4297,
      "num_input_tokens_seen": 434896,
      "step": 50,
      "train_runtime": 218.7064,
      "train_tokens_per_second": 1988.493
    },
    {
      "epoch": 0.023767082590612002,
      "grad_norm": 0.39343124628067017,
      "learning_rate": 0.00010800000000000001,
      "loss": 0.2177,
      "num_input_tokens_seen": 478208,
      "step": 55,
      "train_runtime": 240.285,
      "train_tokens_per_second": 1990.17
    },
    {
      "epoch": 0.025927726462485822,
      "grad_norm": 0.3148583173751831,
      "learning_rate": 0.000118,
      "loss": 0.1802,
      "num_input_tokens_seen": 521056,
      "step": 60,
      "train_runtime": 261.4811,
      "train_tokens_per_second": 1992.71
    },
    {
      "epoch": 0.02808837033435964,
      "grad_norm": 0.1977643072605133,
      "learning_rate": 0.00012800000000000002,
      "loss": 0.1738,
      "num_input_tokens_seen": 564816,
      "step": 65,
      "train_runtime": 282.7387,
      "train_tokens_per_second": 1997.661
    },
    {
      "epoch": 0.03024901420623346,
      "grad_norm": 0.27150753140449524,
      "learning_rate": 0.000138,
      "loss": 0.1355,
      "num_input_tokens_seen": 607936,
      "step": 70,
      "train_runtime": 304.3102,
      "train_tokens_per_second": 1997.751
    },
    {
      "epoch": 0.03240965807810728,
      "grad_norm": 0.14433489739894867,
      "learning_rate": 0.000148,
      "loss": 0.131,
      "num_input_tokens_seen": 651184,
      "step": 75,
      "train_runtime": 325.5002,
      "train_tokens_per_second": 2000.564
    },
    {
      "epoch": 0.03457030194998109,
      "grad_norm": 0.14637072384357452,
      "learning_rate": 0.00015800000000000002,
      "loss": 0.135,
      "num_input_tokens_seen": 694992,
      "step": 80,
      "train_runtime": 346.7707,
      "train_tokens_per_second": 2004.183
    },
    {
      "epoch": 0.03673094582185491,
      "grad_norm": 0.12602286040782928,
      "learning_rate": 0.000168,
      "loss": 0.1138,
      "num_input_tokens_seen": 738016,
      "step": 85,
      "train_runtime": 367.8144,
      "train_tokens_per_second": 2006.49
    },
    {
      "epoch": 0.03889158969372873,
      "grad_norm": 0.11580361425876617,
      "learning_rate": 0.00017800000000000002,
      "loss": 0.1121,
      "num_input_tokens_seen": 781584,
      "step": 90,
      "train_runtime": 389.4092,
      "train_tokens_per_second": 2007.102
    },
    {
      "epoch": 0.04105223356560255,
      "grad_norm": 0.13435131311416626,
      "learning_rate": 0.000188,
      "loss": 0.1168,
      "num_input_tokens_seen": 825184,
      "step": 95,
      "train_runtime": 411.2782,
      "train_tokens_per_second": 2006.389
    },
    {
      "epoch": 0.04321287743747637,
      "grad_norm": 0.11393298953771591,
      "learning_rate": 0.00019800000000000002,
      "loss": 0.103,
      "num_input_tokens_seen": 868384,
      "step": 100,
      "train_runtime": 433.0861,
      "train_tokens_per_second": 2005.107
    },
    {
      "epoch": 0.045373521309350184,
      "grad_norm": 0.11593123525381088,
      "learning_rate": 0.00019999961523722363,
      "loss": 0.1113,
      "num_input_tokens_seen": 912000,
      "step": 105,
      "train_runtime": 456.2462,
      "train_tokens_per_second": 1998.921
    },
    {
      "epoch": 0.047534165181224004,
      "grad_norm": 0.13952848315238953,
      "learning_rate": 0.00019999805214351914,
      "loss": 0.105,
      "num_input_tokens_seen": 955216,
      "step": 110,
      "train_runtime": 477.4357,
      "train_tokens_per_second": 2000.722
    },
    {
      "epoch": 0.049694809053097824,
      "grad_norm": 0.09782172739505768,
      "learning_rate": 0.0001999952866899929,
      "loss": 0.0944,
      "num_input_tokens_seen": 998384,
      "step": 115,
      "train_runtime": 498.2711,
      "train_tokens_per_second": 2003.696
    },
    {
      "epoch": 0.051855452924971644,
      "grad_norm": 0.11062366515398026,
      "learning_rate": 0.00019999131890989627,
      "loss": 0.1028,
      "num_input_tokens_seen": 1041632,
      "step": 120,
      "train_runtime": 520.2058,
      "train_tokens_per_second": 2002.346
    },
    {
      "epoch": 0.05401609679684546,
      "grad_norm": 0.0976124033331871,
      "learning_rate": 0.00019998614885093717,
      "loss": 0.0926,
      "num_input_tokens_seen": 1084576,
      "step": 125,
      "train_runtime": 541.3878,
      "train_tokens_per_second": 2003.326
    },
    {
      "epoch": 0.05617674066871928,
      "grad_norm": 0.1115734875202179,
      "learning_rate": 0.00019997977657527956,
      "loss": 0.0913,
      "num_input_tokens_seen": 1127856,
      "step": 130,
      "train_runtime": 562.8485,
      "train_tokens_per_second": 2003.836
    },
    {
      "epoch": 0.0583373845405931,
      "grad_norm": 0.10568796098232269,
      "learning_rate": 0.00019997220215954258,
      "loss": 0.1054,
      "num_input_tokens_seen": 1171424,
      "step": 135,
      "train_runtime": 584.7953,
      "train_tokens_per_second": 2003.135
    },
    {
      "epoch": 0.06049802841246692,
      "grad_norm": 0.10306553542613983,
      "learning_rate": 0.00019996342569479972,
      "loss": 0.099,
      "num_input_tokens_seen": 1214848,
      "step": 140,
      "train_runtime": 607.0714,
      "train_tokens_per_second": 2001.162
    },
    {
      "epoch": 0.06265867228434073,
      "grad_norm": 0.09895918518304825,
      "learning_rate": 0.00019995344728657773,
      "loss": 0.0887,
      "num_input_tokens_seen": 1258080,
      "step": 145,
      "train_runtime": 628.7533,
      "train_tokens_per_second": 2000.912
    },
    {
      "epoch": 0.06481931615621456,
      "grad_norm": 0.08400742709636688,
      "learning_rate": 0.00019994226705485538,
      "loss": 0.1008,
      "num_input_tokens_seen": 1301680,
      "step": 150,
      "train_runtime": 649.8435,
      "train_tokens_per_second": 2003.067
    },
    {
      "epoch": 0.06697996002808837,
      "grad_norm": 0.09079141914844513,
      "learning_rate": 0.0001999298851340618,
      "loss": 0.1107,
      "num_input_tokens_seen": 1345520,
      "step": 155,
      "train_runtime": 671.9504,
      "train_tokens_per_second": 2002.41
    },
    {
      "epoch": 0.06914060389996218,
      "grad_norm": 0.11345323175191879,
      "learning_rate": 0.00019991630167307523,
      "loss": 0.0963,
      "num_input_tokens_seen": 1388816,
      "step": 160,
      "train_runtime": 693.7679,
      "train_tokens_per_second": 2001.845
    },
    {
      "epoch": 0.07130124777183601,
      "grad_norm": 0.13441255688667297,
      "learning_rate": 0.00019990151683522086,
      "loss": 0.0884,
      "num_input_tokens_seen": 1431936,
      "step": 165,
      "train_runtime": 714.9933,
      "train_tokens_per_second": 2002.727
    },
    {
      "epoch": 0.07346189164370982,
      "grad_norm": 0.1069858968257904,
      "learning_rate": 0.00019988553079826912,
      "loss": 0.0893,
      "num_input_tokens_seen": 1475008,
      "step": 170,
      "train_runtime": 737.0241,
      "train_tokens_per_second": 2001.302
    },
    {
      "epoch": 0.07562253551558365,
      "grad_norm": 0.08582285791635513,
      "learning_rate": 0.00019986834375443344,
      "loss": 0.0948,
      "num_input_tokens_seen": 1518496,
      "step": 175,
      "train_runtime": 758.2236,
      "train_tokens_per_second": 2002.702
    },
    {
      "epoch": 0.07778317938745746,
      "grad_norm": 0.1251683533191681,
      "learning_rate": 0.00019984995591036797,
      "loss": 0.0878,
      "num_input_tokens_seen": 1561744,
      "step": 180,
      "train_runtime": 779.9879,
      "train_tokens_per_second": 2002.267
    },
    {
      "epoch": 0.07994382325933128,
      "grad_norm": 0.11450210958719254,
      "learning_rate": 0.00019983036748716504,
      "loss": 0.1033,
      "num_input_tokens_seen": 1605408,
      "step": 185,
      "train_runtime": 801.5796,
      "train_tokens_per_second": 2002.805
    },
    {
      "epoch": 0.0821044671312051,
      "grad_norm": 0.09959390759468079,
      "learning_rate": 0.00019980957872035258,
      "loss": 0.0977,
      "num_input_tokens_seen": 1649280,
      "step": 190,
      "train_runtime": 822.8119,
      "train_tokens_per_second": 2004.444
    },
    {
      "epoch": 0.08426511100307892,
      "grad_norm": 0.09225820004940033,
      "learning_rate": 0.00019978758985989128,
      "loss": 0.0896,
      "num_input_tokens_seen": 1692512,
      "step": 195,
      "train_runtime": 844.7454,
      "train_tokens_per_second": 2003.577
    },
    {
      "epoch": 0.08642575487495274,
      "grad_norm": 0.13596701622009277,
      "learning_rate": 0.00019976440117017144,
      "loss": 0.0956,
      "num_input_tokens_seen": 1735840,
      "step": 200,
      "train_runtime": 866.713,
      "train_tokens_per_second": 2002.785
    },
    {
      "epoch": 0.08642575487495274,
      "eval_loss": 0.09148535877466202,
      "eval_runtime": 710.7457,
      "eval_samples_per_second": 13.024,
      "eval_steps_per_second": 6.513,
      "num_input_tokens_seen": 1735840,
      "step": 200
    },
    {
      "epoch": 0.08858639874682656,
      "grad_norm": 0.11967090517282486,
      "learning_rate": 0.00019974001293001002,
      "loss": 0.1006,
      "num_input_tokens_seen": 1779424,
      "step": 205,
      "train_runtime": 1603.3338,
      "train_tokens_per_second": 1109.828
    },
    {
      "epoch": 0.09074704261870037,
      "grad_norm": 0.10459703952074051,
      "learning_rate": 0.00019971442543264712,
      "loss": 0.1082,
      "num_input_tokens_seen": 1823168,
      "step": 210,
      "train_runtime": 1624.6497,
      "train_tokens_per_second": 1122.191
    },
    {
      "epoch": 0.0929076864905742,
      "grad_norm": 0.0971212238073349,
      "learning_rate": 0.00019968763898574253,
      "loss": 0.0975,
      "num_input_tokens_seen": 1866528,
      "step": 215,
      "train_runtime": 1645.7576,
      "train_tokens_per_second": 1134.145
    },
    {
      "epoch": 0.09506833036244801,
      "grad_norm": 0.07356134802103043,
      "learning_rate": 0.000199659653911372,
      "loss": 0.0821,
      "num_input_tokens_seen": 1909808,
      "step": 220,
      "train_runtime": 1667.2345,
      "train_tokens_per_second": 1145.495
    },
    {
      "epoch": 0.09722897423432182,
      "grad_norm": 0.10070156306028366,
      "learning_rate": 0.00019963047054602338,
      "loss": 0.087,
      "num_input_tokens_seen": 1953008,
      "step": 225,
      "train_runtime": 1689.0691,
      "train_tokens_per_second": 1156.263
    },
    {
      "epoch": 0.09938961810619565,
      "grad_norm": 0.09706509113311768,
      "learning_rate": 0.00019960008924059254,
      "loss": 0.0993,
      "num_input_tokens_seen": 1996752,
      "step": 230,
      "train_runtime": 1710.7637,
      "train_tokens_per_second": 1167.17
    },
    {
      "epoch": 0.10155026197806946,
      "grad_norm": 0.09175528585910797,
      "learning_rate": 0.0001995685103603792,
      "loss": 0.0815,
      "num_input_tokens_seen": 2039968,
      "step": 235,
      "train_runtime": 1732.6168,
      "train_tokens_per_second": 1177.391
    },
    {
      "epoch": 0.10371090584994329,
      "grad_norm": 0.1141452044248581,
      "learning_rate": 0.0001995357342850826,
      "loss": 0.0992,
      "num_input_tokens_seen": 2083696,
      "step": 240,
      "train_runtime": 1755.0335,
      "train_tokens_per_second": 1187.269
    },
    {
      "epoch": 0.1058715497218171,
      "grad_norm": 0.11164279282093048,
      "learning_rate": 0.00019950176140879668,
      "loss": 0.0877,
      "num_input_tokens_seen": 2127072,
      "step": 245,
      "train_runtime": 1777.3851,
      "train_tokens_per_second": 1196.742
    },
    {
      "epoch": 0.10803219359369091,
      "grad_norm": 0.09218638390302658,
      "learning_rate": 0.00019946659214000568,
      "loss": 0.0856,
      "num_input_tokens_seen": 2170448,
      "step": 250,
      "train_runtime": 1799.395,
      "train_tokens_per_second": 1206.21
    },
    {
      "epoch": 0.11019283746556474,
      "grad_norm": 0.09893018752336502,
      "learning_rate": 0.00019943022690157894,
      "loss": 0.0839,
      "num_input_tokens_seen": 2213648,
      "step": 255,
      "train_runtime": 1821.4888,
      "train_tokens_per_second": 1215.296
    },
    {
      "epoch": 0.11235348133743855,
      "grad_norm": 0.10116513073444366,
      "learning_rate": 0.00019939266613076603,
      "loss": 0.091,
      "num_input_tokens_seen": 2257296,
      "step": 260,
      "train_runtime": 1843.5073,
      "train_tokens_per_second": 1224.457
    },
    {
      "epoch": 0.11451412520931238,
      "grad_norm": 0.08388976752758026,
      "learning_rate": 0.00019935391027919134,
      "loss": 0.0855,
      "num_input_tokens_seen": 2300624,
      "step": 265,
      "train_runtime": 1864.8115,
      "train_tokens_per_second": 1233.703
    },
    {
      "epoch": 0.1166747690811862,
      "grad_norm": 0.08835554867982864,
      "learning_rate": 0.00019931395981284878,
      "loss": 0.0885,
      "num_input_tokens_seen": 2344096,
      "step": 270,
      "train_runtime": 1887.3294,
      "train_tokens_per_second": 1242.017
    },
    {
      "epoch": 0.11883541295306001,
      "grad_norm": 0.09479964524507523,
      "learning_rate": 0.000199272815212096,
      "loss": 0.0909,
      "num_input_tokens_seen": 2387696,
      "step": 275,
      "train_runtime": 1909.7108,
      "train_tokens_per_second": 1250.292
    },
    {
      "epoch": 0.12099605682493383,
      "grad_norm": 0.09753034263849258,
      "learning_rate": 0.00019923047697164884,
      "loss": 0.0937,
      "num_input_tokens_seen": 2431264,
      "step": 280,
      "train_runtime": 1931.1784,
      "train_tokens_per_second": 1258.954
    },
    {
      "epoch": 0.12315670069680765,
      "grad_norm": 0.09564550220966339,
      "learning_rate": 0.00019918694560057518,
      "loss": 0.0859,
      "num_input_tokens_seen": 2474928,
      "step": 285,
      "train_runtime": 1952.8165,
      "train_tokens_per_second": 1267.363
    },
    {
      "epoch": 0.12531734456868146,
      "grad_norm": 0.08517869561910629,
      "learning_rate": 0.0001991422216222889,
      "loss": 0.0814,
      "num_input_tokens_seen": 2517936,
      "step": 290,
      "train_runtime": 1973.8825,
      "train_tokens_per_second": 1275.626
    },
    {
      "epoch": 0.12747798844055527,
      "grad_norm": 0.09123244881629944,
      "learning_rate": 0.0001990963055745437,
      "loss": 0.0872,
      "num_input_tokens_seen": 2561312,
      "step": 295,
      "train_runtime": 1995.4942,
      "train_tokens_per_second": 1283.548
    },
    {
      "epoch": 0.12963863231242911,
      "grad_norm": 0.08103613555431366,
      "learning_rate": 0.0001990491980094264,
      "loss": 0.0811,
      "num_input_tokens_seen": 2604464,
      "step": 300,
      "train_runtime": 2016.775,
      "train_tokens_per_second": 1291.4
    },
    {
      "epoch": 0.13179927618430293,
      "grad_norm": 0.10851939767599106,
      "learning_rate": 0.00019900089949335042,
      "loss": 0.0964,
      "num_input_tokens_seen": 2648432,
      "step": 305,
      "train_runtime": 2039.7188,
      "train_tokens_per_second": 1298.43
    },
    {
      "epoch": 0.13395992005617674,
      "grad_norm": 0.08056960254907608,
      "learning_rate": 0.00019895141060704912,
      "loss": 0.0715,
      "num_input_tokens_seen": 2691472,
      "step": 310,
      "train_runtime": 2060.7107,
      "train_tokens_per_second": 1306.089
    },
    {
      "epoch": 0.13612056392805055,
      "grad_norm": 0.11214818060398102,
      "learning_rate": 0.0001989007319455685,
      "loss": 0.0839,
      "num_input_tokens_seen": 2734672,
      "step": 315,
      "train_runtime": 2082.0763,
      "train_tokens_per_second": 1313.435
    },
    {
      "epoch": 0.13828120779992437,
      "grad_norm": 0.11480865627527237,
      "learning_rate": 0.00019884886411826035,
      "loss": 0.0838,
      "num_input_tokens_seen": 2777792,
      "step": 320,
      "train_runtime": 2103.4799,
      "train_tokens_per_second": 1320.57
    },
    {
      "epoch": 0.1404418516717982,
      "grad_norm": 0.08724990487098694,
      "learning_rate": 0.0001987958077487747,
      "loss": 0.0846,
      "num_input_tokens_seen": 2821232,
      "step": 325,
      "train_runtime": 2125.391,
      "train_tokens_per_second": 1327.394
    },
    {
      "epoch": 0.14260249554367202,
      "grad_norm": 0.09756699949502945,
      "learning_rate": 0.00019874156347505242,
      "loss": 0.0902,
      "num_input_tokens_seen": 2864736,
      "step": 330,
      "train_runtime": 2147.0956,
      "train_tokens_per_second": 1334.238
    },
    {
      "epoch": 0.14476313941554583,
      "grad_norm": 0.08448482304811478,
      "learning_rate": 0.0001986861319493176,
      "loss": 0.0826,
      "num_input_tokens_seen": 2908048,
      "step": 335,
      "train_runtime": 2168.7932,
      "train_tokens_per_second": 1340.86
    },
    {
      "epoch": 0.14692378328741965,
      "grad_norm": 0.10293188691139221,
      "learning_rate": 0.0001986295138380696,
      "loss": 0.0816,
      "num_input_tokens_seen": 2951424,
      "step": 340,
      "train_runtime": 2189.9397,
      "train_tokens_per_second": 1347.719
    },
    {
      "epoch": 0.14908442715929346,
      "grad_norm": 0.09431219100952148,
      "learning_rate": 0.00019857170982207518,
      "loss": 0.097,
      "num_input_tokens_seen": 2995280,
      "step": 345,
      "train_runtime": 2211.8567,
      "train_tokens_per_second": 1354.193
    },
    {
      "epoch": 0.1512450710311673,
      "grad_norm": 0.08135256171226501,
      "learning_rate": 0.00019851272059636003,
      "loss": 0.0777,
      "num_input_tokens_seen": 3038272,
      "step": 350,
      "train_runtime": 2233.3418,
      "train_tokens_per_second": 1360.415
    },
    {
      "epoch": 0.1534057149030411,
      "grad_norm": 0.08785713464021683,
      "learning_rate": 0.00019845254687020077,
      "loss": 0.0881,
      "num_input_tokens_seen": 3081776,
      "step": 355,
      "train_runtime": 2254.994,
      "train_tokens_per_second": 1366.645
    },
    {
      "epoch": 0.15556635877491493,
      "grad_norm": 0.07563728839159012,
      "learning_rate": 0.0001983911893671162,
      "loss": 0.0737,
      "num_input_tokens_seen": 3124848,
      "step": 360,
      "train_runtime": 2277.7146,
      "train_tokens_per_second": 1371.923
    },
    {
      "epoch": 0.15772700264678874,
      "grad_norm": 0.10026325285434723,
      "learning_rate": 0.00019832864882485862,
      "loss": 0.0756,
      "num_input_tokens_seen": 3167904,
      "step": 365,
      "train_runtime": 2299.0524,
      "train_tokens_per_second": 1377.917
    },
    {
      "epoch": 0.15988764651866255,
      "grad_norm": 0.09677627682685852,
      "learning_rate": 0.00019826492599540508,
      "loss": 0.0805,
      "num_input_tokens_seen": 3210928,
      "step": 370,
      "train_runtime": 2320.9453,
      "train_tokens_per_second": 1383.457
    },
    {
      "epoch": 0.1620482903905364,
      "grad_norm": 0.1106276884675026,
      "learning_rate": 0.00019820002164494817,
      "loss": 0.0956,
      "num_input_tokens_seen": 3254384,
      "step": 375,
      "train_runtime": 2342.6293,
      "train_tokens_per_second": 1389.201
    },
    {
      "epoch": 0.1642089342624102,
      "grad_norm": 0.09862922132015228,
      "learning_rate": 0.00019813393655388696,
      "loss": 0.0758,
      "num_input_tokens_seen": 3297280,
      "step": 380,
      "train_runtime": 2364.2207,
      "train_tokens_per_second": 1394.658
    },
    {
      "epoch": 0.16636957813428402,
      "grad_norm": 0.08001670241355896,
      "learning_rate": 0.00019806667151681744,
      "loss": 0.087,
      "num_input_tokens_seen": 3340832,
      "step": 385,
      "train_runtime": 2386.4765,
      "train_tokens_per_second": 1399.901
    },
    {
      "epoch": 0.16853022200615783,
      "grad_norm": 0.10081043839454651,
      "learning_rate": 0.0001979982273425232,
      "loss": 0.0727,
      "num_input_tokens_seen": 3383856,
      "step": 390,
      "train_runtime": 2408.253,
      "train_tokens_per_second": 1405.108
    },
    {
      "epoch": 0.17069086587803164,
      "grad_norm": 0.10049381852149963,
      "learning_rate": 0.00019792860485396554,
      "loss": 0.0931,
      "num_input_tokens_seen": 3427360,
      "step": 395,
      "train_runtime": 2430.5513,
      "train_tokens_per_second": 1410.116
    },
    {
      "epoch": 0.17285150974990549,
      "grad_norm": 0.10096573084592819,
      "learning_rate": 0.00019785780488827356,
      "loss": 0.0926,
      "num_input_tokens_seen": 3470800,
      "step": 400,
      "train_runtime": 2451.9847,
      "train_tokens_per_second": 1415.506
    },
    {
      "epoch": 0.17285150974990549,
      "eval_loss": 0.0820649117231369,
      "eval_runtime": 711.7049,
      "eval_samples_per_second": 13.007,
      "eval_steps_per_second": 6.504,
      "num_input_tokens_seen": 3470800,
      "step": 400
    },
    {
      "epoch": 0.1750121536217793,
      "grad_norm": 0.06486905366182327,
      "learning_rate": 0.00019778582829673414,
      "loss": 0.0722,
      "num_input_tokens_seen": 3514048,
      "step": 405,
      "train_runtime": 3190.7217,
      "train_tokens_per_second": 1101.333
    },
    {
      "epoch": 0.1771727974936531,
      "grad_norm": 0.07102972269058228,
      "learning_rate": 0.00019771267594478184,
      "loss": 0.0814,
      "num_input_tokens_seen": 3557232,
      "step": 410,
      "train_runtime": 3212.7197,
      "train_tokens_per_second": 1107.234
    },
    {
      "epoch": 0.17933344136552692,
      "grad_norm": 0.0909392461180687,
      "learning_rate": 0.00019763834871198825,
      "loss": 0.0833,
      "num_input_tokens_seen": 3600528,
      "step": 415,
      "train_runtime": 3233.939,
      "train_tokens_per_second": 1113.357
    },
    {
      "epoch": 0.18149408523740074,
      "grad_norm": 0.10018228739500046,
      "learning_rate": 0.00019756284749205153,
      "loss": 0.0959,
      "num_input_tokens_seen": 3644144,
      "step": 420,
      "train_runtime": 3255.3066,
      "train_tokens_per_second": 1119.447
    },
    {
      "epoch": 0.18365472910927455,
      "grad_norm": 0.08901096135377884,
      "learning_rate": 0.00019748617319278577,
      "loss": 0.089,
      "num_input_tokens_seen": 3687856,
      "step": 425,
      "train_runtime": 3277.573,
      "train_tokens_per_second": 1125.179
    },
    {
      "epoch": 0.1858153729811484,
      "grad_norm": 0.07547247409820557,
      "learning_rate": 0.0001974083267361098,
      "loss": 0.0883,
      "num_input_tokens_seen": 3731552,
      "step": 430,
      "train_runtime": 3299.6823,
      "train_tokens_per_second": 1130.882
    },
    {
      "epoch": 0.1879760168530222,
      "grad_norm": 0.0945342481136322,
      "learning_rate": 0.00019732930905803643,
      "loss": 0.0807,
      "num_input_tokens_seen": 3774768,
      "step": 435,
      "train_runtime": 3322.0043,
      "train_tokens_per_second": 1136.292
    },
    {
      "epoch": 0.19013666072489602,
      "grad_norm": 0.09098955243825912,
      "learning_rate": 0.00019724912110866098,
      "loss": 0.0864,
      "num_input_tokens_seen": 3818432,
      "step": 440,
      "train_runtime": 3343.5489,
      "train_tokens_per_second": 1142.03
    },
    {
      "epoch": 0.19229730459676983,
      "grad_norm": 0.07744833081960678,
      "learning_rate": 0.0001971677638521499,
      "loss": 0.0795,
      "num_input_tokens_seen": 3861376,
      "step": 445,
      "train_runtime": 3365.195,
      "train_tokens_per_second": 1147.445
    },
    {
      "epoch": 0.19445794846864364,
      "grad_norm": 0.07406079024076462,
      "learning_rate": 0.0001970852382667292,
      "loss": 0.0846,
      "num_input_tokens_seen": 3905008,
      "step": 450,
      "train_runtime": 3386.6968,
      "train_tokens_per_second": 1153.043
    },
    {
      "epoch": 0.19661859234051748,
      "grad_norm": 0.08702688664197922,
      "learning_rate": 0.00019700154534467272,
      "loss": 0.0807,
      "num_input_tokens_seen": 3948368,
      "step": 455,
      "train_runtime": 3408.6979,
      "train_tokens_per_second": 1158.321
    },
    {
      "epoch": 0.1987792362123913,
      "grad_norm": 0.1021113395690918,
      "learning_rate": 0.0001969166860922901,
      "loss": 0.0839,
      "num_input_tokens_seen": 3992160,
      "step": 460,
      "train_runtime": 3430.6539,
      "train_tokens_per_second": 1163.673
    },
    {
      "epoch": 0.2009398800842651,
      "grad_norm": 0.06688612699508667,
      "learning_rate": 0.00019683066152991477,
      "loss": 0.0779,
      "num_input_tokens_seen": 4035088,
      "step": 465,
      "train_runtime": 3451.9765,
      "train_tokens_per_second": 1168.921
    },
    {
      "epoch": 0.20310052395613892,
      "grad_norm": 0.06544195860624313,
      "learning_rate": 0.00019674347269189172,
      "loss": 0.0798,
      "num_input_tokens_seen": 4078368,
      "step": 470,
      "train_runtime": 3473.2579,
      "train_tokens_per_second": 1174.22
    },
    {
      "epoch": 0.20526116782801274,
      "grad_norm": 0.0765095129609108,
      "learning_rate": 0.00019665512062656481,
      "loss": 0.077,
      "num_input_tokens_seen": 4121200,
      "step": 475,
      "train_runtime": 3494.1767,
      "train_tokens_per_second": 1179.448
    },
    {
      "epoch": 0.20742181169988658,
      "grad_norm": 0.09076128900051117,
      "learning_rate": 0.00019656560639626455,
      "loss": 0.0777,
      "num_input_tokens_seen": 4164272,
      "step": 480,
      "train_runtime": 3515.4286,
      "train_tokens_per_second": 1184.57
    },
    {
      "epoch": 0.2095824555717604,
      "grad_norm": 0.084928959608078,
      "learning_rate": 0.00019647493107729505,
      "loss": 0.0806,
      "num_input_tokens_seen": 4207440,
      "step": 485,
      "train_runtime": 3537.3488,
      "train_tokens_per_second": 1189.433
    },
    {
      "epoch": 0.2117430994436342,
      "grad_norm": 0.07026708126068115,
      "learning_rate": 0.0001963830957599211,
      "loss": 0.0812,
      "num_input_tokens_seen": 4250592,
      "step": 490,
      "train_runtime": 3559.4743,
      "train_tokens_per_second": 1194.163
    },
    {
      "epoch": 0.21390374331550802,
      "grad_norm": 0.08042414486408234,
      "learning_rate": 0.0001962901015483552,
      "loss": 0.0776,
      "num_input_tokens_seen": 4293824,
      "step": 495,
      "train_runtime": 3581.141,
      "train_tokens_per_second": 1199.01
    },
    {
      "epoch": 0.21606438718738183,
      "grad_norm": 0.10094469785690308,
      "learning_rate": 0.00019619594956074416,
      "loss": 0.0883,
      "num_input_tokens_seen": 4337648,
      "step": 500,
      "train_runtime": 3604.6947,
      "train_tokens_per_second": 1203.333
    },
    {
      "epoch": 0.21822503105925567,
      "grad_norm": 0.08260887116193771,
      "learning_rate": 0.0001961006409291557,
      "loss": 0.0728,
      "num_input_tokens_seen": 4380864,
      "step": 505,
      "train_runtime": 3626.825,
      "train_tokens_per_second": 1207.906
    },
    {
      "epoch": 0.22038567493112948,
      "grad_norm": 0.09819753468036652,
      "learning_rate": 0.00019600417679956485,
      "loss": 0.0995,
      "num_input_tokens_seen": 4425184,
      "step": 510,
      "train_runtime": 3649.4129,
      "train_tokens_per_second": 1212.574
    },
    {
      "epoch": 0.2225463188030033,
      "grad_norm": 0.08613722771406174,
      "learning_rate": 0.00019590655833184008,
      "loss": 0.0913,
      "num_input_tokens_seen": 4469072,
      "step": 515,
      "train_runtime": 3671.0009,
      "train_tokens_per_second": 1217.399
    },
    {
      "epoch": 0.2247069626748771,
      "grad_norm": 0.0694877877831459,
      "learning_rate": 0.00019580778669972958,
      "loss": 0.0776,
      "num_input_tokens_seen": 4512896,
      "step": 520,
      "train_runtime": 3692.8898,
      "train_tokens_per_second": 1222.05
    },
    {
      "epoch": 0.22686760654675092,
      "grad_norm": 0.07937192916870117,
      "learning_rate": 0.0001957078630908468,
      "loss": 0.0815,
      "num_input_tokens_seen": 4556272,
      "step": 525,
      "train_runtime": 3714.5571,
      "train_tokens_per_second": 1226.599
    },
    {
      "epoch": 0.22902825041862476,
      "grad_norm": 0.11298541724681854,
      "learning_rate": 0.00019560678870665657,
      "loss": 0.0931,
      "num_input_tokens_seen": 4600080,
      "step": 530,
      "train_runtime": 3736.33,
      "train_tokens_per_second": 1231.176
    },
    {
      "epoch": 0.23118889429049858,
      "grad_norm": 0.10687752813100815,
      "learning_rate": 0.00019550456476246026,
      "loss": 0.0846,
      "num_input_tokens_seen": 4643520,
      "step": 535,
      "train_runtime": 3757.6182,
      "train_tokens_per_second": 1235.762
    },
    {
      "epoch": 0.2333495381623724,
      "grad_norm": 0.0689394399523735,
      "learning_rate": 0.00019540119248738152,
      "loss": 0.0825,
      "num_input_tokens_seen": 4686992,
      "step": 540,
      "train_runtime": 3779.7487,
      "train_tokens_per_second": 1240.027
    },
    {
      "epoch": 0.2355101820342462,
      "grad_norm": 0.07380052655935287,
      "learning_rate": 0.00019529667312435123,
      "loss": 0.0718,
      "num_input_tokens_seen": 4729856,
      "step": 545,
      "train_runtime": 3802.086,
      "train_tokens_per_second": 1244.016
    },
    {
      "epoch": 0.23767082590612001,
      "grad_norm": 0.07186949253082275,
      "learning_rate": 0.00019519100793009267,
      "loss": 0.0728,
      "num_input_tokens_seen": 4773024,
      "step": 550,
      "train_runtime": 3823.73,
      "train_tokens_per_second": 1248.264
    },
    {
      "epoch": 0.23983146977799386,
      "grad_norm": 0.06932114064693451,
      "learning_rate": 0.00019508419817510647,
      "loss": 0.0742,
      "num_input_tokens_seen": 4815984,
      "step": 555,
      "train_runtime": 3844.7929,
      "train_tokens_per_second": 1252.599
    },
    {
      "epoch": 0.24199211364986767,
      "grad_norm": 0.08667387068271637,
      "learning_rate": 0.0001949762451436552,
      "loss": 0.0791,
      "num_input_tokens_seen": 4859168,
      "step": 560,
      "train_runtime": 3866.2134,
      "train_tokens_per_second": 1256.829
    },
    {
      "epoch": 0.24415275752174148,
      "grad_norm": 0.10014659911394119,
      "learning_rate": 0.00019486715013374803,
      "loss": 0.0818,
      "num_input_tokens_seen": 4902272,
      "step": 565,
      "train_runtime": 3887.7872,
      "train_tokens_per_second": 1260.941
    },
    {
      "epoch": 0.2463134013936153,
      "grad_norm": 0.06697220355272293,
      "learning_rate": 0.00019475691445712507,
      "loss": 0.07,
      "num_input_tokens_seen": 4945312,
      "step": 570,
      "train_runtime": 3909.6692,
      "train_tokens_per_second": 1264.893
    },
    {
      "epoch": 0.2484740452654891,
      "grad_norm": 0.09417334198951721,
      "learning_rate": 0.00019464553943924164,
      "loss": 0.0856,
      "num_input_tokens_seen": 4988832,
      "step": 575,
      "train_runtime": 3932.5558,
      "train_tokens_per_second": 1268.598
    },
    {
      "epoch": 0.2506346891373629,
      "grad_norm": 0.09484616667032242,
      "learning_rate": 0.00019453302641925227,
      "loss": 0.0806,
      "num_input_tokens_seen": 5032144,
      "step": 580,
      "train_runtime": 3954.2714,
      "train_tokens_per_second": 1272.584
    },
    {
      "epoch": 0.25279533300923673,
      "grad_norm": 0.07154662162065506,
      "learning_rate": 0.00019441937674999468,
      "loss": 0.0791,
      "num_input_tokens_seen": 5075712,
      "step": 585,
      "train_runtime": 3976.0177,
      "train_tokens_per_second": 1276.582
    },
    {
      "epoch": 0.25495597688111055,
      "grad_norm": 0.08197880536317825,
      "learning_rate": 0.00019430459179797343,
      "loss": 0.0776,
      "num_input_tokens_seen": 5118784,
      "step": 590,
      "train_runtime": 3998.0938,
      "train_tokens_per_second": 1280.306
    },
    {
      "epoch": 0.2571166207529844,
      "grad_norm": 0.09517450630664825,
      "learning_rate": 0.00019418867294334355,
      "loss": 0.0783,
      "num_input_tokens_seen": 5162224,
      "step": 595,
      "train_runtime": 4019.3148,
      "train_tokens_per_second": 1284.354
    },
    {
      "epoch": 0.25927726462485823,
      "grad_norm": 0.07223788648843765,
      "learning_rate": 0.00019407162157989393,
      "loss": 0.0734,
      "num_input_tokens_seen": 5205120,
      "step": 600,
      "train_runtime": 4040.9479,
      "train_tokens_per_second": 1288.094
    },
    {
      "epoch": 0.25927726462485823,
      "eval_loss": 0.07911964505910873,
      "eval_runtime": 711.8752,
      "eval_samples_per_second": 13.004,
      "eval_steps_per_second": 6.503,
      "num_input_tokens_seen": 5205120,
      "step": 600
    },
    {
      "epoch": 0.26143790849673204,
      "grad_norm": 0.09286199510097504,
      "learning_rate": 0.00019395343911503057,
      "loss": 0.0843,
      "num_input_tokens_seen": 5248688,
      "step": 605,
      "train_runtime": 4780.1535,
      "train_tokens_per_second": 1098.017
    },
    {
      "epoch": 0.26359855236860585,
      "grad_norm": 0.08078984916210175,
      "learning_rate": 0.00019383412696975956,
      "loss": 0.0788,
      "num_input_tokens_seen": 5292032,
      "step": 610,
      "train_runtime": 4801.333,
      "train_tokens_per_second": 1102.201
    },
    {
      "epoch": 0.26575919624047967,
      "grad_norm": 0.07582154124975204,
      "learning_rate": 0.0001937136865786702,
      "loss": 0.0876,
      "num_input_tokens_seen": 5335920,
      "step": 615,
      "train_runtime": 4824.0,
      "train_tokens_per_second": 1106.119
    },
    {
      "epoch": 0.2679198401123535,
      "grad_norm": 0.0737927258014679,
      "learning_rate": 0.00019359211938991755,
      "loss": 0.0784,
      "num_input_tokens_seen": 5379328,
      "step": 620,
      "train_runtime": 4845.4042,
      "train_tokens_per_second": 1110.192
    },
    {
      "epoch": 0.2700804839842273,
      "grad_norm": 0.09328042715787888,
      "learning_rate": 0.0001934694268652051,
      "loss": 0.081,
      "num_input_tokens_seen": 5422832,
      "step": 625,
      "train_runtime": 4867.5202,
      "train_tokens_per_second": 1114.085
    },
    {
      "epoch": 0.2722411278561011,
      "grad_norm": 0.07202576845884323,
      "learning_rate": 0.00019334561047976723,
      "loss": 0.0837,
      "num_input_tokens_seen": 5466160,
      "step": 630,
      "train_runtime": 4888.8853,
      "train_tokens_per_second": 1118.079
    },
    {
      "epoch": 0.2744017717279749,
      "grad_norm": 0.07730934768915176,
      "learning_rate": 0.00019322067172235138,
      "loss": 0.0864,
      "num_input_tokens_seen": 5509760,
      "step": 635,
      "train_runtime": 4910.512,
      "train_tokens_per_second": 1122.034
    },
    {
      "epoch": 0.27656241559984873,
      "grad_norm": 0.06718683242797852,
      "learning_rate": 0.0001930946120952003,
      "loss": 0.0664,
      "num_input_tokens_seen": 5552416,
      "step": 640,
      "train_runtime": 4931.3543,
      "train_tokens_per_second": 1125.941
    },
    {
      "epoch": 0.2787230594717226,
      "grad_norm": 0.07979665696620941,
      "learning_rate": 0.00019296743311403376,
      "loss": 0.0909,
      "num_input_tokens_seen": 5595936,
      "step": 645,
      "train_runtime": 4952.8952,
      "train_tokens_per_second": 1129.831
    },
    {
      "epoch": 0.2808837033435964,
      "grad_norm": 0.08611753582954407,
      "learning_rate": 0.00019283913630803055,
      "loss": 0.0883,
      "num_input_tokens_seen": 5639392,
      "step": 650,
      "train_runtime": 4974.7226,
      "train_tokens_per_second": 1133.609
    },
    {
      "epoch": 0.2830443472154702,
      "grad_norm": 0.0799168050289154,
      "learning_rate": 0.00019270972321980991,
      "loss": 0.0816,
      "num_input_tokens_seen": 5682688,
      "step": 655,
      "train_runtime": 4995.9321,
      "train_tokens_per_second": 1137.463
    },
    {
      "epoch": 0.28520499108734404,
      "grad_norm": 0.0729471817612648,
      "learning_rate": 0.0001925791954054132,
      "loss": 0.0741,
      "num_input_tokens_seen": 5725632,
      "step": 660,
      "train_runtime": 5017.5209,
      "train_tokens_per_second": 1141.128
    },
    {
      "epoch": 0.28736563495921785,
      "grad_norm": 0.10275959223508835,
      "learning_rate": 0.00019244755443428494,
      "loss": 0.0841,
      "num_input_tokens_seen": 5769488,
      "step": 665,
      "train_runtime": 5039.1393,
      "train_tokens_per_second": 1144.935
    },
    {
      "epoch": 0.28952627883109167,
      "grad_norm": 0.0760921835899353,
      "learning_rate": 0.00019231480188925412,
      "loss": 0.0833,
      "num_input_tokens_seen": 5812736,
      "step": 670,
      "train_runtime": 5060.9978,
      "train_tokens_per_second": 1148.536
    },
    {
      "epoch": 0.2916869227029655,
      "grad_norm": 0.0875801295042038,
      "learning_rate": 0.0001921809393665151,
      "loss": 0.0817,
      "num_input_tokens_seen": 5856224,
      "step": 675,
      "train_runtime": 5082.9523,
      "train_tokens_per_second": 1152.13
    },
    {
      "epoch": 0.2938475665748393,
      "grad_norm": 0.07346130162477493,
      "learning_rate": 0.0001920459684756084,
      "loss": 0.0815,
      "num_input_tokens_seen": 5899728,
      "step": 680,
      "train_runtime": 5104.4384,
      "train_tokens_per_second": 1155.804
    },
    {
      "epoch": 0.2960082104467131,
      "grad_norm": 0.07405713200569153,
      "learning_rate": 0.00019190989083940144,
      "loss": 0.08,
      "num_input_tokens_seen": 5943184,
      "step": 685,
      "train_runtime": 5126.9699,
      "train_tokens_per_second": 1159.2
    },
    {
      "epoch": 0.2981688543185869,
      "grad_norm": 0.074583999812603,
      "learning_rate": 0.00019177270809406886,
      "loss": 0.0753,
      "num_input_tokens_seen": 5986528,
      "step": 690,
      "train_runtime": 5148.3756,
      "train_tokens_per_second": 1162.799
    },
    {
      "epoch": 0.30032949819046073,
      "grad_norm": 0.08363614976406097,
      "learning_rate": 0.00019163442188907306,
      "loss": 0.0789,
      "num_input_tokens_seen": 6029792,
      "step": 695,
      "train_runtime": 5170.5975,
      "train_tokens_per_second": 1166.169
    },
    {
      "epoch": 0.3024901420623346,
      "grad_norm": 0.07529831677675247,
      "learning_rate": 0.00019149503388714414,
      "loss": 0.0782,
      "num_input_tokens_seen": 6072960,
      "step": 700,
      "train_runtime": 5192.0761,
      "train_tokens_per_second": 1169.659
    },
    {
      "epoch": 0.3046507859342084,
      "grad_norm": 0.08652273565530777,
      "learning_rate": 0.0001913545457642601,
      "loss": 0.0859,
      "num_input_tokens_seen": 6116880,
      "step": 705,
      "train_runtime": 5214.4006,
      "train_tokens_per_second": 1173.074
    },
    {
      "epoch": 0.3068114298060822,
      "grad_norm": 0.09629788249731064,
      "learning_rate": 0.00019121295920962662,
      "loss": 0.0767,
      "num_input_tokens_seen": 6160256,
      "step": 710,
      "train_runtime": 5235.856,
      "train_tokens_per_second": 1176.552
    },
    {
      "epoch": 0.30897207367795604,
      "grad_norm": 0.06942661106586456,
      "learning_rate": 0.00019107027592565662,
      "loss": 0.0814,
      "num_input_tokens_seen": 6203664,
      "step": 715,
      "train_runtime": 5258.1546,
      "train_tokens_per_second": 1179.818
    },
    {
      "epoch": 0.31113271754982985,
      "grad_norm": 0.05736853554844856,
      "learning_rate": 0.00019092649762795009,
      "loss": 0.0758,
      "num_input_tokens_seen": 6246864,
      "step": 720,
      "train_runtime": 5279.6223,
      "train_tokens_per_second": 1183.203
    },
    {
      "epoch": 0.31329336142170366,
      "grad_norm": 0.08216078579425812,
      "learning_rate": 0.00019078162604527313,
      "loss": 0.0786,
      "num_input_tokens_seen": 6290208,
      "step": 725,
      "train_runtime": 5301.567,
      "train_tokens_per_second": 1186.481
    },
    {
      "epoch": 0.3154540052935775,
      "grad_norm": 0.07779684662818909,
      "learning_rate": 0.00019063566291953739,
      "loss": 0.0706,
      "num_input_tokens_seen": 6333120,
      "step": 730,
      "train_runtime": 5323.539,
      "train_tokens_per_second": 1189.645
    },
    {
      "epoch": 0.3176146491654513,
      "grad_norm": 0.0775391012430191,
      "learning_rate": 0.00019048861000577904,
      "loss": 0.0763,
      "num_input_tokens_seen": 6376640,
      "step": 735,
      "train_runtime": 5346.0257,
      "train_tokens_per_second": 1192.781
    },
    {
      "epoch": 0.3197752930373251,
      "grad_norm": 0.08086078613996506,
      "learning_rate": 0.00019034046907213768,
      "loss": 0.086,
      "num_input_tokens_seen": 6420272,
      "step": 740,
      "train_runtime": 5367.655,
      "train_tokens_per_second": 1196.104
    },
    {
      "epoch": 0.3219359369091989,
      "grad_norm": 0.10933763533830643,
      "learning_rate": 0.00019019124189983502,
      "loss": 0.0865,
      "num_input_tokens_seen": 6464288,
      "step": 745,
      "train_runtime": 5389.0206,
      "train_tokens_per_second": 1199.529
    },
    {
      "epoch": 0.3240965807810728,
      "grad_norm": 0.07312079519033432,
      "learning_rate": 0.00019004093028315367,
      "loss": 0.0734,
      "num_input_tokens_seen": 6507472,
      "step": 750,
      "train_runtime": 5410.7467,
      "train_tokens_per_second": 1202.694
    },
    {
      "epoch": 0.3262572246529466,
      "grad_norm": 0.09135115891695023,
      "learning_rate": 0.00018988953602941522,
      "loss": 0.0857,
      "num_input_tokens_seen": 6551152,
      "step": 755,
      "train_runtime": 5432.6461,
      "train_tokens_per_second": 1205.886
    },
    {
      "epoch": 0.3284178685248204,
      "grad_norm": 0.07926656305789948,
      "learning_rate": 0.00018973706095895887,
      "loss": 0.0801,
      "num_input_tokens_seen": 6594464,
      "step": 760,
      "train_runtime": 5453.9931,
      "train_tokens_per_second": 1209.107
    },
    {
      "epoch": 0.3305785123966942,
      "grad_norm": 0.07842066138982773,
      "learning_rate": 0.00018958350690511928,
      "loss": 0.0708,
      "num_input_tokens_seen": 6637648,
      "step": 765,
      "train_runtime": 5475.6934,
      "train_tokens_per_second": 1212.202
    },
    {
      "epoch": 0.33273915626856804,
      "grad_norm": 0.062182243913412094,
      "learning_rate": 0.00018942887571420469,
      "loss": 0.0734,
      "num_input_tokens_seen": 6680960,
      "step": 770,
      "train_runtime": 5496.9379,
      "train_tokens_per_second": 1215.397
    },
    {
      "epoch": 0.33489980014044185,
      "grad_norm": 0.08400420844554901,
      "learning_rate": 0.0001892731692454746,
      "loss": 0.0793,
      "num_input_tokens_seen": 6724688,
      "step": 775,
      "train_runtime": 5519.4401,
      "train_tokens_per_second": 1218.364
    },
    {
      "epoch": 0.33706044401231566,
      "grad_norm": 0.07636286318302155,
      "learning_rate": 0.0001891163893711175,
      "loss": 0.0746,
      "num_input_tokens_seen": 6767856,
      "step": 780,
      "train_runtime": 5541.114,
      "train_tokens_per_second": 1221.389
    },
    {
      "epoch": 0.3392210878841895,
      "grad_norm": 0.08771245926618576,
      "learning_rate": 0.00018895853797622837,
      "loss": 0.0856,
      "num_input_tokens_seen": 6811408,
      "step": 785,
      "train_runtime": 5562.918,
      "train_tokens_per_second": 1224.431
    },
    {
      "epoch": 0.3413817317560633,
      "grad_norm": 0.07764877378940582,
      "learning_rate": 0.00018879961695878586,
      "loss": 0.0814,
      "num_input_tokens_seen": 6854928,
      "step": 790,
      "train_runtime": 5584.5564,
      "train_tokens_per_second": 1227.479
    },
    {
      "epoch": 0.3435423756279371,
      "grad_norm": 0.07771775126457214,
      "learning_rate": 0.00018863962822962974,
      "loss": 0.076,
      "num_input_tokens_seen": 6898064,
      "step": 795,
      "train_runtime": 5606.9982,
      "train_tokens_per_second": 1230.26
    },
    {
      "epoch": 0.34570301949981097,
      "grad_norm": 0.07678196579217911,
      "learning_rate": 0.00018847857371243762,
      "loss": 0.0772,
      "num_input_tokens_seen": 6941760,
      "step": 800,
      "train_runtime": 5628.6983,
      "train_tokens_per_second": 1233.28
    },
    {
      "epoch": 0.34570301949981097,
      "eval_loss": 0.07707177847623825,
      "eval_runtime": 710.7395,
      "eval_samples_per_second": 13.024,
      "eval_steps_per_second": 6.513,
      "num_input_tokens_seen": 6941760,
      "step": 800
    },
    {
      "epoch": 0.3478636633716848,
      "grad_norm": 0.0912775844335556,
      "learning_rate": 0.000188316455343702,
      "loss": 0.0762,
      "num_input_tokens_seen": 6984928,
      "step": 805,
      "train_runtime": 6366.9321,
      "train_tokens_per_second": 1097.063
    },
    {
      "epoch": 0.3500243072435586,
      "grad_norm": 0.08312050998210907,
      "learning_rate": 0.00018815327507270703,
      "loss": 0.0817,
      "num_input_tokens_seen": 7028864,
      "step": 810,
      "train_runtime": 6390.056,
      "train_tokens_per_second": 1099.969
    },
    {
      "epoch": 0.3521849511154324,
      "grad_norm": 0.09067723900079727,
      "learning_rate": 0.00018798903486150494,
      "loss": 0.0858,
      "num_input_tokens_seen": 7072528,
      "step": 815,
      "train_runtime": 6412.3692,
      "train_tokens_per_second": 1102.951
    },
    {
      "epoch": 0.3543455949873062,
      "grad_norm": 0.09034962207078934,
      "learning_rate": 0.0001878237366848925,
      "loss": 0.0811,
      "num_input_tokens_seen": 7116176,
      "step": 820,
      "train_runtime": 6434.2034,
      "train_tokens_per_second": 1105.992
    },
    {
      "epoch": 0.35650623885918004,
      "grad_norm": 0.07377108186483383,
      "learning_rate": 0.00018765738253038726,
      "loss": 0.0759,
      "num_input_tokens_seen": 7159504,
      "step": 825,
      "train_runtime": 6455.9184,
      "train_tokens_per_second": 1108.983
    },
    {
      "epoch": 0.35866688273105385,
      "grad_norm": 0.08932390064001083,
      "learning_rate": 0.00018748997439820372,
      "loss": 0.0686,
      "num_input_tokens_seen": 7202400,
      "step": 830,
      "train_runtime": 6478.1722,
      "train_tokens_per_second": 1111.795
    },
    {
      "epoch": 0.36082752660292766,
      "grad_norm": 0.06967565417289734,
      "learning_rate": 0.0001873215143012292,
      "loss": 0.0763,
      "num_input_tokens_seen": 7245616,
      "step": 835,
      "train_runtime": 6499.9987,
      "train_tokens_per_second": 1114.71
    },
    {
      "epoch": 0.3629881704748015,
      "grad_norm": 0.08724388480186462,
      "learning_rate": 0.00018715200426499973,
      "loss": 0.0818,
      "num_input_tokens_seen": 7289264,
      "step": 840,
      "train_runtime": 6521.9735,
      "train_tokens_per_second": 1117.647
    },
    {
      "epoch": 0.3651488143466753,
      "grad_norm": 0.06765586882829666,
      "learning_rate": 0.00018698144632767547,
      "loss": 0.08,
      "num_input_tokens_seen": 7332992,
      "step": 845,
      "train_runtime": 6544.5412,
      "train_tokens_per_second": 1120.475
    },
    {
      "epoch": 0.3673094582185491,
      "grad_norm": 0.08761299401521683,
      "learning_rate": 0.00018680984254001656,
      "loss": 0.0814,
      "num_input_tokens_seen": 7376768,
      "step": 850,
      "train_runtime": 6566.4346,
      "train_tokens_per_second": 1123.405
    },
    {
      "epoch": 0.36947010209042297,
      "grad_norm": 0.0771804228425026,
      "learning_rate": 0.00018663719496535817,
      "loss": 0.0815,
      "num_input_tokens_seen": 7420656,
      "step": 855,
      "train_runtime": 6588.7311,
      "train_tokens_per_second": 1126.265
    },
    {
      "epoch": 0.3716307459622968,
      "grad_norm": 0.0810341015458107,
      "learning_rate": 0.00018646350567958582,
      "loss": 0.0794,
      "num_input_tokens_seen": 7463984,
      "step": 860,
      "train_runtime": 6610.4567,
      "train_tokens_per_second": 1129.118
    },
    {
      "epoch": 0.3737913898341706,
      "grad_norm": 0.07731346040964127,
      "learning_rate": 0.0001862887767711103,
      "loss": 0.0755,
      "num_input_tokens_seen": 7507264,
      "step": 865,
      "train_runtime": 6631.9401,
      "train_tokens_per_second": 1131.986
    },
    {
      "epoch": 0.3759520337060444,
      "grad_norm": 0.06987843662500381,
      "learning_rate": 0.00018611301034084283,
      "loss": 0.0796,
      "num_input_tokens_seen": 7550544,
      "step": 870,
      "train_runtime": 6653.1898,
      "train_tokens_per_second": 1134.876
    },
    {
      "epoch": 0.3781126775779182,
      "grad_norm": 0.08283229172229767,
      "learning_rate": 0.00018593620850216943,
      "loss": 0.0909,
      "num_input_tokens_seen": 7594592,
      "step": 875,
      "train_runtime": 6674.6719,
      "train_tokens_per_second": 1137.823
    },
    {
      "epoch": 0.38027332144979203,
      "grad_norm": 0.0696164071559906,
      "learning_rate": 0.00018575837338092582,
      "loss": 0.0745,
      "num_input_tokens_seen": 7637744,
      "step": 880,
      "train_runtime": 6696.8868,
      "train_tokens_per_second": 1140.492
    },
    {
      "epoch": 0.38243396532166585,
      "grad_norm": 0.07360994070768356,
      "learning_rate": 0.00018557950711537173,
      "loss": 0.0815,
      "num_input_tokens_seen": 7681232,
      "step": 885,
      "train_runtime": 6719.3031,
      "train_tokens_per_second": 1143.159
    },
    {
      "epoch": 0.38459460919353966,
      "grad_norm": 0.07119850069284439,
      "learning_rate": 0.0001853996118561651,
      "loss": 0.0732,
      "num_input_tokens_seen": 7724352,
      "step": 890,
      "train_runtime": 6741.235,
      "train_tokens_per_second": 1145.836
    },
    {
      "epoch": 0.3867552530654135,
      "grad_norm": 0.06476875394582748,
      "learning_rate": 0.0001852186897663364,
      "loss": 0.0695,
      "num_input_tokens_seen": 7767408,
      "step": 895,
      "train_runtime": 6762.9413,
      "train_tokens_per_second": 1148.525
    },
    {
      "epoch": 0.3889158969372873,
      "grad_norm": 0.07978302985429764,
      "learning_rate": 0.00018503674302126254,
      "loss": 0.0743,
      "num_input_tokens_seen": 7810688,
      "step": 900,
      "train_runtime": 6785.1546,
      "train_tokens_per_second": 1151.144
    },
    {
      "epoch": 0.39107654080916116,
      "grad_norm": 0.05432932823896408,
      "learning_rate": 0.00018485377380864069,
      "loss": 0.0769,
      "num_input_tokens_seen": 7854352,
      "step": 905,
      "train_runtime": 6808.2135,
      "train_tokens_per_second": 1153.658
    },
    {
      "epoch": 0.39323718468103497,
      "grad_norm": 0.08450411260128021,
      "learning_rate": 0.00018466978432846198,
      "loss": 0.0813,
      "num_input_tokens_seen": 7897824,
      "step": 910,
      "train_runtime": 6829.4279,
      "train_tokens_per_second": 1156.44
    },
    {
      "epoch": 0.3953978285529088,
      "grad_norm": 0.08708320558071136,
      "learning_rate": 0.00018448477679298508,
      "loss": 0.0828,
      "num_input_tokens_seen": 7941424,
      "step": 915,
      "train_runtime": 6851.7426,
      "train_tokens_per_second": 1159.037
    },
    {
      "epoch": 0.3975584724247826,
      "grad_norm": 0.07201456278562546,
      "learning_rate": 0.00018429875342670964,
      "loss": 0.0742,
      "num_input_tokens_seen": 7984608,
      "step": 920,
      "train_runtime": 6873.2801,
      "train_tokens_per_second": 1161.688
    },
    {
      "epoch": 0.3997191162966564,
      "grad_norm": 0.07260388880968094,
      "learning_rate": 0.00018411171646634937,
      "loss": 0.0744,
      "num_input_tokens_seen": 8028192,
      "step": 925,
      "train_runtime": 6894.4722,
      "train_tokens_per_second": 1164.439
    },
    {
      "epoch": 0.4018797601685302,
      "grad_norm": 0.08266861736774445,
      "learning_rate": 0.00018392366816080542,
      "loss": 0.0794,
      "num_input_tokens_seen": 8071584,
      "step": 930,
      "train_runtime": 6916.4175,
      "train_tokens_per_second": 1167.018
    },
    {
      "epoch": 0.40404040404040403,
      "grad_norm": 0.08442539721727371,
      "learning_rate": 0.00018373461077113908,
      "loss": 0.0837,
      "num_input_tokens_seen": 8115056,
      "step": 935,
      "train_runtime": 6938.0681,
      "train_tokens_per_second": 1169.642
    },
    {
      "epoch": 0.40620104791227785,
      "grad_norm": 0.07598377764225006,
      "learning_rate": 0.00018354454657054469,
      "loss": 0.0806,
      "num_input_tokens_seen": 8158976,
      "step": 940,
      "train_runtime": 6959.6868,
      "train_tokens_per_second": 1172.319
    },
    {
      "epoch": 0.40836169178415166,
      "grad_norm": 0.06979737430810928,
      "learning_rate": 0.00018335347784432236,
      "loss": 0.0846,
      "num_input_tokens_seen": 8203008,
      "step": 945,
      "train_runtime": 6981.6345,
      "train_tokens_per_second": 1174.941
    },
    {
      "epoch": 0.41052233565602547,
      "grad_norm": 0.08268395811319351,
      "learning_rate": 0.00018316140688985047,
      "loss": 0.0813,
      "num_input_tokens_seen": 8246112,
      "step": 950,
      "train_runtime": 7002.7827,
      "train_tokens_per_second": 1177.548
    },
    {
      "epoch": 0.41268297952789934,
      "grad_norm": 0.0866621881723404,
      "learning_rate": 0.00018296833601655794,
      "loss": 0.0759,
      "num_input_tokens_seen": 8289408,
      "step": 955,
      "train_runtime": 7024.109,
      "train_tokens_per_second": 1180.137
    },
    {
      "epoch": 0.41484362339977315,
      "grad_norm": 0.0772893950343132,
      "learning_rate": 0.0001827742675458966,
      "loss": 0.0768,
      "num_input_tokens_seen": 8332832,
      "step": 960,
      "train_runtime": 7045.8829,
      "train_tokens_per_second": 1182.653
    },
    {
      "epoch": 0.41700426727164697,
      "grad_norm": 0.07792758196592331,
      "learning_rate": 0.00018257920381131327,
      "loss": 0.0824,
      "num_input_tokens_seen": 8376720,
      "step": 965,
      "train_runtime": 7067.7123,
      "train_tokens_per_second": 1185.21
    },
    {
      "epoch": 0.4191649111435208,
      "grad_norm": 0.07842139154672623,
      "learning_rate": 0.00018238314715822158,
      "loss": 0.0836,
      "num_input_tokens_seen": 8420304,
      "step": 970,
      "train_runtime": 7089.4634,
      "train_tokens_per_second": 1187.721
    },
    {
      "epoch": 0.4213255550153946,
      "grad_norm": 0.07367521524429321,
      "learning_rate": 0.00018218609994397387,
      "loss": 0.0838,
      "num_input_tokens_seen": 8463904,
      "step": 975,
      "train_runtime": 7111.0203,
      "train_tokens_per_second": 1190.252
    },
    {
      "epoch": 0.4234861988872684,
      "grad_norm": 0.06106347218155861,
      "learning_rate": 0.0001819880645378328,
      "loss": 0.0787,
      "num_input_tokens_seen": 8507328,
      "step": 980,
      "train_runtime": 7133.5273,
      "train_tokens_per_second": 1192.584
    },
    {
      "epoch": 0.4256468427591422,
      "grad_norm": 0.04488658905029297,
      "learning_rate": 0.00018178904332094293,
      "loss": 0.0685,
      "num_input_tokens_seen": 8550368,
      "step": 985,
      "train_runtime": 7155.5568,
      "train_tokens_per_second": 1194.927
    },
    {
      "epoch": 0.42780748663101603,
      "grad_norm": 0.0789346843957901,
      "learning_rate": 0.00018158903868630203,
      "loss": 0.0916,
      "num_input_tokens_seen": 8594080,
      "step": 990,
      "train_runtime": 7177.2878,
      "train_tokens_per_second": 1197.399
    },
    {
      "epoch": 0.42996813050288984,
      "grad_norm": 0.08458510786294937,
      "learning_rate": 0.0001813880530387323,
      "loss": 0.0702,
      "num_input_tokens_seen": 8637200,
      "step": 995,
      "train_runtime": 7199.493,
      "train_tokens_per_second": 1199.696
    },
    {
      "epoch": 0.43212877437476366,
      "grad_norm": 0.07093961536884308,
      "learning_rate": 0.0001811860887948515,
      "loss": 0.0763,
      "num_input_tokens_seen": 8680800,
      "step": 1000,
      "train_runtime": 7222.5962,
      "train_tokens_per_second": 1201.895
    },
    {
      "epoch": 0.43212877437476366,
      "eval_loss": 0.07520591467618942,
      "eval_runtime": 12182.4571,
      "eval_samples_per_second": 0.76,
| "eval_steps_per_second": 0.38, | |
| "num_input_tokens_seen": 8680800, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.43428941824663747, | |
| "grad_norm": 0.08109795302152634, | |
| "learning_rate": 0.00018098314838304382, | |
| "loss": 0.0744, | |
| "num_input_tokens_seen": 8724480, | |
| "step": 1005, | |
| "train_runtime": 19429.5699, | |
| "train_tokens_per_second": 449.031 | |
| }, | |
| { | |
| "epoch": 0.43645006211851134, | |
| "grad_norm": 0.0698382705450058, | |
| "learning_rate": 0.00018077923424343083, | |
| "loss": 0.0702, | |
| "num_input_tokens_seen": 8767712, | |
| "step": 1010, | |
| "train_runtime": 19451.8383, | |
| "train_tokens_per_second": 450.74 | |
| }, | |
| { | |
| "epoch": 0.43861070599038515, | |
| "grad_norm": 0.07674521207809448, | |
| "learning_rate": 0.00018057434882784188, | |
| "loss": 0.0738, | |
| "num_input_tokens_seen": 8811312, | |
| "step": 1015, | |
| "train_runtime": 19473.1463, | |
| "train_tokens_per_second": 452.485 | |
| }, | |
| { | |
| "epoch": 0.44077134986225897, | |
| "grad_norm": 0.07470937073230743, | |
| "learning_rate": 0.00018036849459978493, | |
| "loss": 0.0746, | |
| "num_input_tokens_seen": 8854448, | |
| "step": 1020, | |
| "train_runtime": 19494.278, | |
| "train_tokens_per_second": 454.208 | |
| }, | |
| { | |
| "epoch": 0.4429319937341328, | |
| "grad_norm": 0.06504765897989273, | |
| "learning_rate": 0.00018016167403441674, | |
| "loss": 0.0734, | |
| "num_input_tokens_seen": 8897664, | |
| "step": 1025, | |
| "train_runtime": 19515.6756, | |
| "train_tokens_per_second": 455.924 | |
| }, | |
| { | |
| "epoch": 0.4450926376060066, | |
| "grad_norm": 0.07647648453712463, | |
| "learning_rate": 0.00017995388961851308, | |
| "loss": 0.0818, | |
| "num_input_tokens_seen": 8941408, | |
| "step": 1030, | |
| "train_runtime": 19537.3869, | |
| "train_tokens_per_second": 457.656 | |
| }, | |
| { | |
| "epoch": 0.4472532814778804, | |
| "grad_norm": 0.08472959697246552, | |
| "learning_rate": 0.00017974514385043897, | |
| "loss": 0.0793, | |
| "num_input_tokens_seen": 8984800, | |
| "step": 1035, | |
| "train_runtime": 19558.7936, | |
| "train_tokens_per_second": 459.374 | |
| }, | |
| { | |
| "epoch": 0.4494139253497542, | |
| "grad_norm": 0.06375865638256073, | |
| "learning_rate": 0.00017953543924011854, | |
| "loss": 0.0807, | |
| "num_input_tokens_seen": 9028528, | |
| "step": 1040, | |
| "train_runtime": 19580.2573, | |
| "train_tokens_per_second": 461.104 | |
| }, | |
| { | |
| "epoch": 0.45157456922162803, | |
| "grad_norm": 0.06762372702360153, | |
| "learning_rate": 0.00017932477830900494, | |
| "loss": 0.0756, | |
| "num_input_tokens_seen": 9071760, | |
| "step": 1045, | |
| "train_runtime": 19602.2512, | |
| "train_tokens_per_second": 462.792 | |
| }, | |
| { | |
| "epoch": 0.45373521309350184, | |
| "grad_norm": 0.08145523816347122, | |
| "learning_rate": 0.00017911316359004982, | |
| "loss": 0.0806, | |
| "num_input_tokens_seen": 9115312, | |
| "step": 1050, | |
| "train_runtime": 19624.3829, | |
| "train_tokens_per_second": 464.489 | |
| }, | |
| { | |
| "epoch": 0.45589585696537566, | |
| "grad_norm": 0.0655316486954689, | |
| "learning_rate": 0.0001789005976276731, | |
| "loss": 0.0742, | |
| "num_input_tokens_seen": 9158656, | |
| "step": 1055, | |
| "train_runtime": 19646.0519, | |
| "train_tokens_per_second": 466.183 | |
| }, | |
| { | |
| "epoch": 0.4580565008372495, | |
| "grad_norm": 0.07092972844839096, | |
| "learning_rate": 0.00017868708297773237, | |
| "loss": 0.0747, | |
| "num_input_tokens_seen": 9201744, | |
| "step": 1060, | |
| "train_runtime": 19667.4653, | |
| "train_tokens_per_second": 467.866 | |
| }, | |
| { | |
| "epoch": 0.46021714470912334, | |
| "grad_norm": 0.07497821748256683, | |
| "learning_rate": 0.00017847262220749196, | |
| "loss": 0.0809, | |
| "num_input_tokens_seen": 9245328, | |
| "step": 1065, | |
| "train_runtime": 19688.7343, | |
| "train_tokens_per_second": 469.575 | |
| }, | |
| { | |
| "epoch": 0.46237778858099715, | |
| "grad_norm": 0.07463043928146362, | |
| "learning_rate": 0.00017825721789559217, | |
| "loss": 0.0782, | |
| "num_input_tokens_seen": 9288800, | |
| "step": 1070, | |
| "train_runtime": 19710.406, | |
| "train_tokens_per_second": 471.264 | |
| }, | |
| { | |
| "epoch": 0.46453843245287096, | |
| "grad_norm": 0.050223931670188904, | |
| "learning_rate": 0.00017804087263201845, | |
| "loss": 0.0772, | |
| "num_input_tokens_seen": 9332304, | |
| "step": 1075, | |
| "train_runtime": 19731.8737, | |
| "train_tokens_per_second": 472.956 | |
| }, | |
| { | |
| "epoch": 0.4666990763247448, | |
| "grad_norm": 0.07988058030605316, | |
| "learning_rate": 0.00017782358901806994, | |
| "loss": 0.0755, | |
| "num_input_tokens_seen": 9375888, | |
| "step": 1080, | |
| "train_runtime": 19753.6903, | |
| "train_tokens_per_second": 474.64 | |
| }, | |
| { | |
| "epoch": 0.4688597201966186, | |
| "grad_norm": 0.07957769185304642, | |
| "learning_rate": 0.00017760536966632842, | |
| "loss": 0.0817, | |
| "num_input_tokens_seen": 9419248, | |
| "step": 1085, | |
| "train_runtime": 19775.1655, | |
| "train_tokens_per_second": 476.317 | |
| }, | |
| { | |
| "epoch": 0.4710203640684924, | |
| "grad_norm": 0.059474822133779526, | |
| "learning_rate": 0.0001773862172006268, | |
| "loss": 0.0788, | |
| "num_input_tokens_seen": 9462496, | |
| "step": 1090, | |
| "train_runtime": 19797.2478, | |
| "train_tokens_per_second": 477.97 | |
| }, | |
| { | |
| "epoch": 0.4731810079403662, | |
| "grad_norm": 0.08469399064779282, | |
| "learning_rate": 0.00017716613425601763, | |
| "loss": 0.0823, | |
| "num_input_tokens_seen": 9506512, | |
| "step": 1095, | |
| "train_runtime": 19819.423, | |
| "train_tokens_per_second": 479.656 | |
| }, | |
| { | |
| "epoch": 0.47534165181224003, | |
| "grad_norm": 0.07042556256055832, | |
| "learning_rate": 0.00017694512347874133, | |
| "loss": 0.0781, | |
| "num_input_tokens_seen": 9550080, | |
| "step": 1100, | |
| "train_runtime": 19841.9888, | |
| "train_tokens_per_second": 481.307 | |
| }, | |
| { | |
| "epoch": 0.47750229568411384, | |
| "grad_norm": 0.06320291012525558, | |
| "learning_rate": 0.0001767231875261944, | |
| "loss": 0.0753, | |
| "num_input_tokens_seen": 9593424, | |
| "step": 1105, | |
| "train_runtime": 19863.9008, | |
| "train_tokens_per_second": 482.958 | |
| }, | |
| { | |
| "epoch": 0.4796629395559877, | |
| "grad_norm": 0.06841768324375153, | |
| "learning_rate": 0.00017650032906689763, | |
| "loss": 0.071, | |
| "num_input_tokens_seen": 9636816, | |
| "step": 1110, | |
| "train_runtime": 19886.1652, | |
| "train_tokens_per_second": 484.599 | |
| }, | |
| { | |
| "epoch": 0.4818235834278615, | |
| "grad_norm": 0.07111469656229019, | |
| "learning_rate": 0.00017627655078046375, | |
| "loss": 0.0811, | |
| "num_input_tokens_seen": 9680368, | |
| "step": 1115, | |
| "train_runtime": 19908.3539, | |
| "train_tokens_per_second": 486.247 | |
| }, | |
| { | |
| "epoch": 0.48398422729973534, | |
| "grad_norm": 0.08030956983566284, | |
| "learning_rate": 0.00017605185535756536, | |
| "loss": 0.0729, | |
| "num_input_tokens_seen": 9723472, | |
| "step": 1120, | |
| "train_runtime": 19930.5485, | |
| "train_tokens_per_second": 487.868 | |
| }, | |
| { | |
| "epoch": 0.48614487117160915, | |
| "grad_norm": 0.06738725304603577, | |
| "learning_rate": 0.0001758262454999026, | |
| "loss": 0.07, | |
| "num_input_tokens_seen": 9766768, | |
| "step": 1125, | |
| "train_runtime": 19952.8417, | |
| "train_tokens_per_second": 489.493 | |
| }, | |
| { | |
| "epoch": 0.48830551504348296, | |
| "grad_norm": 0.053996093571186066, | |
| "learning_rate": 0.00017559972392017058, | |
| "loss": 0.0758, | |
| "num_input_tokens_seen": 9810240, | |
| "step": 1130, | |
| "train_runtime": 19974.4015, | |
| "train_tokens_per_second": 491.141 | |
| }, | |
| { | |
| "epoch": 0.4904661589153568, | |
| "grad_norm": 0.08248726278543472, | |
| "learning_rate": 0.00017537229334202683, | |
| "loss": 0.0854, | |
| "num_input_tokens_seen": 9853920, | |
| "step": 1135, | |
| "train_runtime": 19996.79, | |
| "train_tokens_per_second": 492.775 | |
| }, | |
| { | |
| "epoch": 0.4926268027872306, | |
| "grad_norm": 0.05713077634572983, | |
| "learning_rate": 0.0001751439565000585, | |
| "loss": 0.0726, | |
| "num_input_tokens_seen": 9897120, | |
| "step": 1140, | |
| "train_runtime": 20018.8567, | |
| "train_tokens_per_second": 494.39 | |
| }, | |
| { | |
| "epoch": 0.4947874466591044, | |
| "grad_norm": 0.07025758922100067, | |
| "learning_rate": 0.00017491471613974947, | |
| "loss": 0.0817, | |
| "num_input_tokens_seen": 9940432, | |
| "step": 1145, | |
| "train_runtime": 20039.9461, | |
| "train_tokens_per_second": 496.031 | |
| }, | |
| { | |
| "epoch": 0.4969480905309782, | |
| "grad_norm": 0.07322239875793457, | |
| "learning_rate": 0.00017468457501744749, | |
| "loss": 0.0854, | |
| "num_input_tokens_seen": 9984032, | |
| "step": 1150, | |
| "train_runtime": 20061.3593, | |
| "train_tokens_per_second": 497.675 | |
| }, | |
| { | |
| "epoch": 0.49910873440285203, | |
| "grad_norm": 0.07295897603034973, | |
| "learning_rate": 0.0001744535359003308, | |
| "loss": 0.0833, | |
| "num_input_tokens_seen": 10028048, | |
| "step": 1155, | |
| "train_runtime": 20082.8787, | |
| "train_tokens_per_second": 499.333 | |
| }, | |
| { | |
| "epoch": 0.5012693782747258, | |
| "grad_norm": 0.07253481447696686, | |
| "learning_rate": 0.00017422160156637507, | |
| "loss": 0.0741, | |
| "num_input_tokens_seen": 10071520, | |
| "step": 1160, | |
| "train_runtime": 20104.528, | |
| "train_tokens_per_second": 500.958 | |
| }, | |
| { | |
| "epoch": 0.5034300221465997, | |
| "grad_norm": 0.06147943064570427, | |
| "learning_rate": 0.0001739887748043198, | |
| "loss": 0.0712, | |
| "num_input_tokens_seen": 10114720, | |
| "step": 1165, | |
| "train_runtime": 20126.5851, | |
| "train_tokens_per_second": 502.555 | |
| }, | |
| { | |
| "epoch": 0.5055906660184735, | |
| "grad_norm": 0.07576042413711548, | |
| "learning_rate": 0.00017375505841363503, | |
| "loss": 0.0786, | |
| "num_input_tokens_seen": 10158080, | |
| "step": 1170, | |
| "train_runtime": 20148.2729, | |
| "train_tokens_per_second": 504.166 | |
| }, | |
| { | |
| "epoch": 0.5077513098903473, | |
| "grad_norm": 0.0773998275399208, | |
| "learning_rate": 0.00017352045520448742, | |
| "loss": 0.0734, | |
| "num_input_tokens_seen": 10201312, | |
| "step": 1175, | |
| "train_runtime": 20169.9853, | |
| "train_tokens_per_second": 505.767 | |
| }, | |
| { | |
| "epoch": 0.5099119537622211, | |
| "grad_norm": 0.0845336839556694, | |
| "learning_rate": 0.0001732849679977067, | |
| "loss": 0.0742, | |
| "num_input_tokens_seen": 10244464, | |
| "step": 1180, | |
| "train_runtime": 20192.6643, | |
| "train_tokens_per_second": 507.336 | |
| }, | |
| { | |
| "epoch": 0.512072597634095, | |
| "grad_norm": 0.059365056455135345, | |
| "learning_rate": 0.00017304859962475152, | |
| "loss": 0.0667, | |
| "num_input_tokens_seen": 10287056, | |
| "step": 1185, | |
| "train_runtime": 20213.7952, | |
| "train_tokens_per_second": 508.913 | |
| }, | |
| { | |
| "epoch": 0.5142332415059688, | |
| "grad_norm": 0.05587423965334892, | |
| "learning_rate": 0.00017281135292767565, | |
| "loss": 0.0756, | |
| "num_input_tokens_seen": 10330336, | |
| "step": 1190, | |
| "train_runtime": 20234.9959, | |
| "train_tokens_per_second": 510.518 | |
| }, | |
| { | |
| "epoch": 0.5163938853778426, | |
| "grad_norm": 0.06663983315229416, | |
| "learning_rate": 0.00017257323075909359, | |
| "loss": 0.0722, | |
| "num_input_tokens_seen": 10373616, | |
| "step": 1195, | |
| "train_runtime": 20256.6032, | |
| "train_tokens_per_second": 512.11 | |
| }, | |
| { | |
| "epoch": 0.5185545292497165, | |
| "grad_norm": 0.05629422143101692, | |
| "learning_rate": 0.00017233423598214635, | |
| "loss": 0.0753, | |
| "num_input_tokens_seen": 10417200, | |
| "step": 1200, | |
| "train_runtime": 20278.7181, | |
| "train_tokens_per_second": 513.701 | |
| }, | |
| { | |
| "epoch": 0.5185545292497165, | |
| "eval_loss": 0.07407065480947495, | |
| "eval_runtime": 713.8125, | |
| "eval_samples_per_second": 12.968, | |
| "eval_steps_per_second": 6.485, | |
| "num_input_tokens_seen": 10417200, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 0.5207151731215902, | |
| "grad_norm": 0.06868501752614975, | |
| "learning_rate": 0.00017209437147046715, | |
| "loss": 0.0685, | |
| "num_input_tokens_seen": 10460224, | |
| "step": 1205, | |
| "train_runtime": 21016.8491, | |
| "train_tokens_per_second": 497.707 | |
| }, | |
| { | |
| "epoch": 0.5228758169934641, | |
| "grad_norm": 0.06577759236097336, | |
| "learning_rate": 0.0001718536401081466, | |
| "loss": 0.0736, | |
| "num_input_tokens_seen": 10503504, | |
| "step": 1210, | |
| "train_runtime": 21038.6716, | |
| "train_tokens_per_second": 499.247 | |
| }, | |
| { | |
| "epoch": 0.5250364608653378, | |
| "grad_norm": 0.0667291060090065, | |
| "learning_rate": 0.00017161204478969837, | |
| "loss": 0.0704, | |
| "num_input_tokens_seen": 10546496, | |
| "step": 1215, | |
| "train_runtime": 21060.9395, | |
| "train_tokens_per_second": 500.761 | |
| }, | |
| { | |
| "epoch": 0.5271971047372117, | |
| "grad_norm": 0.05542680621147156, | |
| "learning_rate": 0.00017136958842002401, | |
| "loss": 0.0646, | |
| "num_input_tokens_seen": 10589392, | |
| "step": 1220, | |
| "train_runtime": 21082.1205, | |
| "train_tokens_per_second": 502.293 | |
| }, | |
| { | |
| "epoch": 0.5293577486090855, | |
| "grad_norm": 0.06269074976444244, | |
| "learning_rate": 0.00017112627391437828, | |
| "loss": 0.0705, | |
| "num_input_tokens_seen": 10632368, | |
| "step": 1225, | |
| "train_runtime": 21104.5922, | |
| "train_tokens_per_second": 503.794 | |
| }, | |
| { | |
| "epoch": 0.5315183924809593, | |
| "grad_norm": 0.07169587910175323, | |
| "learning_rate": 0.00017088210419833404, | |
| "loss": 0.0753, | |
| "num_input_tokens_seen": 10675296, | |
| "step": 1230, | |
| "train_runtime": 21125.5563, | |
| "train_tokens_per_second": 505.326 | |
| }, | |
| { | |
| "epoch": 0.5336790363528331, | |
| "grad_norm": 0.08142554759979248, | |
| "learning_rate": 0.00017063708220774702, | |
| "loss": 0.0739, | |
| "num_input_tokens_seen": 10719072, | |
| "step": 1235, | |
| "train_runtime": 21147.4293, | |
| "train_tokens_per_second": 506.874 | |
| }, | |
| { | |
| "epoch": 0.535839680224707, | |
| "grad_norm": 0.05577947571873665, | |
| "learning_rate": 0.00017039121088872062, | |
| "loss": 0.0811, | |
| "num_input_tokens_seen": 10762544, | |
| "step": 1240, | |
| "train_runtime": 21169.3825, | |
| "train_tokens_per_second": 508.401 | |
| }, | |
| { | |
| "epoch": 0.5380003240965808, | |
| "grad_norm": 0.07115308195352554, | |
| "learning_rate": 0.0001701444931975703, | |
| "loss": 0.0838, | |
| "num_input_tokens_seen": 10806464, | |
| "step": 1245, | |
| "train_runtime": 21190.8204, | |
| "train_tokens_per_second": 509.96 | |
| }, | |
| { | |
| "epoch": 0.5401609679684546, | |
| "grad_norm": 0.0784154161810875, | |
| "learning_rate": 0.00016989693210078835, | |
| "loss": 0.077, | |
| "num_input_tokens_seen": 10849680, | |
| "step": 1250, | |
| "train_runtime": 21212.6054, | |
| "train_tokens_per_second": 511.473 | |
| }, | |
| { | |
| "epoch": 0.5423216118403285, | |
| "grad_norm": 0.06607118248939514, | |
| "learning_rate": 0.00016964853057500778, | |
| "loss": 0.0776, | |
| "num_input_tokens_seen": 10893376, | |
| "step": 1255, | |
| "train_runtime": 21234.3551, | |
| "train_tokens_per_second": 513.007 | |
| }, | |
| { | |
| "epoch": 0.5444822557122022, | |
| "grad_norm": 0.08232490718364716, | |
| "learning_rate": 0.000169399291606967, | |
| "loss": 0.0856, | |
| "num_input_tokens_seen": 10937072, | |
| "step": 1260, | |
| "train_runtime": 21256.5851, | |
| "train_tokens_per_second": 514.526 | |
| }, | |
| { | |
| "epoch": 0.5466428995840761, | |
| "grad_norm": 0.05634531006217003, | |
| "learning_rate": 0.00016914921819347355, | |
| "loss": 0.0675, | |
| "num_input_tokens_seen": 10979984, | |
| "step": 1265, | |
| "train_runtime": 21277.7493, | |
| "train_tokens_per_second": 516.031 | |
| }, | |
| { | |
| "epoch": 0.5488035434559498, | |
| "grad_norm": 0.06430606544017792, | |
| "learning_rate": 0.00016889831334136827, | |
| "loss": 0.081, | |
| "num_input_tokens_seen": 11023376, | |
| "step": 1270, | |
| "train_runtime": 21299.595, | |
| "train_tokens_per_second": 517.539 | |
| }, | |
| { | |
| "epoch": 0.5509641873278237, | |
| "grad_norm": 0.07790251821279526, | |
| "learning_rate": 0.00016864658006748905, | |
| "loss": 0.081, | |
| "num_input_tokens_seen": 11066864, | |
| "step": 1275, | |
| "train_runtime": 21321.0734, | |
| "train_tokens_per_second": 519.058 | |
| }, | |
| { | |
| "epoch": 0.5531248311996975, | |
| "grad_norm": 0.05007950961589813, | |
| "learning_rate": 0.00016839402139863461, | |
| "loss": 0.0757, | |
| "num_input_tokens_seen": 11109904, | |
| "step": 1280, | |
| "train_runtime": 21342.7154, | |
| "train_tokens_per_second": 520.548 | |
| }, | |
| { | |
| "epoch": 0.5552854750715713, | |
| "grad_norm": 0.06686703860759735, | |
| "learning_rate": 0.00016814064037152805, | |
| "loss": 0.0697, | |
| "num_input_tokens_seen": 11153008, | |
| "step": 1285, | |
| "train_runtime": 21363.8744, | |
| "train_tokens_per_second": 522.05 | |
| }, | |
| { | |
| "epoch": 0.5574461189434452, | |
| "grad_norm": 0.05480387806892395, | |
| "learning_rate": 0.00016788644003278038, | |
| "loss": 0.0697, | |
| "num_input_tokens_seen": 11196352, | |
| "step": 1290, | |
| "train_runtime": 21385.9113, | |
| "train_tokens_per_second": 523.539 | |
| }, | |
| { | |
| "epoch": 0.559606762815319, | |
| "grad_norm": 0.07160132378339767, | |
| "learning_rate": 0.00016763142343885384, | |
| "loss": 0.0703, | |
| "num_input_tokens_seen": 11239520, | |
| "step": 1295, | |
| "train_runtime": 21407.2306, | |
| "train_tokens_per_second": 525.034 | |
| }, | |
| { | |
| "epoch": 0.5617674066871928, | |
| "grad_norm": 0.06048699840903282, | |
| "learning_rate": 0.0001673755936560253, | |
| "loss": 0.0775, | |
| "num_input_tokens_seen": 11283248, | |
| "step": 1300, | |
| "train_runtime": 21428.6481, | |
| "train_tokens_per_second": 526.55 | |
| }, | |
| { | |
| "epoch": 0.5639280505590666, | |
| "grad_norm": 0.05485227331519127, | |
| "learning_rate": 0.0001671189537603491, | |
| "loss": 0.0719, | |
| "num_input_tokens_seen": 11326608, | |
| "step": 1305, | |
| "train_runtime": 21449.9158, | |
| "train_tokens_per_second": 528.049 | |
| }, | |
| { | |
| "epoch": 0.5660886944309405, | |
| "grad_norm": 0.054880425333976746, | |
| "learning_rate": 0.00016686150683762038, | |
| "loss": 0.0706, | |
| "num_input_tokens_seen": 11369648, | |
| "step": 1310, | |
| "train_runtime": 21471.9256, | |
| "train_tokens_per_second": 529.512 | |
| }, | |
| { | |
| "epoch": 0.5682493383028142, | |
| "grad_norm": 0.07284388691186905, | |
| "learning_rate": 0.00016660325598333783, | |
| "loss": 0.0705, | |
| "num_input_tokens_seen": 11412624, | |
| "step": 1315, | |
| "train_runtime": 21493.5433, | |
| "train_tokens_per_second": 530.979 | |
| }, | |
| { | |
| "epoch": 0.5704099821746881, | |
| "grad_norm": 0.07158586382865906, | |
| "learning_rate": 0.00016634420430266644, | |
| "loss": 0.0783, | |
| "num_input_tokens_seen": 11456256, | |
| "step": 1320, | |
| "train_runtime": 21515.7322, | |
| "train_tokens_per_second": 532.459 | |
| }, | |
| { | |
| "epoch": 0.5725706260465618, | |
| "grad_norm": 0.06218944862484932, | |
| "learning_rate": 0.00016608435491040016, | |
| "loss": 0.07, | |
| "num_input_tokens_seen": 11499632, | |
| "step": 1325, | |
| "train_runtime": 21537.0635, | |
| "train_tokens_per_second": 533.946 | |
| }, | |
| { | |
| "epoch": 0.5747312699184357, | |
| "grad_norm": 0.06975477933883667, | |
| "learning_rate": 0.00016582371093092456, | |
| "loss": 0.0701, | |
| "num_input_tokens_seen": 11543056, | |
| "step": 1330, | |
| "train_runtime": 21558.7893, | |
| "train_tokens_per_second": 535.422 | |
| }, | |
| { | |
| "epoch": 0.5768919137903095, | |
| "grad_norm": 0.07880192995071411, | |
| "learning_rate": 0.00016556227549817919, | |
| "loss": 0.0778, | |
| "num_input_tokens_seen": 11586800, | |
| "step": 1335, | |
| "train_runtime": 21580.8092, | |
| "train_tokens_per_second": 536.903 | |
| }, | |
| { | |
| "epoch": 0.5790525576621833, | |
| "grad_norm": 0.06973356753587723, | |
| "learning_rate": 0.00016530005175561987, | |
| "loss": 0.0646, | |
| "num_input_tokens_seen": 11629808, | |
| "step": 1340, | |
| "train_runtime": 21602.932, | |
| "train_tokens_per_second": 538.344 | |
| }, | |
| { | |
| "epoch": 0.5812132015340572, | |
| "grad_norm": 0.05525905266404152, | |
| "learning_rate": 0.00016503704285618094, | |
| "loss": 0.0684, | |
| "num_input_tokens_seen": 11673088, | |
| "step": 1345, | |
| "train_runtime": 21624.3412, | |
| "train_tokens_per_second": 539.812 | |
| }, | |
| { | |
| "epoch": 0.583373845405931, | |
| "grad_norm": 0.07406817376613617, | |
| "learning_rate": 0.00016477325196223732, | |
| "loss": 0.0748, | |
| "num_input_tokens_seen": 11716752, | |
| "step": 1350, | |
| "train_runtime": 21646.39, | |
| "train_tokens_per_second": 541.28 | |
| }, | |
| { | |
| "epoch": 0.5855344892778048, | |
| "grad_norm": 0.06670234352350235, | |
| "learning_rate": 0.00016450868224556655, | |
| "loss": 0.0778, | |
| "num_input_tokens_seen": 11760400, | |
| "step": 1355, | |
| "train_runtime": 21668.814, | |
| "train_tokens_per_second": 542.734 | |
| }, | |
| { | |
| "epoch": 0.5876951331496786, | |
| "grad_norm": 0.06407748907804489, | |
| "learning_rate": 0.0001642433368873105, | |
| "loss": 0.0806, | |
| "num_input_tokens_seen": 11803904, | |
| "step": 1360, | |
| "train_runtime": 21690.4295, | |
| "train_tokens_per_second": 544.199 | |
| }, | |
| { | |
| "epoch": 0.5898557770215525, | |
| "grad_norm": 0.0650821402668953, | |
| "learning_rate": 0.0001639772190779374, | |
| "loss": 0.0737, | |
| "num_input_tokens_seen": 11846960, | |
| "step": 1365, | |
| "train_runtime": 21713.0068, | |
| "train_tokens_per_second": 545.616 | |
| }, | |
| { | |
| "epoch": 0.5920164208934262, | |
| "grad_norm": 0.0700002983212471, | |
| "learning_rate": 0.00016371033201720308, | |
| "loss": 0.0763, | |
| "num_input_tokens_seen": 11891024, | |
| "step": 1370, | |
| "train_runtime": 21734.9473, | |
| "train_tokens_per_second": 547.092 | |
| }, | |
| { | |
| "epoch": 0.5941770647653001, | |
| "grad_norm": 0.06870347261428833, | |
| "learning_rate": 0.0001634426789141129, | |
| "loss": 0.0769, | |
| "num_input_tokens_seen": 11934832, | |
| "step": 1375, | |
| "train_runtime": 21757.2922, | |
| "train_tokens_per_second": 548.544 | |
| }, | |
| { | |
| "epoch": 0.5963377086371738, | |
| "grad_norm": 0.052877090871334076, | |
| "learning_rate": 0.0001631742629868829, | |
| "loss": 0.0692, | |
| "num_input_tokens_seen": 11978000, | |
| "step": 1380, | |
| "train_runtime": 21778.7649, | |
| "train_tokens_per_second": 549.985 | |
| }, | |
| { | |
| "epoch": 0.5984983525090477, | |
| "grad_norm": 0.06286793202161789, | |
| "learning_rate": 0.00016290508746290123, | |
| "loss": 0.072, | |
| "num_input_tokens_seen": 12021552, | |
| "step": 1385, | |
| "train_runtime": 21800.3728, | |
| "train_tokens_per_second": 551.438 | |
| }, | |
| { | |
| "epoch": 0.6006589963809215, | |
| "grad_norm": 0.04737339913845062, | |
| "learning_rate": 0.00016263515557868923, | |
| "loss": 0.0704, | |
| "num_input_tokens_seen": 12064832, | |
| "step": 1390, | |
| "train_runtime": 21823.0417, | |
| "train_tokens_per_second": 552.848 | |
| }, | |
| { | |
| "epoch": 0.6028196402527953, | |
| "grad_norm": 0.06066849082708359, | |
| "learning_rate": 0.0001623644705798627, | |
| "loss": 0.0707, | |
| "num_input_tokens_seen": 12107952, | |
| "step": 1395, | |
| "train_runtime": 21844.9979, | |
| "train_tokens_per_second": 554.267 | |
| }, | |
| { | |
| "epoch": 0.6049802841246692, | |
| "grad_norm": 0.08087003231048584, | |
| "learning_rate": 0.0001620930357210927, | |
| "loss": 0.0763, | |
| "num_input_tokens_seen": 12151680, | |
| "step": 1400, | |
| "train_runtime": 21867.3505, | |
| "train_tokens_per_second": 555.7 | |
| }, | |
| { | |
| "epoch": 0.6049802841246692, | |
| "eval_loss": 0.07353422790765762, | |
| "eval_runtime": 26942.5423, | |
| "eval_samples_per_second": 0.344, | |
| "eval_steps_per_second": 0.172, | |
| "num_input_tokens_seen": 12151680, | |
| "step": 1400 | |
| }, | |
| { | |
| "epoch": 0.607140927996543, | |
| "grad_norm": 0.06639593839645386, | |
| "learning_rate": 0.00016182085426606646, | |
| "loss": 0.0825, | |
| "num_input_tokens_seen": 12195536, | |
| "step": 1405, | |
| "train_runtime": 48834.2475, | |
| "train_tokens_per_second": 249.733 | |
| }, | |
| { | |
| "epoch": 0.6093015718684168, | |
| "grad_norm": 0.06361619383096695, | |
| "learning_rate": 0.0001615479294874482, | |
| "loss": 0.0751, | |
| "num_input_tokens_seen": 12239248, | |
| "step": 1410, | |
| "train_runtime": 48856.1569, | |
| "train_tokens_per_second": 250.516 | |
| }, | |
| { | |
| "epoch": 0.6114622157402906, | |
| "grad_norm": 0.05519590154290199, | |
| "learning_rate": 0.0001612742646668397, | |
| "loss": 0.0654, | |
| "num_input_tokens_seen": 12282320, | |
| "step": 1415, | |
| "train_runtime": 48877.9032, | |
| "train_tokens_per_second": 251.286 | |
| }, | |
| { | |
| "epoch": 0.6136228596121645, | |
| "grad_norm": 0.0630306825041771, | |
| "learning_rate": 0.0001609998630947409, | |
| "loss": 0.071, | |
| "num_input_tokens_seen": 12325696, | |
| "step": 1420, | |
| "train_runtime": 48899.6287, | |
| "train_tokens_per_second": 252.061 | |
| }, | |
| { | |
| "epoch": 0.6157835034840382, | |
| "grad_norm": 0.07874094694852829, | |
| "learning_rate": 0.0001607247280705104, | |
| "loss": 0.0754, | |
| "num_input_tokens_seen": 12368960, | |
| "step": 1425, | |
| "train_runtime": 48920.7427, | |
| "train_tokens_per_second": 252.837 | |
| }, | |
| { | |
| "epoch": 0.6179441473559121, | |
| "grad_norm": 0.07459452748298645, | |
| "learning_rate": 0.00016044886290232551, | |
| "loss": 0.078, | |
| "num_input_tokens_seen": 12412576, | |
| "step": 1430, | |
| "train_runtime": 48942.9145, | |
| "train_tokens_per_second": 253.613 | |
| }, | |
| { | |
| "epoch": 0.6201047912277858, | |
| "grad_norm": 0.05790963023900986, | |
| "learning_rate": 0.0001601722709071429, | |
| "loss": 0.0715, | |
| "num_input_tokens_seen": 12456080, | |
| "step": 1435, | |
| "train_runtime": 48964.6715, | |
| "train_tokens_per_second": 254.389 | |
| }, | |
| { | |
| "epoch": 0.6222654350996597, | |
| "grad_norm": 0.05693833902478218, | |
| "learning_rate": 0.00015989495541065825, | |
| "loss": 0.0675, | |
| "num_input_tokens_seen": 12499104, | |
| "step": 1440, | |
| "train_runtime": 48986.6876, | |
| "train_tokens_per_second": 255.153 | |
| }, | |
| { | |
| "epoch": 0.6244260789715336, | |
| "grad_norm": 0.06325607001781464, | |
| "learning_rate": 0.0001596169197472667, | |
| "loss": 0.0756, | |
| "num_input_tokens_seen": 12542528, | |
| "step": 1445, | |
| "train_runtime": 49007.8533, | |
| "train_tokens_per_second": 255.929 | |
| }, | |
| { | |
| "epoch": 0.6265867228434073, | |
| "grad_norm": 0.06776595860719681, | |
| "learning_rate": 0.00015933816726002245, | |
| "loss": 0.0733, | |
| "num_input_tokens_seen": 12585680, | |
| "step": 1450, | |
| "train_runtime": 49030.8771, | |
| "train_tokens_per_second": 256.689 | |
| }, | |
| { | |
| "epoch": 0.6287473667152812, | |
| "grad_norm": 0.0681847482919693, | |
| "learning_rate": 0.0001590587013005987, | |
| "loss": 0.0825, | |
| "num_input_tokens_seen": 12629408, | |
| "step": 1455, | |
| "train_runtime": 49052.8605, | |
| "train_tokens_per_second": 257.465 | |
| }, | |
| { | |
| "epoch": 0.630908010587155, | |
| "grad_norm": 0.07597000896930695, | |
| "learning_rate": 0.00015877852522924732, | |
| "loss": 0.0824, | |
| "num_input_tokens_seen": 12673216, | |
| "step": 1460, | |
| "train_runtime": 49074.2647, | |
| "train_tokens_per_second": 258.246 | |
| }, | |
| { | |
| "epoch": 0.6330686544590288, | |
| "grad_norm": 0.0672716274857521, | |
| "learning_rate": 0.00015849764241475844, | |
| "loss": 0.0668, | |
| "num_input_tokens_seen": 12716384, | |
| "step": 1465, | |
| "train_runtime": 49096.5146, | |
| "train_tokens_per_second": 259.008 | |
| }, | |
| { | |
| "epoch": 0.6352292983309026, | |
| "grad_norm": 0.08129267394542694, | |
| "learning_rate": 0.00015821605623441993, | |
| "loss": 0.0781, | |
| "num_input_tokens_seen": 12760208, | |
| "step": 1470, | |
| "train_runtime": 49118.6675, | |
| "train_tokens_per_second": 259.783 | |
| }, | |
| { | |
| "epoch": 0.6373899422027764, | |
| "grad_norm": 0.1420648843050003, | |
| "learning_rate": 0.00015793377007397683, | |
| "loss": 0.0762, | |
| "num_input_tokens_seen": 12803552, | |
| "step": 1475, | |
| "train_runtime": 49139.971, | |
| "train_tokens_per_second": 260.553 | |
| }, | |
| { | |
| "epoch": 0.6395505860746502, | |
| "grad_norm": 0.08139633387327194, | |
| "learning_rate": 0.00015765078732759067, | |
| "loss": 0.0861, | |
| "num_input_tokens_seen": 12847792, | |
| "step": 1480, | |
| "train_runtime": 49162.4487, | |
| "train_tokens_per_second": 261.333 | |
| }, | |
| { | |
| "epoch": 0.6417112299465241, | |
| "grad_norm": 0.05912632867693901, | |
| "learning_rate": 0.00015736711139779856, | |
| "loss": 0.0706, | |
| "num_input_tokens_seen": 12891120, | |
| "step": 1485, | |
| "train_runtime": 49184.1068, | |
| "train_tokens_per_second": 262.099 | |
| }, | |
| { | |
| "epoch": 0.6438718738183978, | |
| "grad_norm": 0.0663304552435875, | |
| "learning_rate": 0.00015708274569547231, | |
| "loss": 0.077, | |
| "num_input_tokens_seen": 12934784, | |
| "step": 1490, | |
| "train_runtime": 49205.9826, | |
| "train_tokens_per_second": 262.87 | |
| }, | |
| { | |
| "epoch": 0.6460325176902717, | |
| "grad_norm": 0.04382750019431114, | |
| "learning_rate": 0.00015679769363977753, | |
| "loss": 0.0644, | |
| "num_input_tokens_seen": 12977696, | |
| "step": 1495, | |
| "train_runtime": 49226.9806, | |
| "train_tokens_per_second": 263.63 | |
| }, | |
| { | |
| "epoch": 0.6481931615621456, | |
| "grad_norm": 0.06222411245107651, | |
| "learning_rate": 0.00015651195865813234, | |
| "loss": 0.0721, | |
| "num_input_tokens_seen": 13020880, | |
| "step": 1500, | |
| "train_runtime": 49248.1972, | |
| "train_tokens_per_second": 264.393 | |
| }, | |
| { | |
| "epoch": 0.6503538054340193, | |
| "grad_norm": 0.05253620073199272, | |
| "learning_rate": 0.00015622554418616625, | |
| "loss": 0.0747, | |
| "num_input_tokens_seen": 13064176, | |
| "step": 1505, | |
| "train_runtime": 49269.4676, | |
| "train_tokens_per_second": 265.158 | |
| }, | |
| { | |
| "epoch": 0.6525144493058932, | |
| "grad_norm": 0.07142533361911774, | |
| "learning_rate": 0.0001559384536676789, | |
| "loss": 0.074, | |
| "num_input_tokens_seen": 13107616, | |
| "step": 1510, | |
| "train_runtime": 49291.1457, | |
| "train_tokens_per_second": 265.922 | |
| }, | |
| { | |
| "epoch": 0.654675093177767, | |
| "grad_norm": 0.06417880207300186, | |
| "learning_rate": 0.00015565069055459855, | |
| "loss": 0.0772, | |
| "num_input_tokens_seen": 13151296, | |
| "step": 1515, | |
| "train_runtime": 49313.0519, | |
| "train_tokens_per_second": 266.69 | |
| }, | |
| { | |
| "epoch": 0.6568357370496408, | |
| "grad_norm": 0.07250814884901047, | |
| "learning_rate": 0.00015536225830694068, | |
| "loss": 0.0677, | |
| "num_input_tokens_seen": 13194240, | |
| "step": 1520, | |
| "train_runtime": 49334.5357, | |
| "train_tokens_per_second": 267.444 | |
| }, | |
| { | |
| "epoch": 0.6589963809215146, | |
| "grad_norm": 0.07462477684020996, | |
| "learning_rate": 0.0001550731603927663, | |
| "loss": 0.0689, | |
| "num_input_tokens_seen": 13237296, | |
| "step": 1525, | |
| "train_runtime": 49356.2727, | |
| "train_tokens_per_second": 268.199 | |
| }, | |
| { | |
| "epoch": 0.6611570247933884, | |
| "grad_norm": 0.07405927777290344, | |
| "learning_rate": 0.00015478340028814028, | |
| "loss": 0.0765, | |
| "num_input_tokens_seen": 13280976, | |
| "step": 1530, | |
| "train_runtime": 49377.9371, | |
| "train_tokens_per_second": 268.966 | |
| }, | |
| { | |
| "epoch": 0.6633176686652622, | |
| "grad_norm": 0.0729064792394638, | |
| "learning_rate": 0.00015449298147708954, | |
| "loss": 0.0754, | |
| "num_input_tokens_seen": 13324480, | |
| "step": 1535, | |
| "train_runtime": 49400.043, | |
| "train_tokens_per_second": 269.726 | |
| }, | |
| { | |
| "epoch": 0.6654783125371361, | |
| "grad_norm": 0.07033967971801758, | |
| "learning_rate": 0.00015420190745156126, | |
| "loss": 0.0744, | |
| "num_input_tokens_seen": 13367904, | |
| "step": 1540, | |
| "train_runtime": 49421.5579, | |
| "train_tokens_per_second": 270.487 | |
| }, | |
| { | |
| "epoch": 0.6676389564090098, | |
| "grad_norm": 0.0658600926399231, | |
| "learning_rate": 0.0001539101817113807, | |
| "loss": 0.0763, | |
| "num_input_tokens_seen": 13411344, | |
| "step": 1545, | |
| "train_runtime": 49443.895, | |
| "train_tokens_per_second": 271.244 | |
| }, | |
| { | |
| "epoch": 0.6697996002808837, | |
| "grad_norm": 0.05582299083471298, | |
| "learning_rate": 0.00015361780776420924, | |
| "loss": 0.0774, | |
| "num_input_tokens_seen": 13455088, | |
| "step": 1550, | |
| "train_runtime": 49466.0961, | |
| "train_tokens_per_second": 272.006 | |
| }, | |
| { | |
| "epoch": 0.6719602441527576, | |
| "grad_norm": 0.07074993848800659, | |
| "learning_rate": 0.00015332478912550229, | |
| "loss": 0.0773, | |
| "num_input_tokens_seen": 13498336, | |
| "step": 1555, | |
| "train_runtime": 49487.8812, | |
| "train_tokens_per_second": 272.76 | |
| }, | |
| { | |
| "epoch": 0.6741208880246313, | |
| "grad_norm": 0.052721716463565826, | |
| "learning_rate": 0.0001530311293184668, | |
| "loss": 0.0701, | |
| "num_input_tokens_seen": 13541392, | |
| "step": 1560, | |
| "train_runtime": 49508.932, | |
| "train_tokens_per_second": 273.514 | |
| }, | |
| { | |
| "epoch": 0.6762815318965052, | |
| "grad_norm": 0.07522527128458023, | |
| "learning_rate": 0.00015273683187401913, | |
| "loss": 0.0825, | |
| "num_input_tokens_seen": 13585072, | |
| "step": 1565, | |
| "train_runtime": 49530.1248, | |
| "train_tokens_per_second": 274.279 | |
| }, | |
| { | |
| "epoch": 0.678442175768379, | |
| "grad_norm": 0.05335766449570656, | |
| "learning_rate": 0.00015244190033074243, | |
| "loss": 0.0747, | |
| "num_input_tokens_seen": 13628304, | |
| "step": 1570, | |
| "train_runtime": 49551.7374, | |
| "train_tokens_per_second": 275.032 | |
| }, | |
| { | |
| "epoch": 0.6806028196402528, | |
| "grad_norm": 0.055294234305620193, | |
| "learning_rate": 0.0001521463382348441, | |
| "loss": 0.071, | |
| "num_input_tokens_seen": 13671536, | |
| "step": 1575, | |
| "train_runtime": 49572.8424, | |
| "train_tokens_per_second": 275.787 | |
| }, | |
| { | |
| "epoch": 0.6827634635121266, | |
| "grad_norm": 0.0787506178021431, | |
| "learning_rate": 0.0001518501491401133, | |
| "loss": 0.0742, | |
| "num_input_tokens_seen": 13715280, | |
| "step": 1580, | |
| "train_runtime": 49595.5944, | |
| "train_tokens_per_second": 276.542 | |
| }, | |
| { | |
| "epoch": 0.6849241073840004, | |
| "grad_norm": 0.08133631199598312, | |
| "learning_rate": 0.00015155333660787806, | |
| "loss": 0.0815, | |
| "num_input_tokens_seen": 13759312, | |
| "step": 1585, | |
| "train_runtime": 49617.1912, | |
| "train_tokens_per_second": 277.309 | |
| }, | |
| { | |
| "epoch": 0.6870847512558742, | |
| "grad_norm": 0.06821410357952118, | |
| "learning_rate": 0.00015125590420696257, | |
| "loss": 0.0665, | |
| "num_input_tokens_seen": 13802112, | |
| "step": 1590, | |
| "train_runtime": 49638.3396, | |
| "train_tokens_per_second": 278.053 | |
| }, | |
| { | |
| "epoch": 0.6892453951277481, | |
| "grad_norm": 0.059675756841897964, | |
| "learning_rate": 0.00015095785551364412, | |
| "loss": 0.0696, | |
| "num_input_tokens_seen": 13845200, | |
| "step": 1595, | |
| "train_runtime": 49660.2073, | |
| "train_tokens_per_second": 278.799 | |
| }, | |
| { | |
| "epoch": 0.6914060389996219, | |
| "grad_norm": 0.05362169072031975, | |
| "learning_rate": 0.00015065919411161026, | |
| "loss": 0.0732, | |
| "num_input_tokens_seen": 13888800, | |
| "step": 1600, | |
| "train_runtime": 49681.8846, | |
| "train_tokens_per_second": 279.555 | |
| }, | |
| { | |
| "epoch": 0.6914060389996219, | |
| "eval_loss": 0.07299761474132538, | |
| "eval_runtime": 2467.271, | |
| "eval_samples_per_second": 3.752, | |
| "eval_steps_per_second": 1.876, | |
| "num_input_tokens_seen": 13888800, | |
| "step": 1600 | |
| }, | |
| { | |
| "epoch": 0.6935666828714957, | |
| "grad_norm": 0.05830957740545273, | |
| "learning_rate": 0.00015035992359191568, | |
| "loss": 0.0665, | |
| "num_input_tokens_seen": 13931968, | |
| "step": 1605, | |
| "train_runtime": 52175.1561, | |
| "train_tokens_per_second": 267.023 | |
| }, | |
| { | |
| "epoch": 0.6957273267433696, | |
| "grad_norm": 0.06323828548192978, | |
| "learning_rate": 0.00015006004755293886, | |
| "loss": 0.0732, | |
| "num_input_tokens_seen": 13975296, | |
| "step": 1610, | |
| "train_runtime": 52196.8339, | |
| "train_tokens_per_second": 267.742 | |
| }, | |
| { | |
| "epoch": 0.6978879706152433, | |
| "grad_norm": 0.0689457580447197, | |
| "learning_rate": 0.00014975956960033913, | |
| "loss": 0.0769, | |
| "num_input_tokens_seen": 14018608, | |
| "step": 1615, | |
| "train_runtime": 52218.6012, | |
| "train_tokens_per_second": 268.46 | |
| }, | |
| { | |
| "epoch": 0.7000486144871172, | |
| "grad_norm": 0.07999309152364731, | |
| "learning_rate": 0.00014945849334701308, | |
| "loss": 0.0759, | |
| "num_input_tokens_seen": 14062144, | |
| "step": 1620, | |
| "train_runtime": 52240.9771, | |
| "train_tokens_per_second": 269.178 | |
| }, | |
| { | |
| "epoch": 0.702209258358991, | |
| "grad_norm": 0.06296563893556595, | |
| "learning_rate": 0.000149156822413051, | |
| "loss": 0.0796, | |
| "num_input_tokens_seen": 14105600, | |
| "step": 1625, | |
| "train_runtime": 52262.9578, | |
| "train_tokens_per_second": 269.897 | |
| }, | |
| { | |
| "epoch": 0.7043699022308648, | |
| "grad_norm": 0.06787339597940445, | |
| "learning_rate": 0.00014885456042569372, | |
| "loss": 0.0772, | |
| "num_input_tokens_seen": 14149056, | |
| "step": 1630, | |
| "train_runtime": 52284.4218, | |
| "train_tokens_per_second": 270.617 | |
| }, | |
| { | |
| "epoch": 0.7065305461027386, | |
| "grad_norm": 0.06111348420381546, | |
| "learning_rate": 0.00014855171101928872, | |
| "loss": 0.077, | |
| "num_input_tokens_seen": 14192800, | |
| "step": 1635, | |
| "train_runtime": 52306.1416, | |
| "train_tokens_per_second": 271.341 | |
| }, | |
| { | |
| "epoch": 0.7086911899746124, | |
| "grad_norm": 0.06680696457624435, | |
| "learning_rate": 0.0001482482778352465, | |
| "loss": 0.0705, | |
| "num_input_tokens_seen": 14236272, | |
| "step": 1640, | |
| "train_runtime": 52327.9176, | |
| "train_tokens_per_second": 272.059 | |
| }, | |
| { | |
| "epoch": 0.7108518338464862, | |
| "grad_norm": 0.058288805186748505, | |
| "learning_rate": 0.00014794426452199687, | |
| "loss": 0.0693, | |
| "num_input_tokens_seen": 14279504, | |
| "step": 1645, | |
| "train_runtime": 52349.7701, | |
| "train_tokens_per_second": 272.771 | |
| }, | |
| { | |
| "epoch": 0.7130124777183601, | |
| "grad_norm": 0.06921833008527756, | |
| "learning_rate": 0.0001476396747349449, | |
| "loss": 0.0768, | |
| "num_input_tokens_seen": 14323296, | |
| "step": 1650, | |
| "train_runtime": 52371.3024, | |
| "train_tokens_per_second": 273.495 | |
| }, | |
| { | |
| "epoch": 0.7151731215902339, | |
| "grad_norm": 0.07161122560501099, | |
| "learning_rate": 0.00014733451213642712, | |
| "loss": 0.0785, | |
| "num_input_tokens_seen": 14367168, | |
| "step": 1655, | |
| "train_runtime": 52392.8415, | |
| "train_tokens_per_second": 274.22 | |
| }, | |
| { | |
| "epoch": 0.7173337654621077, | |
| "grad_norm": 0.08010240644216537, | |
| "learning_rate": 0.00014702878039566758, | |
| "loss": 0.0758, | |
| "num_input_tokens_seen": 14410560, | |
| "step": 1660, | |
| "train_runtime": 52414.3262, | |
| "train_tokens_per_second": 274.936 | |
| }, | |
| { | |
| "epoch": 0.7194944093339816, | |
| "grad_norm": 0.0588817335665226, | |
| "learning_rate": 0.00014672248318873342, | |
| "loss": 0.0695, | |
| "num_input_tokens_seen": 14453552, | |
| "step": 1665, | |
| "train_runtime": 52436.6055, | |
| "train_tokens_per_second": 275.639 | |
| }, | |
| { | |
| "epoch": 0.7216550532058553, | |
| "grad_norm": 0.06551285833120346, | |
| "learning_rate": 0.00014641562419849094, | |
| "loss": 0.0725, | |
| "num_input_tokens_seen": 14497120, | |
| "step": 1670, | |
| "train_runtime": 52458.3769, | |
| "train_tokens_per_second": 276.355 | |
| }, | |
| { | |
| "epoch": 0.7238156970777292, | |
| "grad_norm": 0.07580401748418808, | |
| "learning_rate": 0.00014610820711456122, | |
| "loss": 0.0848, | |
| "num_input_tokens_seen": 14540912, | |
| "step": 1675, | |
| "train_runtime": 52480.8053, | |
| "train_tokens_per_second": 277.071 | |
| }, | |
| { | |
| "epoch": 0.725976340949603, | |
| "grad_norm": 0.04763949662446976, | |
| "learning_rate": 0.0001458002356332758, | |
| "loss": 0.0666, | |
| "num_input_tokens_seen": 14583920, | |
| "step": 1680, | |
| "train_runtime": 52502.201, | |
| "train_tokens_per_second": 277.777 | |
| }, | |
| { | |
| "epoch": 0.7281369848214768, | |
| "grad_norm": 0.0561816431581974, | |
| "learning_rate": 0.0001454917134576321, | |
| "loss": 0.0683, | |
| "num_input_tokens_seen": 14627040, | |
| "step": 1685, | |
| "train_runtime": 52524.1203, | |
| "train_tokens_per_second": 278.482 | |
| }, | |
| { | |
| "epoch": 0.7302976286933506, | |
| "grad_norm": 0.0591006763279438, | |
| "learning_rate": 0.0001451826442972491, | |
| "loss": 0.0721, | |
| "num_input_tokens_seen": 14670560, | |
| "step": 1690, | |
| "train_runtime": 52546.3285, | |
| "train_tokens_per_second": 279.193 | |
| }, | |
| { | |
| "epoch": 0.7324582725652244, | |
| "grad_norm": 0.05632052198052406, | |
| "learning_rate": 0.00014487303186832255, | |
| "loss": 0.073, | |
| "num_input_tokens_seen": 14714208, | |
| "step": 1695, | |
| "train_runtime": 52567.7942, | |
| "train_tokens_per_second": 279.909 | |
| }, | |
| { | |
| "epoch": 0.7346189164370982, | |
| "grad_norm": 0.0633561760187149, | |
| "learning_rate": 0.00014456287989358048, | |
| "loss": 0.0773, | |
| "num_input_tokens_seen": 14757776, | |
| "step": 1700, | |
| "train_runtime": 52589.4678, | |
| "train_tokens_per_second": 280.622 | |
| }, | |
| { | |
| "epoch": 0.7367795603089721, | |
| "grad_norm": 0.058160725980997086, | |
| "learning_rate": 0.0001442521921022382, | |
| "loss": 0.0732, | |
| "num_input_tokens_seen": 14801440, | |
| "step": 1705, | |
| "train_runtime": 52611.4205, | |
| "train_tokens_per_second": 281.335 | |
| }, | |
| { | |
| "epoch": 0.7389402041808459, | |
| "grad_norm": 0.0793909877538681, | |
| "learning_rate": 0.0001439409722299537, | |
| "loss": 0.0794, | |
| "num_input_tokens_seen": 14845088, | |
| "step": 1710, | |
| "train_runtime": 52633.3935, | |
| "train_tokens_per_second": 282.047 | |
| }, | |
| { | |
| "epoch": 0.7411008480527197, | |
| "grad_norm": 0.06253749877214432, | |
| "learning_rate": 0.00014362922401878254, | |
| "loss": 0.072, | |
| "num_input_tokens_seen": 14888400, | |
| "step": 1715, | |
| "train_runtime": 52655.0177, | |
| "train_tokens_per_second": 282.754 | |
| }, | |
| { | |
| "epoch": 0.7432614919245936, | |
| "grad_norm": 0.061189230531454086, | |
| "learning_rate": 0.00014331695121713297, | |
| "loss": 0.0697, | |
| "num_input_tokens_seen": 14931328, | |
| "step": 1720, | |
| "train_runtime": 52676.0686, | |
| "train_tokens_per_second": 283.456 | |
| }, | |
| { | |
| "epoch": 0.7454221357964673, | |
| "grad_norm": 0.07376944273710251, | |
| "learning_rate": 0.0001430041575797208, | |
| "loss": 0.0718, | |
| "num_input_tokens_seen": 14974592, | |
| "step": 1725, | |
| "train_runtime": 52697.2214, | |
| "train_tokens_per_second": 284.163 | |
| }, | |
| { | |
| "epoch": 0.7475827796683412, | |
| "grad_norm": 0.07209795713424683, | |
| "learning_rate": 0.00014269084686752435, | |
| "loss": 0.0724, | |
| "num_input_tokens_seen": 15017824, | |
| "step": 1730, | |
| "train_runtime": 52719.2751, | |
| "train_tokens_per_second": 284.864 | |
| }, | |
| { | |
| "epoch": 0.749743423540215, | |
| "grad_norm": 0.05107741057872772, | |
| "learning_rate": 0.00014237702284773914, | |
| "loss": 0.0699, | |
| "num_input_tokens_seen": 15060864, | |
| "step": 1735, | |
| "train_runtime": 52741.7167, | |
| "train_tokens_per_second": 285.559 | |
| }, | |
| { | |
| "epoch": 0.7519040674120888, | |
| "grad_norm": 0.081186942756176, | |
| "learning_rate": 0.00014206268929373256, | |
| "loss": 0.0757, | |
| "num_input_tokens_seen": 15104000, | |
| "step": 1740, | |
| "train_runtime": 52763.6249, | |
| "train_tokens_per_second": 286.258 | |
| }, | |
| { | |
| "epoch": 0.7540647112839626, | |
| "grad_norm": 0.07930338382720947, | |
| "learning_rate": 0.0001417478499849986, | |
| "loss": 0.0782, | |
| "num_input_tokens_seen": 15147648, | |
| "step": 1745, | |
| "train_runtime": 52785.4191, | |
| "train_tokens_per_second": 286.967 | |
| }, | |
| { | |
| "epoch": 0.7562253551558364, | |
| "grad_norm": 0.07188103348016739, | |
| "learning_rate": 0.00014143250870711233, | |
| "loss": 0.0754, | |
| "num_input_tokens_seen": 15190896, | |
| "step": 1750, | |
| "train_runtime": 52807.0112, | |
| "train_tokens_per_second": 287.668 | |
| }, | |
| { | |
| "epoch": 0.7583859990277103, | |
| "grad_norm": 0.052755411714315414, | |
| "learning_rate": 0.00014111666925168442, | |
| "loss": 0.0686, | |
| "num_input_tokens_seen": 15233888, | |
| "step": 1755, | |
| "train_runtime": 52828.0814, | |
| "train_tokens_per_second": 288.367 | |
| }, | |
| { | |
| "epoch": 0.7605466428995841, | |
| "grad_norm": 0.05128923058509827, | |
| "learning_rate": 0.0001408003354163156, | |
| "loss": 0.0718, | |
| "num_input_tokens_seen": 15276944, | |
| "step": 1760, | |
| "train_runtime": 52850.1346, | |
| "train_tokens_per_second": 289.062 | |
| }, | |
| { | |
| "epoch": 0.7627072867714579, | |
| "grad_norm": 0.06151962652802467, | |
| "learning_rate": 0.0001404835110045509, | |
| "loss": 0.066, | |
| "num_input_tokens_seen": 15319904, | |
| "step": 1765, | |
| "train_runtime": 52872.1065, | |
| "train_tokens_per_second": 289.754 | |
| }, | |
| { | |
| "epoch": 0.7648679306433317, | |
| "grad_norm": 0.0742822214961052, | |
| "learning_rate": 0.0001401661998258339, | |
| "loss": 0.0756, | |
| "num_input_tokens_seen": 15363488, | |
| "step": 1770, | |
| "train_runtime": 52893.7032, | |
| "train_tokens_per_second": 290.46 | |
| }, | |
| { | |
| "epoch": 0.7670285745152056, | |
| "grad_norm": 0.0559610053896904, | |
| "learning_rate": 0.0001398484056954611, | |
| "loss": 0.0674, | |
| "num_input_tokens_seen": 15406704, | |
| "step": 1775, | |
| "train_runtime": 52916.0361, | |
| "train_tokens_per_second": 291.154 | |
| }, | |
| { | |
| "epoch": 0.7691892183870793, | |
| "grad_norm": 0.07098235189914703, | |
| "learning_rate": 0.00013953013243453582, | |
| "loss": 0.0744, | |
| "num_input_tokens_seen": 15450144, | |
| "step": 1780, | |
| "train_runtime": 52937.3355, | |
| "train_tokens_per_second": 291.857 | |
| }, | |
| { | |
| "epoch": 0.7713498622589532, | |
| "grad_norm": 0.06474477797746658, | |
| "learning_rate": 0.00013921138386992243, | |
| "loss": 0.0724, | |
| "num_input_tokens_seen": 15493568, | |
| "step": 1785, | |
| "train_runtime": 52959.2595, | |
| "train_tokens_per_second": 292.556 | |
| }, | |
| { | |
| "epoch": 0.773510506130827, | |
| "grad_norm": 0.06821322441101074, | |
| "learning_rate": 0.0001388921638342003, | |
| "loss": 0.0817, | |
| "num_input_tokens_seen": 15537664, | |
| "step": 1790, | |
| "train_runtime": 52980.746, | |
| "train_tokens_per_second": 293.27 | |
| }, | |
| { | |
| "epoch": 0.7756711500027008, | |
| "grad_norm": 0.07596802711486816, | |
| "learning_rate": 0.00013857247616561757, | |
| "loss": 0.0782, | |
| "num_input_tokens_seen": 15581200, | |
| "step": 1795, | |
| "train_runtime": 53003.4224, | |
| "train_tokens_per_second": 293.966 | |
| }, | |
| { | |
| "epoch": 0.7778317938745746, | |
| "grad_norm": 0.0654403567314148, | |
| "learning_rate": 0.00013825232470804523, | |
| "loss": 0.0732, | |
| "num_input_tokens_seen": 15624848, | |
| "step": 1800, | |
| "train_runtime": 53026.1216, | |
| "train_tokens_per_second": 294.663 | |
| }, | |
| { | |
| "epoch": 0.7778317938745746, | |
| "eval_loss": 0.07238650321960449, | |
| "eval_runtime": 712.4182, | |
| "eval_samples_per_second": 12.994, | |
| "eval_steps_per_second": 6.498, | |
| "num_input_tokens_seen": 15624848, | |
| "step": 1800 | |
| }, | |
| { | |
| "epoch": 0.7799924377464484, | |
| "grad_norm": 0.06928804516792297, | |
| "learning_rate": 0.00013793171331093077, | |
| "loss": 0.0793, | |
| "num_input_tokens_seen": 15668624, | |
| "step": 1805, | |
| "train_runtime": 53766.0817, | |
| "train_tokens_per_second": 291.422 | |
| }, | |
| { | |
| "epoch": 0.7821530816183223, | |
| "grad_norm": 0.06405510008335114, | |
| "learning_rate": 0.0001376106458292519, | |
| "loss": 0.0709, | |
| "num_input_tokens_seen": 15711872, | |
| "step": 1810, | |
| "train_runtime": 53788.3061, | |
| "train_tokens_per_second": 292.106 | |
| }, | |
| { | |
| "epoch": 0.7843137254901961, | |
| "grad_norm": 0.05507315695285797, | |
| "learning_rate": 0.00013728912612347017, | |
| "loss": 0.0745, | |
| "num_input_tokens_seen": 15755216, | |
| "step": 1815, | |
| "train_runtime": 53810.5879, | |
| "train_tokens_per_second": 292.79 | |
| }, | |
| { | |
| "epoch": 0.7864743693620699, | |
| "grad_norm": 0.05629369989037514, | |
| "learning_rate": 0.00013696715805948474, | |
| "loss": 0.0735, | |
| "num_input_tokens_seen": 15798480, | |
| "step": 1820, | |
| "train_runtime": 53831.7178, | |
| "train_tokens_per_second": 293.479 | |
| }, | |
| { | |
| "epoch": 0.7886350132339437, | |
| "grad_norm": 0.060665566474199295, | |
| "learning_rate": 0.00013664474550858553, | |
| "loss": 0.0651, | |
| "num_input_tokens_seen": 15841664, | |
| "step": 1825, | |
| "train_runtime": 53853.2255, | |
| "train_tokens_per_second": 294.164 | |
| }, | |
| { | |
| "epoch": 0.7907956571058176, | |
| "grad_norm": 0.05001268535852432, | |
| "learning_rate": 0.00013632189234740713, | |
| "loss": 0.0768, | |
| "num_input_tokens_seen": 15885376, | |
| "step": 1830, | |
| "train_runtime": 53874.7714, | |
| "train_tokens_per_second": 294.857 | |
| }, | |
| { | |
| "epoch": 0.7929563009776913, | |
| "grad_norm": 0.06804929673671722, | |
| "learning_rate": 0.00013599860245788178, | |
| "loss": 0.0761, | |
| "num_input_tokens_seen": 15929120, | |
| "step": 1835, | |
| "train_runtime": 53897.3682, | |
| "train_tokens_per_second": 295.545 | |
| }, | |
| { | |
| "epoch": 0.7951169448495652, | |
| "grad_norm": 0.04843413084745407, | |
| "learning_rate": 0.00013567487972719305, | |
| "loss": 0.0633, | |
| "num_input_tokens_seen": 15971568, | |
| "step": 1840, | |
| "train_runtime": 53919.5564, | |
| "train_tokens_per_second": 296.211 | |
| }, | |
| { | |
| "epoch": 0.797277588721439, | |
| "grad_norm": 0.06037944182753563, | |
| "learning_rate": 0.00013535072804772864, | |
| "loss": 0.0824, | |
| "num_input_tokens_seen": 16015520, | |
| "step": 1845, | |
| "train_runtime": 53941.2394, | |
| "train_tokens_per_second": 296.907 | |
| }, | |
| { | |
| "epoch": 0.7994382325933128, | |
| "grad_norm": 0.06481627374887466, | |
| "learning_rate": 0.00013502615131703413, | |
| "loss": 0.0695, | |
| "num_input_tokens_seen": 16058864, | |
| "step": 1850, | |
| "train_runtime": 53962.9055, | |
| "train_tokens_per_second": 297.591 | |
| }, | |
| { | |
| "epoch": 0.8015988764651866, | |
| "grad_norm": 0.06719739735126495, | |
| "learning_rate": 0.0001347011534377657, | |
| "loss": 0.0645, | |
| "num_input_tokens_seen": 16101680, | |
| "step": 1855, | |
| "train_runtime": 53984.6086, | |
| "train_tokens_per_second": 298.264 | |
| }, | |
| { | |
| "epoch": 0.8037595203370604, | |
| "grad_norm": 0.06208725646138191, | |
| "learning_rate": 0.00013437573831764343, | |
| "loss": 0.0812, | |
| "num_input_tokens_seen": 16145040, | |
| "step": 1860, | |
| "train_runtime": 54006.3057, | |
| "train_tokens_per_second": 298.947 | |
| }, | |
| { | |
| "epoch": 0.8059201642089343, | |
| "grad_norm": 0.06299016624689102, | |
| "learning_rate": 0.00013404990986940412, | |
| "loss": 0.0722, | |
| "num_input_tokens_seen": 16188416, | |
| "step": 1865, | |
| "train_runtime": 54028.3257, | |
| "train_tokens_per_second": 299.628 | |
| }, | |
| { | |
| "epoch": 0.8080808080808081, | |
| "grad_norm": 0.07144487649202347, | |
| "learning_rate": 0.00013372367201075453, | |
| "loss": 0.0614, | |
| "num_input_tokens_seen": 16230864, | |
| "step": 1870, | |
| "train_runtime": 54049.739, | |
| "train_tokens_per_second": 300.295 | |
| }, | |
| { | |
| "epoch": 0.8102414519526819, | |
| "grad_norm": 0.06753461062908173, | |
| "learning_rate": 0.00013339702866432392, | |
| "loss": 0.0695, | |
| "num_input_tokens_seen": 16274048, | |
| "step": 1875, | |
| "train_runtime": 54071.0694, | |
| "train_tokens_per_second": 300.975 | |
| }, | |
| { | |
| "epoch": 0.8124020958245557, | |
| "grad_norm": 0.0707787573337555, | |
| "learning_rate": 0.00013306998375761718, | |
| "loss": 0.0772, | |
| "num_input_tokens_seen": 16317760, | |
| "step": 1880, | |
| "train_runtime": 54092.53, | |
| "train_tokens_per_second": 301.664 | |
| }, | |
| { | |
| "epoch": 0.8145627396964296, | |
| "grad_norm": 0.07154600322246552, | |
| "learning_rate": 0.00013274254122296747, | |
| "loss": 0.0765, | |
| "num_input_tokens_seen": 16361440, | |
| "step": 1885, | |
| "train_runtime": 54113.9916, | |
| "train_tokens_per_second": 302.351 | |
| }, | |
| { | |
| "epoch": 0.8167233835683033, | |
| "grad_norm": 0.07142435014247894, | |
| "learning_rate": 0.00013241470499748893, | |
| "loss": 0.071, | |
| "num_input_tokens_seen": 16404896, | |
| "step": 1890, | |
| "train_runtime": 54135.2913, | |
| "train_tokens_per_second": 303.035 | |
| }, | |
| { | |
| "epoch": 0.8188840274401772, | |
| "grad_norm": 0.05690345913171768, | |
| "learning_rate": 0.00013208647902302945, | |
| "loss": 0.0692, | |
| "num_input_tokens_seen": 16448064, | |
| "step": 1895, | |
| "train_runtime": 54156.9443, | |
| "train_tokens_per_second": 303.711 | |
| }, | |
| { | |
| "epoch": 0.8210446713120509, | |
| "grad_norm": 0.06509065628051758, | |
| "learning_rate": 0.00013175786724612307, | |
| "loss": 0.0728, | |
| "num_input_tokens_seen": 16491408, | |
| "step": 1900, | |
| "train_runtime": 54178.256, | |
| "train_tokens_per_second": 304.392 | |
| }, | |
| { | |
| "epoch": 0.8232053151839248, | |
| "grad_norm": 0.0677073523402214, | |
| "learning_rate": 0.00013142887361794277, | |
| "loss": 0.0739, | |
| "num_input_tokens_seen": 16535056, | |
| "step": 1905, | |
| "train_runtime": 54199.8692, | |
| "train_tokens_per_second": 305.076 | |
| }, | |
| { | |
| "epoch": 0.8253659590557987, | |
| "grad_norm": 0.07482102513313293, | |
| "learning_rate": 0.00013109950209425284, | |
| "loss": 0.0731, | |
| "num_input_tokens_seen": 16578592, | |
| "step": 1910, | |
| "train_runtime": 54221.4112, | |
| "train_tokens_per_second": 305.757 | |
| }, | |
| { | |
| "epoch": 0.8275266029276724, | |
| "grad_norm": 0.058642659336328506, | |
| "learning_rate": 0.00013076975663536123, | |
| "loss": 0.072, | |
| "num_input_tokens_seen": 16622016, | |
| "step": 1915, | |
| "train_runtime": 54242.9455, | |
| "train_tokens_per_second": 306.436 | |
| }, | |
| { | |
| "epoch": 0.8296872467995463, | |
| "grad_norm": 0.10473670810461044, | |
| "learning_rate": 0.0001304396412060721, | |
| "loss": 0.0711, | |
| "num_input_tokens_seen": 16665008, | |
| "step": 1920, | |
| "train_runtime": 54264.7218, | |
| "train_tokens_per_second": 307.106 | |
| }, | |
| { | |
| "epoch": 0.8318478906714201, | |
| "grad_norm": 0.060704197734594345, | |
| "learning_rate": 0.00013010915977563803, | |
| "loss": 0.0677, | |
| "num_input_tokens_seen": 16707968, | |
| "step": 1925, | |
| "train_runtime": 54287.3702, | |
| "train_tokens_per_second": 307.769 | |
| }, | |
| { | |
| "epoch": 0.8340085345432939, | |
| "grad_norm": 0.06165318936109543, | |
| "learning_rate": 0.00012977831631771238, | |
| "loss": 0.0709, | |
| "num_input_tokens_seen": 16751296, | |
| "step": 1930, | |
| "train_runtime": 54308.64, | |
| "train_tokens_per_second": 308.446 | |
| }, | |
| { | |
| "epoch": 0.8361691784151677, | |
| "grad_norm": 0.05098670348525047, | |
| "learning_rate": 0.00012944711481030144, | |
| "loss": 0.0662, | |
| "num_input_tokens_seen": 16794128, | |
| "step": 1935, | |
| "train_runtime": 54330.2306, | |
| "train_tokens_per_second": 309.112 | |
| }, | |
| { | |
| "epoch": 0.8383298222870416, | |
| "grad_norm": 0.05525912716984749, | |
| "learning_rate": 0.00012911555923571667, | |
| "loss": 0.0733, | |
| "num_input_tokens_seen": 16837568, | |
| "step": 1940, | |
| "train_runtime": 54352.8277, | |
| "train_tokens_per_second": 309.783 | |
| }, | |
| { | |
| "epoch": 0.8404904661589153, | |
| "grad_norm": 0.06591261923313141, | |
| "learning_rate": 0.0001287836535805267, | |
| "loss": 0.0677, | |
| "num_input_tokens_seen": 16880768, | |
| "step": 1945, | |
| "train_runtime": 54374.0425, | |
| "train_tokens_per_second": 310.456 | |
| }, | |
| { | |
| "epoch": 0.8426511100307892, | |
| "grad_norm": 0.07755870372056961, | |
| "learning_rate": 0.00012845140183550952, | |
| "loss": 0.073, | |
| "num_input_tokens_seen": 16923904, | |
| "step": 1950, | |
| "train_runtime": 54395.7961, | |
| "train_tokens_per_second": 311.125 | |
| }, | |
| { | |
| "epoch": 0.8448117539026629, | |
| "grad_norm": 0.06816552579402924, | |
| "learning_rate": 0.00012811880799560443, | |
| "loss": 0.0734, | |
| "num_input_tokens_seen": 16967392, | |
| "step": 1955, | |
| "train_runtime": 54418.1536, | |
| "train_tokens_per_second": 311.797 | |
| }, | |
| { | |
| "epoch": 0.8469723977745368, | |
| "grad_norm": 0.07150571793317795, | |
| "learning_rate": 0.00012778587605986403, | |
| "loss": 0.0673, | |
| "num_input_tokens_seen": 17010560, | |
| "step": 1960, | |
| "train_runtime": 54441.1468, | |
| "train_tokens_per_second": 312.458 | |
| }, | |
| { | |
| "epoch": 0.8491330416464107, | |
| "grad_norm": 0.05127614736557007, | |
| "learning_rate": 0.0001274526100314061, | |
| "loss": 0.0626, | |
| "num_input_tokens_seen": 17053200, | |
| "step": 1965, | |
| "train_runtime": 54462.2921, | |
| "train_tokens_per_second": 313.119 | |
| }, | |
| { | |
| "epoch": 0.8512936855182844, | |
| "grad_norm": 0.06928465515375137, | |
| "learning_rate": 0.00012711901391736555, | |
| "loss": 0.0696, | |
| "num_input_tokens_seen": 17096224, | |
| "step": 1970, | |
| "train_runtime": 54483.9566, | |
| "train_tokens_per_second": 313.785 | |
| }, | |
| { | |
| "epoch": 0.8534543293901583, | |
| "grad_norm": 0.06846630573272705, | |
| "learning_rate": 0.00012678509172884617, | |
| "loss": 0.0774, | |
| "num_input_tokens_seen": 17139760, | |
| "step": 1975, | |
| "train_runtime": 54506.5848, | |
| "train_tokens_per_second": 314.453 | |
| }, | |
| { | |
| "epoch": 0.8556149732620321, | |
| "grad_norm": 0.062459319829940796, | |
| "learning_rate": 0.00012645084748087236, | |
| "loss": 0.0702, | |
| "num_input_tokens_seen": 17183136, | |
| "step": 1980, | |
| "train_runtime": 54528.9268, | |
| "train_tokens_per_second": 315.12 | |
| }, | |
| { | |
| "epoch": 0.8577756171339059, | |
| "grad_norm": 0.0698806494474411, | |
| "learning_rate": 0.00012611628519234094, | |
| "loss": 0.0707, | |
| "num_input_tokens_seen": 17226272, | |
| "step": 1985, | |
| "train_runtime": 54550.8877, | |
| "train_tokens_per_second": 315.784 | |
| }, | |
| { | |
| "epoch": 0.8599362610057797, | |
| "grad_norm": 0.06589354574680328, | |
| "learning_rate": 0.00012578140888597284, | |
| "loss": 0.0709, | |
| "num_input_tokens_seen": 17269712, | |
| "step": 1990, | |
| "train_runtime": 54572.7325, | |
| "train_tokens_per_second": 316.453 | |
| }, | |
| { | |
| "epoch": 0.8620969048776536, | |
| "grad_norm": 0.06981069594621658, | |
| "learning_rate": 0.00012544622258826464, | |
| "loss": 0.0736, | |
| "num_input_tokens_seen": 17312816, | |
| "step": 1995, | |
| "train_runtime": 54594.5932, | |
| "train_tokens_per_second": 317.116 | |
| }, | |
| { | |
| "epoch": 0.8642575487495273, | |
| "grad_norm": 0.05967501550912857, | |
| "learning_rate": 0.00012511073032944018, | |
| "loss": 0.0716, | |
| "num_input_tokens_seen": 17356192, | |
| "step": 2000, | |
| "train_runtime": 54617.9513, | |
| "train_tokens_per_second": 317.774 | |
| }, | |
| { | |
| "epoch": 0.8642575487495273, | |
| "eval_loss": 0.07185881584882736, | |
| "eval_runtime": 8674.4485, | |
| "eval_samples_per_second": 1.067, | |
| "eval_steps_per_second": 0.534, | |
| "num_input_tokens_seen": 17356192, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 0.8664181926214012, | |
| "grad_norm": 0.03572811186313629, | |
| "learning_rate": 0.0001247749361434022, | |
| "loss": 0.0743, | |
| "num_input_tokens_seen": 17399696, | |
| "step": 2005, | |
| "train_runtime": 63316.239, | |
| "train_tokens_per_second": 274.806 | |
| }, | |
| { | |
| "epoch": 0.8685788364932749, | |
| "grad_norm": 0.06791722029447556, | |
| "learning_rate": 0.00012443884406768368, | |
| "loss": 0.0736, | |
| "num_input_tokens_seen": 17443488, | |
| "step": 2010, | |
| "train_runtime": 63337.6458, | |
| "train_tokens_per_second": 275.405 | |
| }, | |
| { | |
| "epoch": 0.8707394803651488, | |
| "grad_norm": 0.06620905548334122, | |
| "learning_rate": 0.00012410245814339948, | |
| "loss": 0.0714, | |
| "num_input_tokens_seen": 17486960, | |
| "step": 2015, | |
| "train_runtime": 63359.6078, | |
| "train_tokens_per_second": 275.995 | |
| }, | |
| { | |
| "epoch": 0.8729001242370227, | |
| "grad_norm": 0.06703072041273117, | |
| "learning_rate": 0.0001237657824151975, | |
| "loss": 0.0704, | |
| "num_input_tokens_seen": 17530256, | |
| "step": 2020, | |
| "train_runtime": 63381.5042, | |
| "train_tokens_per_second": 276.583 | |
| }, | |
| { | |
| "epoch": 0.8750607681088964, | |
| "grad_norm": 0.0718189924955368, | |
| "learning_rate": 0.0001234288209312104, | |
| "loss": 0.0812, | |
| "num_input_tokens_seen": 17574496, | |
| "step": 2025, | |
| "train_runtime": 63403.8123, | |
| "train_tokens_per_second": 277.184 | |
| }, | |
| { | |
| "epoch": 0.8772214119807703, | |
| "grad_norm": 0.07962594926357269, | |
| "learning_rate": 0.0001230915777430065, | |
| "loss": 0.0722, | |
| "num_input_tokens_seen": 17618192, | |
| "step": 2030, | |
| "train_runtime": 63426.0337, | |
| "train_tokens_per_second": 277.775 | |
| }, | |
| { | |
| "epoch": 0.8793820558526441, | |
| "grad_norm": 0.07039056718349457, | |
| "learning_rate": 0.00012275405690554135, | |
| "loss": 0.0662, | |
| "num_input_tokens_seen": 17661008, | |
| "step": 2035, | |
| "train_runtime": 63448.4618, | |
| "train_tokens_per_second": 278.352 | |
| }, | |
| { | |
| "epoch": 0.8815426997245179, | |
| "grad_norm": 0.06130144000053406, | |
| "learning_rate": 0.00012241626247710906, | |
| "loss": 0.0712, | |
| "num_input_tokens_seen": 17704320, | |
| "step": 2040, | |
| "train_runtime": 63470.3468, | |
| "train_tokens_per_second": 278.938 | |
| }, | |
| { | |
| "epoch": 0.8837033435963917, | |
| "grad_norm": 0.06900149583816528, | |
| "learning_rate": 0.00012207819851929315, | |
| "loss": 0.0736, | |
| "num_input_tokens_seen": 17747808, | |
| "step": 2045, | |
| "train_runtime": 63492.3122, | |
| "train_tokens_per_second": 279.527 | |
| }, | |
| { | |
| "epoch": 0.8858639874682656, | |
| "grad_norm": 0.0547107569873333, | |
| "learning_rate": 0.00012173986909691799, | |
| "loss": 0.0737, | |
| "num_input_tokens_seen": 17791120, | |
| "step": 2050, | |
| "train_runtime": 63514.147, | |
| "train_tokens_per_second": 280.113 | |
| }, | |
| { | |
| "epoch": 0.8880246313401393, | |
| "grad_norm": 0.07016472518444061, | |
| "learning_rate": 0.0001214012782779999, | |
| "loss": 0.0714, | |
| "num_input_tokens_seen": 17834736, | |
| "step": 2055, | |
| "train_runtime": 63536.7563, | |
| "train_tokens_per_second": 280.7 | |
| }, | |
| { | |
| "epoch": 0.8901852752120132, | |
| "grad_norm": 0.060870055109262466, | |
| "learning_rate": 0.00012106243013369811, | |
| "loss": 0.0676, | |
| "num_input_tokens_seen": 17877760, | |
| "step": 2060, | |
| "train_runtime": 63559.4064, | |
| "train_tokens_per_second": 281.276 | |
| }, | |
| { | |
| "epoch": 0.892345919083887, | |
| "grad_norm": 0.06822679936885834, | |
| "learning_rate": 0.00012072332873826595, | |
| "loss": 0.0741, | |
| "num_input_tokens_seen": 17921072, | |
| "step": 2065, | |
| "train_runtime": 63580.9227, | |
| "train_tokens_per_second": 281.862 | |
| }, | |
| { | |
| "epoch": 0.8945065629557608, | |
| "grad_norm": 0.07840294390916824, | |
| "learning_rate": 0.00012038397816900177, | |
| "loss": 0.0758, | |
| "num_input_tokens_seen": 17964768, | |
| "step": 2070, | |
| "train_runtime": 63602.8765, | |
| "train_tokens_per_second": 282.452 | |
| }, | |
| { | |
| "epoch": 0.8966672068276347, | |
| "grad_norm": 0.05988030880689621, | |
| "learning_rate": 0.00012004438250619991, | |
| "loss": 0.0713, | |
| "num_input_tokens_seen": 18008112, | |
| "step": 2075, | |
| "train_runtime": 63624.4842, | |
| "train_tokens_per_second": 283.037 | |
| }, | |
| { | |
| "epoch": 0.8988278506995084, | |
| "grad_norm": 0.05252789333462715, | |
| "learning_rate": 0.0001197045458331018, | |
| "loss": 0.075, | |
| "num_input_tokens_seen": 18051376, | |
| "step": 2080, | |
| "train_runtime": 63645.7848, | |
| "train_tokens_per_second": 283.622 | |
| }, | |
| { | |
| "epoch": 0.9009884945713823, | |
| "grad_norm": 0.05312652140855789, | |
| "learning_rate": 0.00011936447223584657, | |
| "loss": 0.0708, | |
| "num_input_tokens_seen": 18094832, | |
| "step": 2085, | |
| "train_runtime": 63667.9044, | |
| "train_tokens_per_second": 284.206 | |
| }, | |
| { | |
| "epoch": 0.9031491384432561, | |
| "grad_norm": 0.06478448957204819, | |
| "learning_rate": 0.00011902416580342221, | |
| "loss": 0.0715, | |
| "num_input_tokens_seen": 18138112, | |
| "step": 2090, | |
| "train_runtime": 63689.3636, | |
| "train_tokens_per_second": 284.79 | |
| }, | |
| { | |
| "epoch": 0.9053097823151299, | |
| "grad_norm": 0.07553625106811523, | |
| "learning_rate": 0.00011868363062761621, | |
| "loss": 0.0727, | |
| "num_input_tokens_seen": 18181984, | |
| "step": 2095, | |
| "train_runtime": 63711.3409, | |
| "train_tokens_per_second": 285.381 | |
| }, | |
| { | |
| "epoch": 0.9074704261870037, | |
| "grad_norm": 0.05751855671405792, | |
| "learning_rate": 0.00011834287080296644, | |
| "loss": 0.0714, | |
| "num_input_tokens_seen": 18225232, | |
| "step": 2100, | |
| "train_runtime": 63733.4595, | |
| "train_tokens_per_second": 285.96 | |
| }, | |
| { | |
| "epoch": 0.9096310700588776, | |
| "grad_norm": 0.06295677274465561, | |
| "learning_rate": 0.00011800189042671198, | |
| "loss": 0.0745, | |
| "num_input_tokens_seen": 18268656, | |
| "step": 2105, | |
| "train_runtime": 63755.0208, | |
| "train_tokens_per_second": 286.545 | |
| }, | |
| { | |
| "epoch": 0.9117917139307513, | |
| "grad_norm": 0.05820206552743912, | |
| "learning_rate": 0.0001176606935987437, | |
| "loss": 0.0686, | |
| "num_input_tokens_seen": 18311760, | |
| "step": 2110, | |
| "train_runtime": 63776.8833, | |
| "train_tokens_per_second": 287.122 | |
| }, | |
| { | |
| "epoch": 0.9139523578026252, | |
| "grad_norm": 0.06826373189687729, | |
| "learning_rate": 0.00011731928442155508, | |
| "loss": 0.0661, | |
| "num_input_tokens_seen": 18354832, | |
| "step": 2115, | |
| "train_runtime": 63798.9017, | |
| "train_tokens_per_second": 287.698 | |
| }, | |
| { | |
| "epoch": 0.916113001674499, | |
| "grad_norm": 0.07378843426704407, | |
| "learning_rate": 0.00011697766700019289, | |
| "loss": 0.0793, | |
| "num_input_tokens_seen": 18398608, | |
| "step": 2120, | |
| "train_runtime": 63820.827, | |
| "train_tokens_per_second": 288.285 | |
| }, | |
| { | |
| "epoch": 0.9182736455463728, | |
| "grad_norm": 0.05657931789755821, | |
| "learning_rate": 0.0001166358454422077, | |
| "loss": 0.0736, | |
| "num_input_tokens_seen": 18442160, | |
| "step": 2125, | |
| "train_runtime": 63843.125, | |
| "train_tokens_per_second": 288.867 | |
| }, | |
| { | |
| "epoch": 0.9204342894182467, | |
| "grad_norm": 0.07037783414125443, | |
| "learning_rate": 0.0001162938238576047, | |
| "loss": 0.0711, | |
| "num_input_tokens_seen": 18485376, | |
| "step": 2130, | |
| "train_runtime": 63864.611, | |
| "train_tokens_per_second": 289.446 | |
| }, | |
| { | |
| "epoch": 0.9225949332901204, | |
| "grad_norm": 0.07411188632249832, | |
| "learning_rate": 0.00011595160635879407, | |
| "loss": 0.0704, | |
| "num_input_tokens_seen": 18528976, | |
| "step": 2135, | |
| "train_runtime": 63887.3619, | |
| "train_tokens_per_second": 290.026 | |
| }, | |
| { | |
| "epoch": 0.9247555771619943, | |
| "grad_norm": 0.06208517774939537, | |
| "learning_rate": 0.00011560919706054167, | |
| "loss": 0.067, | |
| "num_input_tokens_seen": 18571888, | |
| "step": 2140, | |
| "train_runtime": 63909.8073, | |
| "train_tokens_per_second": 290.595 | |
| }, | |
| { | |
| "epoch": 0.9269162210338681, | |
| "grad_norm": 0.07666601240634918, | |
| "learning_rate": 0.00011526660007991956, | |
| "loss": 0.071, | |
| "num_input_tokens_seen": 18615296, | |
| "step": 2145, | |
| "train_runtime": 63932.0127, | |
| "train_tokens_per_second": 291.173 | |
| }, | |
| { | |
| "epoch": 0.9290768649057419, | |
| "grad_norm": 0.06127588078379631, | |
| "learning_rate": 0.0001149238195362564, | |
| "loss": 0.0689, | |
| "num_input_tokens_seen": 18658384, | |
| "step": 2150, | |
| "train_runtime": 63953.3132, | |
| "train_tokens_per_second": 291.75 | |
| }, | |
| { | |
| "epoch": 0.9312375087776157, | |
| "grad_norm": 0.074986532330513, | |
| "learning_rate": 0.000114580859551088, | |
| "loss": 0.08, | |
| "num_input_tokens_seen": 18702240, | |
| "step": 2155, | |
| "train_runtime": 63975.8598, | |
| "train_tokens_per_second": 292.333 | |
| }, | |
| { | |
| "epoch": 0.9333981526494896, | |
| "grad_norm": 0.06027218699455261, | |
| "learning_rate": 0.00011423772424810775, | |
| "loss": 0.0711, | |
| "num_input_tokens_seen": 18745424, | |
| "step": 2160, | |
| "train_runtime": 63997.5378, | |
| "train_tokens_per_second": 292.909 | |
| }, | |
| { | |
| "epoch": 0.9355587965213633, | |
| "grad_norm": 0.0647510513663292, | |
| "learning_rate": 0.00011389441775311704, | |
| "loss": 0.0757, | |
| "num_input_tokens_seen": 18788960, | |
| "step": 2165, | |
| "train_runtime": 64019.1462, | |
| "train_tokens_per_second": 293.49 | |
| }, | |
| { | |
| "epoch": 0.9377194403932372, | |
| "grad_norm": 0.053142938762903214, | |
| "learning_rate": 0.00011355094419397563, | |
| "loss": 0.0673, | |
| "num_input_tokens_seen": 18832304, | |
| "step": 2170, | |
| "train_runtime": 64040.9072, | |
| "train_tokens_per_second": 294.067 | |
| }, | |
| { | |
| "epoch": 0.939880084265111, | |
| "grad_norm": 0.07196088880300522, | |
| "learning_rate": 0.00011320730770055204, | |
| "loss": 0.0764, | |
| "num_input_tokens_seen": 18876144, | |
| "step": 2175, | |
| "train_runtime": 64063.2079, | |
| "train_tokens_per_second": 294.649 | |
| }, | |
| { | |
| "epoch": 0.9420407281369848, | |
| "grad_norm": 0.06955163925886154, | |
| "learning_rate": 0.00011286351240467387, | |
| "loss": 0.0775, | |
| "num_input_tokens_seen": 18920064, | |
| "step": 2180, | |
| "train_runtime": 64084.8585, | |
| "train_tokens_per_second": 295.235 | |
| }, | |
| { | |
| "epoch": 0.9442013720088587, | |
| "grad_norm": 0.08103214204311371, | |
| "learning_rate": 0.00011251956244007819, | |
| "loss": 0.0766, | |
| "num_input_tokens_seen": 18963888, | |
| "step": 2185, | |
| "train_runtime": 64106.7078, | |
| "train_tokens_per_second": 295.818 | |
| }, | |
| { | |
| "epoch": 0.9463620158807324, | |
| "grad_norm": 0.06878823786973953, | |
| "learning_rate": 0.0001121754619423617, | |
| "loss": 0.0731, | |
| "num_input_tokens_seen": 19007312, | |
| "step": 2190, | |
| "train_runtime": 64128.2154, | |
| "train_tokens_per_second": 296.395 | |
| }, | |
| { | |
| "epoch": 0.9485226597526063, | |
| "grad_norm": 0.05954969301819801, | |
| "learning_rate": 0.00011183121504893108, | |
| "loss": 0.0783, | |
| "num_input_tokens_seen": 19050768, | |
| "step": 2195, | |
| "train_runtime": 64150.9571, | |
| "train_tokens_per_second": 296.968 | |
| }, | |
| { | |
| "epoch": 0.9506833036244801, | |
| "grad_norm": 0.04972570016980171, | |
| "learning_rate": 0.00011148682589895339, | |
| "loss": 0.0689, | |
| "num_input_tokens_seen": 19094480, | |
| "step": 2200, | |
| "train_runtime": 64173.1029, | |
| "train_tokens_per_second": 297.546 | |
| }, | |
| { | |
| "epoch": 0.9506833036244801, | |
| "eval_loss": 0.07115475833415985, | |
| "eval_runtime": 716.0686, | |
| "eval_samples_per_second": 12.928, | |
| "eval_steps_per_second": 6.464, | |
| "num_input_tokens_seen": 19094480, | |
| "step": 2200 | |
| }, | |
| { | |
| "epoch": 0.9528439474963539, | |
| "grad_norm": 0.051516707986593246, | |
| "learning_rate": 0.00011114229863330596, | |
| "loss": 0.0686, | |
| "num_input_tokens_seen": 19137488, | |
| "step": 2205, | |
| "train_runtime": 64915.0379, | |
| "train_tokens_per_second": 294.808 | |
| }, | |
| { | |
| "epoch": 0.9550045913682277, | |
| "grad_norm": 0.06157120689749718, | |
| "learning_rate": 0.00011079763739452696, | |
| "loss": 0.0687, | |
| "num_input_tokens_seen": 19180768, | |
| "step": 2210, | |
| "train_runtime": 64936.8917, | |
| "train_tokens_per_second": 295.376 | |
| }, | |
| { | |
| "epoch": 0.9571652352401016, | |
| "grad_norm": 0.07093177735805511, | |
| "learning_rate": 0.00011045284632676536, | |
| "loss": 0.0791, | |
| "num_input_tokens_seen": 19224560, | |
| "step": 2215, | |
| "train_runtime": 64958.8453, | |
| "train_tokens_per_second": 295.95 | |
| }, | |
| { | |
| "epoch": 0.9593258791119754, | |
| "grad_norm": 0.08016602694988251, | |
| "learning_rate": 0.00011010792957573115, | |
| "loss": 0.0719, | |
| "num_input_tokens_seen": 19267696, | |
| "step": 2220, | |
| "train_runtime": 64981.2868, | |
| "train_tokens_per_second": 296.511 | |
| }, | |
| { | |
| "epoch": 0.9614865229838492, | |
| "grad_norm": 0.06384464353322983, | |
| "learning_rate": 0.00010976289128864556, | |
| "loss": 0.0698, | |
| "num_input_tokens_seen": 19311152, | |
| "step": 2225, | |
| "train_runtime": 65002.6752, | |
| "train_tokens_per_second": 297.082 | |
| }, | |
| { | |
| "epoch": 0.963647166855723, | |
| "grad_norm": 0.06991935521364212, | |
| "learning_rate": 0.00010941773561419117, | |
| "loss": 0.0764, | |
| "num_input_tokens_seen": 19354544, | |
| "step": 2230, | |
| "train_runtime": 65024.4873, | |
| "train_tokens_per_second": 297.65 | |
| }, | |
| { | |
| "epoch": 0.9658078107275968, | |
| "grad_norm": 0.06561180204153061, | |
| "learning_rate": 0.00010907246670246194, | |
| "loss": 0.0678, | |
| "num_input_tokens_seen": 19397824, | |
| "step": 2235, | |
| "train_runtime": 65047.126, | |
| "train_tokens_per_second": 298.212 | |
| }, | |
| { | |
| "epoch": 0.9679684545994707, | |
| "grad_norm": 0.0609147846698761, | |
| "learning_rate": 0.00010872708870491337, | |
| "loss": 0.07, | |
| "num_input_tokens_seen": 19441136, | |
| "step": 2240, | |
| "train_runtime": 65069.6388, | |
| "train_tokens_per_second": 298.774 | |
| }, | |
| { | |
| "epoch": 0.9701290984713444, | |
| "grad_norm": 0.049989230930805206, | |
| "learning_rate": 0.00010838160577431269, | |
| "loss": 0.0709, | |
| "num_input_tokens_seen": 19484528, | |
| "step": 2245, | |
| "train_runtime": 65092.5491, | |
| "train_tokens_per_second": 299.336 | |
| }, | |
| { | |
| "epoch": 0.9722897423432183, | |
| "grad_norm": 0.05758450925350189, | |
| "learning_rate": 0.0001080360220646887, | |
| "loss": 0.0738, | |
| "num_input_tokens_seen": 19527920, | |
| "step": 2250, | |
| "train_runtime": 65114.2821, | |
| "train_tokens_per_second": 299.902 | |
| }, | |
| { | |
| "epoch": 0.9744503862150921, | |
| "grad_norm": 0.06077814847230911, | |
| "learning_rate": 0.00010769034173128207, | |
| "loss": 0.0837, | |
| "num_input_tokens_seen": 19571984, | |
| "step": 2255, | |
| "train_runtime": 65136.2202, | |
| "train_tokens_per_second": 300.478 | |
| }, | |
| { | |
| "epoch": 0.9766110300869659, | |
| "grad_norm": 0.05624840408563614, | |
| "learning_rate": 0.00010734456893049514, | |
| "loss": 0.0733, | |
| "num_input_tokens_seen": 19615088, | |
| "step": 2260, | |
| "train_runtime": 65158.2319, | |
| "train_tokens_per_second": 301.038 | |
| }, | |
| { | |
| "epoch": 0.9787716739588397, | |
| "grad_norm": 0.08435752242803574, | |
| "learning_rate": 0.00010699870781984218, | |
| "loss": 0.0674, | |
| "num_input_tokens_seen": 19658224, | |
| "step": 2265, | |
| "train_runtime": 65180.7736, | |
| "train_tokens_per_second": 301.595 | |
| }, | |
| { | |
| "epoch": 0.9809323178307136, | |
| "grad_norm": 0.06772467494010925, | |
| "learning_rate": 0.00010665276255789923, | |
| "loss": 0.0609, | |
| "num_input_tokens_seen": 19700912, | |
| "step": 2270, | |
| "train_runtime": 65201.7743, | |
| "train_tokens_per_second": 302.153 | |
| }, | |
| { | |
| "epoch": 0.9830929617025874, | |
| "grad_norm": 0.06909680366516113, | |
| "learning_rate": 0.00010630673730425412, | |
| "loss": 0.0692, | |
| "num_input_tokens_seen": 19744352, | |
| "step": 2275, | |
| "train_runtime": 65223.8375, | |
| "train_tokens_per_second": 302.717 | |
| }, | |
| { | |
| "epoch": 0.9852536055744612, | |
| "grad_norm": 0.06532754749059677, | |
| "learning_rate": 0.0001059606362194565, | |
| "loss": 0.0675, | |
| "num_input_tokens_seen": 19787488, | |
| "step": 2280, | |
| "train_runtime": 65245.6541, | |
| "train_tokens_per_second": 303.277 | |
| }, | |
| { | |
| "epoch": 0.987414249446335, | |
| "grad_norm": 0.06435127556324005, | |
| "learning_rate": 0.00010561446346496786, | |
| "loss": 0.0713, | |
| "num_input_tokens_seen": 19830608, | |
| "step": 2285, | |
| "train_runtime": 65267.3727, | |
| "train_tokens_per_second": 303.836 | |
| }, | |
| { | |
| "epoch": 0.9895748933182088, | |
| "grad_norm": 0.0690252035856247, | |
| "learning_rate": 0.00010526822320311136, | |
| "loss": 0.0785, | |
| "num_input_tokens_seen": 19873984, | |
| "step": 2290, | |
| "train_runtime": 65289.8265, | |
| "train_tokens_per_second": 304.396 | |
| }, | |
| { | |
| "epoch": 0.9917355371900827, | |
| "grad_norm": 0.06570211052894592, | |
| "learning_rate": 0.00010492191959702187, | |
| "loss": 0.0677, | |
| "num_input_tokens_seen": 19917008, | |
| "step": 2295, | |
| "train_runtime": 65312.2643, | |
| "train_tokens_per_second": 304.951 | |
| }, | |
| { | |
| "epoch": 0.9938961810619564, | |
| "grad_norm": 0.06227204203605652, | |
| "learning_rate": 0.00010457555681059597, | |
| "loss": 0.0683, | |
| "num_input_tokens_seen": 19960096, | |
| "step": 2300, | |
| "train_runtime": 65334.0717, | |
| "train_tokens_per_second": 305.508 | |
| }, | |
| { | |
| "epoch": 0.9960568249338303, | |
| "grad_norm": 0.08121524751186371, | |
| "learning_rate": 0.00010422913900844169, | |
| "loss": 0.0766, | |
| "num_input_tokens_seen": 20004080, | |
| "step": 2305, | |
| "train_runtime": 65356.4613, | |
| "train_tokens_per_second": 306.077 | |
| }, | |
| { | |
| "epoch": 0.9982174688057041, | |
| "grad_norm": 0.07001639157533646, | |
| "learning_rate": 0.0001038826703558287, | |
| "loss": 0.0694, | |
| "num_input_tokens_seen": 20047552, | |
| "step": 2310, | |
| "train_runtime": 65378.934, | |
| "train_tokens_per_second": 306.636 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "grad_norm": 0.2362603098154068, | |
| "learning_rate": 0.00010353615501863799, | |
| "loss": 0.0615, | |
| "num_input_tokens_seen": 20082296, | |
| "step": 2315, | |
| "train_runtime": 65397.7722, | |
| "train_tokens_per_second": 307.079 | |
| }, | |
| { | |
| "epoch": 1.0021606438718738, | |
| "grad_norm": 0.07865723967552185, | |
| "learning_rate": 0.00010318959716331191, | |
| "loss": 0.0746, | |
| "num_input_tokens_seen": 20126264, | |
| "step": 2320, | |
| "train_runtime": 65419.76, | |
| "train_tokens_per_second": 307.648 | |
| }, | |
| { | |
| "epoch": 1.0043212877437477, | |
| "grad_norm": 0.061586473137140274, | |
| "learning_rate": 0.00010284300095680403, | |
| "loss": 0.0648, | |
| "num_input_tokens_seen": 20169208, | |
| "step": 2325, | |
| "train_runtime": 65440.9527, | |
| "train_tokens_per_second": 308.205 | |
| }, | |
| { | |
| "epoch": 1.0064819316156215, | |
| "grad_norm": 0.06647315621376038, | |
| "learning_rate": 0.00010249637056652906, | |
| "loss": 0.064, | |
| "num_input_tokens_seen": 20212504, | |
| "step": 2330, | |
| "train_runtime": 65462.4612, | |
| "train_tokens_per_second": 308.765 | |
| }, | |
| { | |
| "epoch": 1.0086425754874953, | |
| "grad_norm": 0.0650409534573555, | |
| "learning_rate": 0.00010214971016031274, | |
| "loss": 0.0675, | |
| "num_input_tokens_seen": 20255400, | |
| "step": 2335, | |
| "train_runtime": 65484.7168, | |
| "train_tokens_per_second": 309.315 | |
| }, | |
| { | |
| "epoch": 1.010803219359369, | |
| "grad_norm": 0.06883776932954788, | |
| "learning_rate": 0.00010180302390634168, | |
| "loss": 0.0637, | |
| "num_input_tokens_seen": 20298760, | |
| "step": 2340, | |
| "train_runtime": 65506.2982, | |
| "train_tokens_per_second": 309.875 | |
| }, | |
| { | |
| "epoch": 1.012963863231243, | |
| "grad_norm": 0.06613084673881531, | |
| "learning_rate": 0.00010145631597311334, | |
| "loss": 0.0703, | |
| "num_input_tokens_seen": 20342456, | |
| "step": 2345, | |
| "train_runtime": 65528.323, | |
| "train_tokens_per_second": 310.438 | |
| }, | |
| { | |
| "epoch": 1.0151245071031167, | |
| "grad_norm": 0.08105847239494324, | |
| "learning_rate": 0.00010110959052938575, | |
| "loss": 0.07, | |
| "num_input_tokens_seen": 20386088, | |
| "step": 2350, | |
| "train_runtime": 65550.3814, | |
| "train_tokens_per_second": 310.999 | |
| }, | |
| { | |
| "epoch": 1.0172851509749905, | |
| "grad_norm": 0.06776931136846542, | |
| "learning_rate": 0.00010076285174412759, | |
| "loss": 0.0673, | |
| "num_input_tokens_seen": 20429368, | |
| "step": 2355, | |
| "train_runtime": 65571.624, | |
| "train_tokens_per_second": 311.558 | |
| }, | |
| { | |
| "epoch": 1.0194457948468643, | |
| "grad_norm": 0.08375387638807297, | |
| "learning_rate": 0.00010041610378646789, | |
| "loss": 0.0653, | |
| "num_input_tokens_seen": 20472888, | |
| "step": 2360, | |
| "train_runtime": 65593.4934, | |
| "train_tokens_per_second": 312.118 | |
| }, | |
| { | |
| "epoch": 1.0216064387187382, | |
| "grad_norm": 0.061812516301870346, | |
| "learning_rate": 0.00010006935082564599, | |
| "loss": 0.0702, | |
| "num_input_tokens_seen": 20516760, | |
| "step": 2365, | |
| "train_runtime": 65615.2348, | |
| "train_tokens_per_second": 312.683 | |
| }, | |
| { | |
| "epoch": 1.023767082590612, | |
| "grad_norm": 0.061185307800769806, | |
| "learning_rate": 9.97225970309614e-05, | |
| "loss": 0.0676, | |
| "num_input_tokens_seen": 20560472, | |
| "step": 2370, | |
| "train_runtime": 65637.3028, | |
| "train_tokens_per_second": 313.244 | |
| }, | |
| { | |
| "epoch": 1.0259277264624858, | |
| "grad_norm": 0.06244645267724991, | |
| "learning_rate": 9.937584657172361e-05, | |
| "loss": 0.0696, | |
| "num_input_tokens_seen": 20604024, | |
| "step": 2375, | |
| "train_runtime": 65659.1613, | |
| "train_tokens_per_second": 313.803 | |
| }, | |
| { | |
| "epoch": 1.0280883703343597, | |
| "grad_norm": 0.07663462311029434, | |
| "learning_rate": 9.902910361720203e-05, | |
| "loss": 0.0688, | |
| "num_input_tokens_seen": 20647528, | |
| "step": 2380, | |
| "train_runtime": 65681.0633, | |
| "train_tokens_per_second": 314.36 | |
| }, | |
| { | |
| "epoch": 1.0302490142062335, | |
| "grad_norm": 0.07399339973926544, | |
| "learning_rate": 9.868237233657588e-05, | |
| "loss": 0.0638, | |
| "num_input_tokens_seen": 20690680, | |
| "step": 2385, | |
| "train_runtime": 65702.4149, | |
| "train_tokens_per_second": 314.915 | |
| }, | |
| { | |
| "epoch": 1.0324096580781073, | |
| "grad_norm": 0.050758518278598785, | |
| "learning_rate": 9.833565689888395e-05, | |
| "loss": 0.0567, | |
| "num_input_tokens_seen": 20733304, | |
| "step": 2390, | |
| "train_runtime": 65723.6939, | |
| "train_tokens_per_second": 315.462 | |
| }, | |
| { | |
| "epoch": 1.034570301949981, | |
| "grad_norm": 0.06294345110654831, | |
| "learning_rate": 9.798896147297457e-05, | |
| "loss": 0.071, | |
| "num_input_tokens_seen": 20776664, | |
| "step": 2395, | |
| "train_runtime": 65745.5546, | |
| "train_tokens_per_second": 316.016 | |
| }, | |
| { | |
| "epoch": 1.036730945821855, | |
| "grad_norm": 0.08008322864770889, | |
| "learning_rate": 9.764229022745543e-05, | |
| "loss": 0.0686, | |
| "num_input_tokens_seen": 20820072, | |
| "step": 2400, | |
| "train_runtime": 65767.2053, | |
| "train_tokens_per_second": 316.572 | |
| }, | |
| { | |
| "epoch": 1.036730945821855, | |
| "eval_loss": 0.07095986604690552, | |
| "eval_runtime": 31720.4675, | |
| "eval_samples_per_second": 0.292, | |
| "eval_steps_per_second": 0.146, | |
| "num_input_tokens_seen": 20820072, | |
| "step": 2400 | |
| }, | |
| { | |
| "epoch": 1.0388915896937287, | |
| "grad_norm": 0.06503720581531525, | |
| "learning_rate": 9.72956473306435e-05, | |
| "loss": 0.0666, | |
| "num_input_tokens_seen": 20863896, | |
| "step": 2405, | |
| "train_runtime": 97511.5446, | |
| "train_tokens_per_second": 213.963 | |
| }, | |
| { | |
| "epoch": 1.0410522335656025, | |
| "grad_norm": 0.070051409304142, | |
| "learning_rate": 9.694903695051488e-05, | |
| "loss": 0.0648, | |
| "num_input_tokens_seen": 20907080, | |
| "step": 2410, | |
| "train_runtime": 97533.7562, | |
| "train_tokens_per_second": 214.357 | |
| }, | |
| { | |
| "epoch": 1.0432128774374765, | |
| "grad_norm": 0.05461447313427925, | |
| "learning_rate": 9.660246325465471e-05, | |
| "loss": 0.0664, | |
| "num_input_tokens_seen": 20950152, | |
| "step": 2415, | |
| "train_runtime": 97554.9301, | |
| "train_tokens_per_second": 214.752 | |
| }, | |
| { | |
| "epoch": 1.0453735213093502, | |
| "grad_norm": 0.07258272171020508, | |
| "learning_rate": 9.625593041020701e-05, | |
| "loss": 0.0643, | |
| "num_input_tokens_seen": 20993624, | |
| "step": 2420, | |
| "train_runtime": 97576.2055, | |
| "train_tokens_per_second": 215.151 | |
| }, | |
| { | |
| "epoch": 1.047534165181224, | |
| "grad_norm": 0.07304082065820694, | |
| "learning_rate": 9.590944258382466e-05, | |
| "loss": 0.065, | |
| "num_input_tokens_seen": 21036984, | |
| "step": 2425, | |
| "train_runtime": 97597.7384, | |
| "train_tokens_per_second": 215.548 | |
| }, | |
| { | |
| "epoch": 1.0496948090530978, | |
| "grad_norm": 0.08018536120653152, | |
| "learning_rate": 9.556300394161919e-05, | |
| "loss": 0.0753, | |
| "num_input_tokens_seen": 21080616, | |
| "step": 2430, | |
| "train_runtime": 97619.4508, | |
| "train_tokens_per_second": 215.947 | |
| }, | |
| { | |
| "epoch": 1.0518554529249717, | |
| "grad_norm": 0.06374417245388031, | |
| "learning_rate": 9.52166186491108e-05, | |
| "loss": 0.067, | |
| "num_input_tokens_seen": 21124120, | |
| "step": 2435, | |
| "train_runtime": 97641.5898, | |
| "train_tokens_per_second": 216.343 | |
| }, | |
| { | |
| "epoch": 1.0540160967968455, | |
| "grad_norm": 0.056240521371364594, | |
| "learning_rate": 9.48702908711782e-05, | |
| "loss": 0.0604, | |
| "num_input_tokens_seen": 21167000, | |
| "step": 2440, | |
| "train_runtime": 97663.0543, | |
| "train_tokens_per_second": 216.735 | |
| }, | |
| { | |
| "epoch": 1.0561767406687192, | |
| "grad_norm": 0.08230195939540863, | |
| "learning_rate": 9.452402477200852e-05, | |
| "loss": 0.0645, | |
| "num_input_tokens_seen": 21210232, | |
| "step": 2445, | |
| "train_runtime": 97685.1274, | |
| "train_tokens_per_second": 217.129 | |
| }, | |
| { | |
| "epoch": 1.058337384540593, | |
| "grad_norm": 0.060752853751182556, | |
| "learning_rate": 9.417782451504737e-05, | |
| "loss": 0.0681, | |
| "num_input_tokens_seen": 21253656, | |
| "step": 2450, | |
| "train_runtime": 97706.5559, | |
| "train_tokens_per_second": 217.525 | |
| }, | |
| { | |
| "epoch": 1.060498028412467, | |
| "grad_norm": 0.07154619693756104, | |
| "learning_rate": 9.383169426294861e-05, | |
| "loss": 0.0686, | |
| "num_input_tokens_seen": 21296840, | |
| "step": 2455, | |
| "train_runtime": 97728.2395, | |
| "train_tokens_per_second": 217.919 | |
| }, | |
| { | |
| "epoch": 1.0626586722843407, | |
| "grad_norm": 0.06834197789430618, | |
| "learning_rate": 9.348563817752437e-05, | |
| "loss": 0.0645, | |
| "num_input_tokens_seen": 21340056, | |
| "step": 2460, | |
| "train_runtime": 97750.0436, | |
| "train_tokens_per_second": 218.312 | |
| }, | |
| { | |
| "epoch": 1.0648193161562145, | |
| "grad_norm": 0.07614444941282272, | |
| "learning_rate": 9.313966041969501e-05, | |
| "loss": 0.0695, | |
| "num_input_tokens_seen": 21383464, | |
| "step": 2465, | |
| "train_runtime": 97772.6309, | |
| "train_tokens_per_second": 218.706 | |
| }, | |
| { | |
| "epoch": 1.0669799600280885, | |
| "grad_norm": 0.06275052577257156, | |
| "learning_rate": 9.279376514943915e-05, | |
| "loss": 0.0711, | |
| "num_input_tokens_seen": 21426760, | |
| "step": 2470, | |
| "train_runtime": 97794.7669, | |
| "train_tokens_per_second": 219.099 | |
| }, | |
| { | |
| "epoch": 1.0691406038999622, | |
| "grad_norm": 0.061219897121191025, | |
| "learning_rate": 9.244795652574354e-05, | |
| "loss": 0.0659, | |
| "num_input_tokens_seen": 21470104, | |
| "step": 2475, | |
| "train_runtime": 97816.8904, | |
| "train_tokens_per_second": 219.493 | |
| }, | |
| { | |
| "epoch": 1.071301247771836, | |
| "grad_norm": 0.06351316720247269, | |
| "learning_rate": 9.210223870655312e-05, | |
| "loss": 0.0635, | |
| "num_input_tokens_seen": 21513112, | |
| "step": 2480, | |
| "train_runtime": 97837.7716, | |
| "train_tokens_per_second": 219.886 | |
| }, | |
| { | |
| "epoch": 1.0734618916437098, | |
| "grad_norm": 0.06057807803153992, | |
| "learning_rate": 9.175661584872103e-05, | |
| "loss": 0.0681, | |
| "num_input_tokens_seen": 21556600, | |
| "step": 2485, | |
| "train_runtime": 97859.0562, | |
| "train_tokens_per_second": 220.282 | |
| }, | |
| { | |
| "epoch": 1.0756225355155837, | |
| "grad_norm": 0.06607088446617126, | |
| "learning_rate": 9.141109210795859e-05, | |
| "loss": 0.0631, | |
| "num_input_tokens_seen": 21599688, | |
| "step": 2490, | |
| "train_runtime": 97880.2066, | |
| "train_tokens_per_second": 220.675 | |
| }, | |
| { | |
| "epoch": 1.0777831793874575, | |
| "grad_norm": 0.08129261434078217, | |
| "learning_rate": 9.106567163878533e-05, | |
| "loss": 0.0681, | |
| "num_input_tokens_seen": 21642872, | |
| "step": 2495, | |
| "train_runtime": 97901.951, | |
| "train_tokens_per_second": 221.067 | |
| }, | |
| { | |
| "epoch": 1.0799438232593312, | |
| "grad_norm": 0.06941742449998856, | |
| "learning_rate": 9.072035859447913e-05, | |
| "loss": 0.0688, | |
| "num_input_tokens_seen": 21686264, | |
| "step": 2500, | |
| "train_runtime": 97923.7723, | |
| "train_tokens_per_second": 221.461 | |
| }, | |
| { | |
| "epoch": 1.082104467131205, | |
| "grad_norm": 0.06604190915822983, | |
| "learning_rate": 9.037515712702613e-05, | |
| "loss": 0.0741, | |
| "num_input_tokens_seen": 21729544, | |
| "step": 2505, | |
| "train_runtime": 97945.6763, | |
| "train_tokens_per_second": 221.853 | |
| }, | |
| { | |
| "epoch": 1.084265111003079, | |
| "grad_norm": 0.0760832205414772, | |
| "learning_rate": 9.003007138707095e-05, | |
| "loss": 0.0697, | |
| "num_input_tokens_seen": 21773240, | |
| "step": 2510, | |
| "train_runtime": 97968.2869, | |
| "train_tokens_per_second": 222.248 | |
| }, | |
| { | |
| "epoch": 1.0864257548749527, | |
| "grad_norm": 0.06299443542957306, | |
| "learning_rate": 8.968510552386668e-05, | |
| "loss": 0.0663, | |
| "num_input_tokens_seen": 21816552, | |
| "step": 2515, | |
| "train_runtime": 97989.9696, | |
| "train_tokens_per_second": 222.641 | |
| }, | |
| { | |
| "epoch": 1.0885863987468265, | |
| "grad_norm": 0.062478598207235336, | |
| "learning_rate": 8.93402636852251e-05, | |
| "loss": 0.0662, | |
| "num_input_tokens_seen": 21860056, | |
| "step": 2520, | |
| "train_runtime": 98011.1485, | |
| "train_tokens_per_second": 223.036 | |
| }, | |
| { | |
| "epoch": 1.0907470426187005, | |
| "grad_norm": 0.08520319312810898, | |
| "learning_rate": 8.899555001746664e-05, | |
| "loss": 0.0755, | |
| "num_input_tokens_seen": 21903896, | |
| "step": 2525, | |
| "train_runtime": 98033.6589, | |
| "train_tokens_per_second": 223.432 | |
| }, | |
| { | |
| "epoch": 1.0929076864905742, | |
| "grad_norm": 0.060366444289684296, | |
| "learning_rate": 8.865096866537071e-05, | |
| "loss": 0.0676, | |
| "num_input_tokens_seen": 21947240, | |
| "step": 2530, | |
| "train_runtime": 98055.8604, | |
| "train_tokens_per_second": 223.824 | |
| }, | |
| { | |
| "epoch": 1.095068330362448, | |
| "grad_norm": 0.06165176257491112, | |
| "learning_rate": 8.83065237721257e-05, | |
| "loss": 0.0676, | |
| "num_input_tokens_seen": 21990456, | |
| "step": 2535, | |
| "train_runtime": 98077.877, | |
| "train_tokens_per_second": 224.214 | |
| }, | |
| { | |
| "epoch": 1.0972289742343218, | |
| "grad_norm": 0.06571424752473831, | |
| "learning_rate": 8.796221947927932e-05, | |
| "loss": 0.0681, | |
| "num_input_tokens_seen": 22033624, | |
| "step": 2540, | |
| "train_runtime": 98099.4736, | |
| "train_tokens_per_second": 224.605 | |
| }, | |
| { | |
| "epoch": 1.0993896181061957, | |
| "grad_norm": 0.06270553916692734, | |
| "learning_rate": 8.761805992668869e-05, | |
| "loss": 0.0628, | |
| "num_input_tokens_seen": 22076664, | |
| "step": 2545, | |
| "train_runtime": 98121.768, | |
| "train_tokens_per_second": 224.993 | |
| }, | |
| { | |
| "epoch": 1.1015502619780695, | |
| "grad_norm": 0.0668216124176979, | |
| "learning_rate": 8.727404925247058e-05, | |
| "loss": 0.0643, | |
| "num_input_tokens_seen": 22119784, | |
| "step": 2550, | |
| "train_runtime": 98142.8207, | |
| "train_tokens_per_second": 225.384 | |
| }, | |
| { | |
| "epoch": 1.1037109058499432, | |
| "grad_norm": 0.07844366133213043, | |
| "learning_rate": 8.693019159295176e-05, | |
| "loss": 0.0682, | |
| "num_input_tokens_seen": 22163224, | |
| "step": 2555, | |
| "train_runtime": 98164.731, | |
| "train_tokens_per_second": 225.776 | |
| }, | |
| { | |
| "epoch": 1.105871549721817, | |
| "grad_norm": 0.05678049847483635, | |
| "learning_rate": 8.658649108261899e-05, | |
| "loss": 0.066, | |
| "num_input_tokens_seen": 22206840, | |
| "step": 2560, | |
| "train_runtime": 98186.7714, | |
| "train_tokens_per_second": 226.169 | |
| }, | |
| { | |
| "epoch": 1.108032193593691, | |
| "grad_norm": 0.07098106294870377, | |
| "learning_rate": 8.624295185406964e-05, | |
| "loss": 0.0658, | |
| "num_input_tokens_seen": 22250264, | |
| "step": 2565, | |
| "train_runtime": 98208.9343, | |
| "train_tokens_per_second": 226.56 | |
| }, | |
| { | |
| "epoch": 1.1101928374655647, | |
| "grad_norm": 0.07217643409967422, | |
| "learning_rate": 8.589957803796187e-05, | |
| "loss": 0.0639, | |
| "num_input_tokens_seen": 22293448, | |
| "step": 2570, | |
| "train_runtime": 98230.9853, | |
| "train_tokens_per_second": 226.949 | |
| }, | |
| { | |
| "epoch": 1.1123534813374385, | |
| "grad_norm": 0.06756918132305145, | |
| "learning_rate": 8.555637376296489e-05, | |
| "loss": 0.0631, | |
| "num_input_tokens_seen": 22336632, | |
| "step": 2575, | |
| "train_runtime": 98252.5038, | |
| "train_tokens_per_second": 227.339 | |
| }, | |
| { | |
| "epoch": 1.1145141252093125, | |
| "grad_norm": 0.05753394216299057, | |
| "learning_rate": 8.521334315570939e-05, | |
| "loss": 0.0635, | |
| "num_input_tokens_seen": 22379848, | |
| "step": 2580, | |
| "train_runtime": 98274.0744, | |
| "train_tokens_per_second": 227.729 | |
| }, | |
| { | |
| "epoch": 1.1166747690811862, | |
| "grad_norm": 0.06545528769493103, | |
| "learning_rate": 8.48704903407379e-05, | |
| "loss": 0.0658, | |
| "num_input_tokens_seen": 22423064, | |
| "step": 2585, | |
| "train_runtime": 98296.1108, | |
| "train_tokens_per_second": 228.118 | |
| }, | |
| { | |
| "epoch": 1.11883541295306, | |
| "grad_norm": 0.07646580785512924, | |
| "learning_rate": 8.45278194404552e-05, | |
| "loss": 0.0751, | |
| "num_input_tokens_seen": 22466600, | |
| "step": 2590, | |
| "train_runtime": 98317.5035, | |
| "train_tokens_per_second": 228.511 | |
| }, | |
| { | |
| "epoch": 1.1209960568249338, | |
| "grad_norm": 0.06202688813209534, | |
| "learning_rate": 8.41853345750788e-05, | |
| "loss": 0.0626, | |
| "num_input_tokens_seen": 22509448, | |
| "step": 2595, | |
| "train_runtime": 98338.7439, | |
| "train_tokens_per_second": 228.897 | |
| }, | |
| { | |
| "epoch": 1.1231567006968077, | |
| "grad_norm": 0.05783059075474739, | |
| "learning_rate": 8.384303986258932e-05, | |
| "loss": 0.0635, | |
| "num_input_tokens_seen": 22552760, | |
| "step": 2600, | |
| "train_runtime": 98360.0854, | |
| "train_tokens_per_second": 229.288 | |
| }, | |
| { | |
| "epoch": 1.1231567006968077, | |
| "eval_loss": 0.07066521048545837, | |
| "eval_runtime": 710.5608, | |
| "eval_samples_per_second": 13.028, | |
| "eval_steps_per_second": 6.515, | |
| "num_input_tokens_seen": 22552760, | |
| "step": 2600 | |
| } | |
| ], | |
| "logging_steps": 5, | |
| "max_steps": 4630, | |
| "num_input_tokens_seen": 22552760, | |
| "num_train_epochs": 2, | |
| "save_steps": 200, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": false | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 1.0270836768097075e+18, | |
| "train_batch_size": 2, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |
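
The object above is the standard `trainer_state.json` that the Hugging Face `Trainer` writes alongside each checkpoint. Below is a minimal inspection sketch, assuming the file is saved under that conventional name: it pulls the train-loss records and the periodic `eval_loss` entries out of `log_history`, and reconstructs the learning-rate schedule that the logged values imply. The schedule constants (2e-4 peak, ~100 warmup steps, cosine decay to `max_steps` = 4630) are inferred from the log itself, not read from any config, so treat them as assumptions.

```python
import json
import math

# Load the checkpoint state; the path is the Trainer's conventional file name.
with open("trainer_state.json") as f:
    state = json.load(f)

# Training records carry "loss"; evaluation records carry "eval_loss" instead.
train_logs = [e for e in state["log_history"] if "loss" in e]
eval_logs = [e for e in state["log_history"] if "eval_loss" in e]

print(f"train records: {len(train_logs)} (one per {state['logging_steps']} steps)")
last = train_logs[-1]
print(f"step {last['step']}: loss={last['loss']:.4f}, "
      f"{last['num_input_tokens_seen']:,} tokens seen")
for e in eval_logs:
    print(f"step {e['step']:>5}: eval_loss={e['eval_loss']:.5f}")

# The logged learning rates are consistent with linear warmup to a 2e-4 peak
# over roughly the first 100 steps, followed by cosine decay to 0 at
# max_steps (4630). These constants are inferred from the log, not confirmed.
def inferred_lr(step, peak=2e-4, warmup=100, max_steps=4630):
    if step < warmup:
        return peak * step / warmup
    progress = (step - warmup) / (max_steps - warmup)
    return 0.5 * peak * (1.0 + math.cos(math.pi * progress))

# e.g. inferred_lr(1999) ~= 1.2511e-4, matching the step-2000 record above
# (the logger appears to report the rate of the previous optimizer step).
print(f"inferred lr near step 2000: {inferred_lr(1999):.6e}")
```

One caveat when reading throughput from this log: `train_runtime` appears to include evaluation wall-clock time. The jump from ~54,618 s at step 2000 to ~63,316 s at step 2005 closely matches that checkpoint's `eval_runtime` of ~8,674 s, and likewise after step 2400, which is why `train_tokens_per_second` drops sharply after each evaluation rather than reflecting a real slowdown in training.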