{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 46.85408299866131, "eval_steps": 500, "global_step": 35000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.013386880856760375, "grad_norm": 7.699293613433838, "learning_rate": 1.8e-07, "loss": 0.8145, "step": 10 }, { "epoch": 0.02677376171352075, "grad_norm": 7.854701995849609, "learning_rate": 3.8e-07, "loss": 0.8047, "step": 20 }, { "epoch": 0.040160642570281124, "grad_norm": 9.568936347961426, "learning_rate": 5.8e-07, "loss": 0.7938, "step": 30 }, { "epoch": 0.0535475234270415, "grad_norm": 5.773306846618652, "learning_rate": 7.8e-07, "loss": 0.713, "step": 40 }, { "epoch": 0.06693440428380187, "grad_norm": 7.463155746459961, "learning_rate": 9.8e-07, "loss": 0.6558, "step": 50 }, { "epoch": 0.08032128514056225, "grad_norm": 4.017899513244629, "learning_rate": 1.18e-06, "loss": 0.57, "step": 60 }, { "epoch": 0.09370816599732262, "grad_norm": 2.7013120651245117, "learning_rate": 1.3800000000000001e-06, "loss": 0.4094, "step": 70 }, { "epoch": 0.107095046854083, "grad_norm": 1.774193525314331, "learning_rate": 1.5800000000000003e-06, "loss": 0.3575, "step": 80 }, { "epoch": 0.12048192771084337, "grad_norm": 3.1615118980407715, "learning_rate": 1.7800000000000001e-06, "loss": 0.2979, "step": 90 }, { "epoch": 0.13386880856760375, "grad_norm": 1.7236164808273315, "learning_rate": 1.98e-06, "loss": 0.2672, "step": 100 }, { "epoch": 0.14725568942436412, "grad_norm": 1.8042762279510498, "learning_rate": 2.1800000000000003e-06, "loss": 0.2271, "step": 110 }, { "epoch": 0.1606425702811245, "grad_norm": 1.1274936199188232, "learning_rate": 2.38e-06, "loss": 0.2109, "step": 120 }, { "epoch": 0.17402945113788487, "grad_norm": 1.2276346683502197, "learning_rate": 2.5800000000000003e-06, "loss": 0.1943, "step": 130 }, { "epoch": 0.18741633199464525, "grad_norm": 1.080681562423706, "learning_rate": 2.78e-06, "loss": 0.1808, "step": 140 }, { "epoch": 0.20080321285140562, "grad_norm": 1.2628917694091797, "learning_rate": 2.9800000000000003e-06, "loss": 0.1662, "step": 150 }, { "epoch": 0.214190093708166, "grad_norm": 0.8300891518592834, "learning_rate": 3.1800000000000005e-06, "loss": 0.152, "step": 160 }, { "epoch": 0.22757697456492637, "grad_norm": 1.1131463050842285, "learning_rate": 3.38e-06, "loss": 0.1402, "step": 170 }, { "epoch": 0.24096385542168675, "grad_norm": 0.8200536370277405, "learning_rate": 3.58e-06, "loss": 0.1371, "step": 180 }, { "epoch": 0.2543507362784471, "grad_norm": 1.1000646352767944, "learning_rate": 3.7800000000000002e-06, "loss": 0.1359, "step": 190 }, { "epoch": 0.2677376171352075, "grad_norm": 0.7446349859237671, "learning_rate": 3.98e-06, "loss": 0.1293, "step": 200 }, { "epoch": 0.28112449799196787, "grad_norm": 0.7598780989646912, "learning_rate": 4.18e-06, "loss": 0.1223, "step": 210 }, { "epoch": 0.29451137884872824, "grad_norm": 1.104144811630249, "learning_rate": 4.38e-06, "loss": 0.1177, "step": 220 }, { "epoch": 0.3078982597054886, "grad_norm": 0.6232774257659912, "learning_rate": 4.58e-06, "loss": 0.113, "step": 230 }, { "epoch": 0.321285140562249, "grad_norm": 0.9826235771179199, "learning_rate": 4.780000000000001e-06, "loss": 0.1178, "step": 240 }, { "epoch": 0.33467202141900937, "grad_norm": 0.7346672415733337, "learning_rate": 4.98e-06, "loss": 0.1017, "step": 250 }, { "epoch": 0.34805890227576974, "grad_norm": 0.8083603382110596, "learning_rate": 5.18e-06, "loss": 0.1039, "step": 260 }, { "epoch": 0.3614457831325301, "grad_norm": 0.9930930137634277, "learning_rate": 5.38e-06, "loss": 0.1023, "step": 270 }, { "epoch": 0.3748326639892905, "grad_norm": 0.5870115160942078, "learning_rate": 5.580000000000001e-06, "loss": 0.0903, "step": 280 }, { "epoch": 0.38821954484605087, "grad_norm": 0.6631126999855042, "learning_rate": 5.78e-06, "loss": 0.0908, "step": 290 }, { "epoch": 0.40160642570281124, "grad_norm": 0.7240543961524963, "learning_rate": 5.98e-06, "loss": 0.0955, "step": 300 }, { "epoch": 0.4149933065595716, "grad_norm": 1.1783123016357422, "learning_rate": 6.18e-06, "loss": 0.0902, "step": 310 }, { "epoch": 0.428380187416332, "grad_norm": 0.7833058834075928, "learning_rate": 6.38e-06, "loss": 0.0871, "step": 320 }, { "epoch": 0.44176706827309237, "grad_norm": 1.064716100692749, "learning_rate": 6.58e-06, "loss": 0.0894, "step": 330 }, { "epoch": 0.45515394912985274, "grad_norm": 0.8246449828147888, "learning_rate": 6.78e-06, "loss": 0.0909, "step": 340 }, { "epoch": 0.4685408299866131, "grad_norm": 0.45301076769828796, "learning_rate": 6.98e-06, "loss": 0.0804, "step": 350 }, { "epoch": 0.4819277108433735, "grad_norm": 0.6968113780021667, "learning_rate": 7.180000000000001e-06, "loss": 0.0865, "step": 360 }, { "epoch": 0.49531459170013387, "grad_norm": 0.6450828313827515, "learning_rate": 7.3800000000000005e-06, "loss": 0.0799, "step": 370 }, { "epoch": 0.5087014725568942, "grad_norm": 0.9476212859153748, "learning_rate": 7.580000000000001e-06, "loss": 0.0739, "step": 380 }, { "epoch": 0.5220883534136547, "grad_norm": 0.4979226887226105, "learning_rate": 7.78e-06, "loss": 0.0724, "step": 390 }, { "epoch": 0.535475234270415, "grad_norm": 0.8587151169776917, "learning_rate": 7.98e-06, "loss": 0.0789, "step": 400 }, { "epoch": 0.5488621151271754, "grad_norm": 0.7535715699195862, "learning_rate": 8.18e-06, "loss": 0.0755, "step": 410 }, { "epoch": 0.5622489959839357, "grad_norm": 1.2327359914779663, "learning_rate": 8.380000000000001e-06, "loss": 0.0677, "step": 420 }, { "epoch": 0.5756358768406962, "grad_norm": 0.8648233413696289, "learning_rate": 8.580000000000001e-06, "loss": 0.0738, "step": 430 }, { "epoch": 0.5890227576974565, "grad_norm": 1.177848219871521, "learning_rate": 8.78e-06, "loss": 0.0692, "step": 440 }, { "epoch": 0.6024096385542169, "grad_norm": 0.5768447518348694, "learning_rate": 8.98e-06, "loss": 0.0675, "step": 450 }, { "epoch": 0.6157965194109772, "grad_norm": 0.508451521396637, "learning_rate": 9.180000000000002e-06, "loss": 0.0646, "step": 460 }, { "epoch": 0.6291834002677377, "grad_norm": 0.6156247854232788, "learning_rate": 9.38e-06, "loss": 0.0612, "step": 470 }, { "epoch": 0.642570281124498, "grad_norm": 1.0961894989013672, "learning_rate": 9.58e-06, "loss": 0.0656, "step": 480 }, { "epoch": 0.6559571619812584, "grad_norm": 0.5964426398277283, "learning_rate": 9.78e-06, "loss": 0.0638, "step": 490 }, { "epoch": 0.6693440428380187, "grad_norm": 0.7437102794647217, "learning_rate": 9.980000000000001e-06, "loss": 0.0593, "step": 500 }, { "epoch": 0.6827309236947792, "grad_norm": 0.5807281732559204, "learning_rate": 1.018e-05, "loss": 0.0551, "step": 510 }, { "epoch": 0.6961178045515395, "grad_norm": 0.5860300660133362, "learning_rate": 1.038e-05, "loss": 0.0596, "step": 520 }, { "epoch": 0.7095046854082999, "grad_norm": 0.6605597138404846, "learning_rate": 1.058e-05, "loss": 0.0584, "step": 530 }, { "epoch": 0.7228915662650602, "grad_norm": 0.5232974290847778, "learning_rate": 1.0780000000000002e-05, "loss": 0.061, "step": 540 }, { "epoch": 0.7362784471218207, "grad_norm": 0.654168426990509, "learning_rate": 1.098e-05, "loss": 0.0525, "step": 550 }, { "epoch": 0.749665327978581, "grad_norm": 0.6486468315124512, "learning_rate": 1.118e-05, "loss": 0.0541, "step": 560 }, { "epoch": 0.7630522088353414, "grad_norm": 0.573939323425293, "learning_rate": 1.1380000000000001e-05, "loss": 0.0587, "step": 570 }, { "epoch": 0.7764390896921017, "grad_norm": 0.7795117497444153, "learning_rate": 1.1580000000000001e-05, "loss": 0.0552, "step": 580 }, { "epoch": 0.7898259705488622, "grad_norm": 0.7592347264289856, "learning_rate": 1.178e-05, "loss": 0.0542, "step": 590 }, { "epoch": 0.8032128514056225, "grad_norm": 1.0981130599975586, "learning_rate": 1.198e-05, "loss": 0.056, "step": 600 }, { "epoch": 0.8165997322623829, "grad_norm": 1.0834730863571167, "learning_rate": 1.2180000000000002e-05, "loss": 0.0546, "step": 610 }, { "epoch": 0.8299866131191432, "grad_norm": 1.3035888671875, "learning_rate": 1.238e-05, "loss": 0.0546, "step": 620 }, { "epoch": 0.8433734939759037, "grad_norm": 0.4903249144554138, "learning_rate": 1.258e-05, "loss": 0.0584, "step": 630 }, { "epoch": 0.856760374832664, "grad_norm": 0.7569624185562134, "learning_rate": 1.278e-05, "loss": 0.0586, "step": 640 }, { "epoch": 0.8701472556894244, "grad_norm": 1.177359938621521, "learning_rate": 1.2980000000000001e-05, "loss": 0.0552, "step": 650 }, { "epoch": 0.8835341365461847, "grad_norm": 0.5649739503860474, "learning_rate": 1.3180000000000001e-05, "loss": 0.05, "step": 660 }, { "epoch": 0.8969210174029452, "grad_norm": 0.6132524013519287, "learning_rate": 1.338e-05, "loss": 0.0496, "step": 670 }, { "epoch": 0.9103078982597055, "grad_norm": 0.729094922542572, "learning_rate": 1.358e-05, "loss": 0.0487, "step": 680 }, { "epoch": 0.9236947791164659, "grad_norm": 0.6461308598518372, "learning_rate": 1.3780000000000002e-05, "loss": 0.0496, "step": 690 }, { "epoch": 0.9370816599732262, "grad_norm": 0.733289897441864, "learning_rate": 1.3980000000000002e-05, "loss": 0.0477, "step": 700 }, { "epoch": 0.9504685408299867, "grad_norm": 0.7358296513557434, "learning_rate": 1.4180000000000001e-05, "loss": 0.0522, "step": 710 }, { "epoch": 0.963855421686747, "grad_norm": 0.5617910027503967, "learning_rate": 1.4380000000000001e-05, "loss": 0.0529, "step": 720 }, { "epoch": 0.9772423025435074, "grad_norm": 0.8012162446975708, "learning_rate": 1.4580000000000003e-05, "loss": 0.052, "step": 730 }, { "epoch": 0.9906291834002677, "grad_norm": 1.2837984561920166, "learning_rate": 1.4779999999999999e-05, "loss": 0.0518, "step": 740 }, { "epoch": 1.0040160642570282, "grad_norm": 0.8108014464378357, "learning_rate": 1.4979999999999999e-05, "loss": 0.0543, "step": 750 }, { "epoch": 1.0174029451137885, "grad_norm": 0.7881548404693604, "learning_rate": 1.518e-05, "loss": 0.0481, "step": 760 }, { "epoch": 1.0307898259705488, "grad_norm": 1.3498380184173584, "learning_rate": 1.538e-05, "loss": 0.0486, "step": 770 }, { "epoch": 1.0441767068273093, "grad_norm": 0.43161118030548096, "learning_rate": 1.558e-05, "loss": 0.0489, "step": 780 }, { "epoch": 1.0575635876840697, "grad_norm": 0.8494847416877747, "learning_rate": 1.578e-05, "loss": 0.0499, "step": 790 }, { "epoch": 1.07095046854083, "grad_norm": 0.4831250011920929, "learning_rate": 1.598e-05, "loss": 0.0451, "step": 800 }, { "epoch": 1.0843373493975903, "grad_norm": 0.45015770196914673, "learning_rate": 1.618e-05, "loss": 0.0445, "step": 810 }, { "epoch": 1.0977242302543506, "grad_norm": 0.4186054766178131, "learning_rate": 1.6380000000000002e-05, "loss": 0.0408, "step": 820 }, { "epoch": 1.1111111111111112, "grad_norm": 0.4183540642261505, "learning_rate": 1.658e-05, "loss": 0.0471, "step": 830 }, { "epoch": 1.1244979919678715, "grad_norm": 0.9975723624229431, "learning_rate": 1.6780000000000002e-05, "loss": 0.048, "step": 840 }, { "epoch": 1.1378848728246318, "grad_norm": 0.5891368985176086, "learning_rate": 1.698e-05, "loss": 0.0461, "step": 850 }, { "epoch": 1.1512717536813923, "grad_norm": 0.7827931642532349, "learning_rate": 1.718e-05, "loss": 0.0445, "step": 860 }, { "epoch": 1.1646586345381527, "grad_norm": 0.969237208366394, "learning_rate": 1.7380000000000003e-05, "loss": 0.0441, "step": 870 }, { "epoch": 1.178045515394913, "grad_norm": 0.5770193338394165, "learning_rate": 1.758e-05, "loss": 0.0452, "step": 880 }, { "epoch": 1.1914323962516733, "grad_norm": 0.6729143857955933, "learning_rate": 1.7780000000000003e-05, "loss": 0.0453, "step": 890 }, { "epoch": 1.2048192771084336, "grad_norm": 1.1404643058776855, "learning_rate": 1.798e-05, "loss": 0.043, "step": 900 }, { "epoch": 1.2182061579651942, "grad_norm": 0.9220609664916992, "learning_rate": 1.818e-05, "loss": 0.041, "step": 910 }, { "epoch": 1.2315930388219545, "grad_norm": 0.6603626608848572, "learning_rate": 1.838e-05, "loss": 0.0463, "step": 920 }, { "epoch": 1.2449799196787148, "grad_norm": 0.8510183095932007, "learning_rate": 1.858e-05, "loss": 0.0494, "step": 930 }, { "epoch": 1.2583668005354753, "grad_norm": 0.9890204668045044, "learning_rate": 1.878e-05, "loss": 0.0437, "step": 940 }, { "epoch": 1.2717536813922357, "grad_norm": 0.6433798670768738, "learning_rate": 1.898e-05, "loss": 0.0421, "step": 950 }, { "epoch": 1.285140562248996, "grad_norm": 0.4254779815673828, "learning_rate": 1.918e-05, "loss": 0.0408, "step": 960 }, { "epoch": 1.2985274431057563, "grad_norm": 0.6381332278251648, "learning_rate": 1.938e-05, "loss": 0.0403, "step": 970 }, { "epoch": 1.3119143239625166, "grad_norm": 0.9419626593589783, "learning_rate": 1.9580000000000002e-05, "loss": 0.0444, "step": 980 }, { "epoch": 1.3253012048192772, "grad_norm": 0.5006697773933411, "learning_rate": 1.978e-05, "loss": 0.0426, "step": 990 }, { "epoch": 1.3386880856760375, "grad_norm": 0.4657343924045563, "learning_rate": 1.9980000000000002e-05, "loss": 0.0445, "step": 1000 }, { "epoch": 1.3520749665327978, "grad_norm": 0.8142620921134949, "learning_rate": 2.0180000000000003e-05, "loss": 0.0404, "step": 1010 }, { "epoch": 1.3654618473895583, "grad_norm": 0.5392622351646423, "learning_rate": 2.038e-05, "loss": 0.0445, "step": 1020 }, { "epoch": 1.3788487282463187, "grad_norm": 0.4657094180583954, "learning_rate": 2.0580000000000003e-05, "loss": 0.0377, "step": 1030 }, { "epoch": 1.392235609103079, "grad_norm": 0.45610883831977844, "learning_rate": 2.078e-05, "loss": 0.0405, "step": 1040 }, { "epoch": 1.4056224899598393, "grad_norm": 0.45099711418151855, "learning_rate": 2.098e-05, "loss": 0.0367, "step": 1050 }, { "epoch": 1.4190093708165996, "grad_norm": 0.4207739531993866, "learning_rate": 2.118e-05, "loss": 0.0357, "step": 1060 }, { "epoch": 1.4323962516733602, "grad_norm": 0.4164259731769562, "learning_rate": 2.138e-05, "loss": 0.0382, "step": 1070 }, { "epoch": 1.4457831325301205, "grad_norm": 0.5484375953674316, "learning_rate": 2.158e-05, "loss": 0.038, "step": 1080 }, { "epoch": 1.4591700133868808, "grad_norm": 0.648046612739563, "learning_rate": 2.178e-05, "loss": 0.0354, "step": 1090 }, { "epoch": 1.4725568942436413, "grad_norm": 0.6838318109512329, "learning_rate": 2.198e-05, "loss": 0.0401, "step": 1100 }, { "epoch": 1.4859437751004017, "grad_norm": 0.5908910036087036, "learning_rate": 2.218e-05, "loss": 0.03, "step": 1110 }, { "epoch": 1.499330655957162, "grad_norm": 0.630096971988678, "learning_rate": 2.2380000000000003e-05, "loss": 0.0363, "step": 1120 }, { "epoch": 1.5127175368139225, "grad_norm": 1.1222299337387085, "learning_rate": 2.258e-05, "loss": 0.0355, "step": 1130 }, { "epoch": 1.5261044176706826, "grad_norm": 0.6671978235244751, "learning_rate": 2.2780000000000002e-05, "loss": 0.0389, "step": 1140 }, { "epoch": 1.5394912985274432, "grad_norm": 0.6846047639846802, "learning_rate": 2.298e-05, "loss": 0.0374, "step": 1150 }, { "epoch": 1.5528781793842035, "grad_norm": 0.6602447032928467, "learning_rate": 2.318e-05, "loss": 0.0403, "step": 1160 }, { "epoch": 1.5662650602409638, "grad_norm": 0.67584627866745, "learning_rate": 2.3380000000000003e-05, "loss": 0.0411, "step": 1170 }, { "epoch": 1.5796519410977243, "grad_norm": 0.7195892930030823, "learning_rate": 2.358e-05, "loss": 0.0368, "step": 1180 }, { "epoch": 1.5930388219544847, "grad_norm": 0.8645982146263123, "learning_rate": 2.3780000000000003e-05, "loss": 0.0365, "step": 1190 }, { "epoch": 1.606425702811245, "grad_norm": 0.7479833960533142, "learning_rate": 2.398e-05, "loss": 0.0389, "step": 1200 }, { "epoch": 1.6198125836680055, "grad_norm": 0.6962466239929199, "learning_rate": 2.418e-05, "loss": 0.0393, "step": 1210 }, { "epoch": 1.6331994645247656, "grad_norm": 0.6987592577934265, "learning_rate": 2.438e-05, "loss": 0.0348, "step": 1220 }, { "epoch": 1.6465863453815262, "grad_norm": 0.4744699001312256, "learning_rate": 2.4580000000000002e-05, "loss": 0.0326, "step": 1230 }, { "epoch": 1.6599732262382865, "grad_norm": 0.4565297067165375, "learning_rate": 2.478e-05, "loss": 0.0367, "step": 1240 }, { "epoch": 1.6733601070950468, "grad_norm": 0.5803639888763428, "learning_rate": 2.498e-05, "loss": 0.0321, "step": 1250 }, { "epoch": 1.6867469879518073, "grad_norm": 0.40769124031066895, "learning_rate": 2.5180000000000003e-05, "loss": 0.0385, "step": 1260 }, { "epoch": 1.7001338688085676, "grad_norm": 0.4467719793319702, "learning_rate": 2.5380000000000004e-05, "loss": 0.0363, "step": 1270 }, { "epoch": 1.713520749665328, "grad_norm": 0.679752767086029, "learning_rate": 2.5580000000000002e-05, "loss": 0.0409, "step": 1280 }, { "epoch": 1.7269076305220885, "grad_norm": 0.3752689063549042, "learning_rate": 2.5779999999999997e-05, "loss": 0.0354, "step": 1290 }, { "epoch": 1.7402945113788486, "grad_norm": 0.737478494644165, "learning_rate": 2.598e-05, "loss": 0.0383, "step": 1300 }, { "epoch": 1.7536813922356091, "grad_norm": 0.6817255020141602, "learning_rate": 2.618e-05, "loss": 0.0367, "step": 1310 }, { "epoch": 1.7670682730923695, "grad_norm": 0.4271469712257385, "learning_rate": 2.6379999999999998e-05, "loss": 0.0286, "step": 1320 }, { "epoch": 1.7804551539491298, "grad_norm": 0.7075365781784058, "learning_rate": 2.658e-05, "loss": 0.0409, "step": 1330 }, { "epoch": 1.7938420348058903, "grad_norm": 1.0092262029647827, "learning_rate": 2.678e-05, "loss": 0.037, "step": 1340 }, { "epoch": 1.8072289156626506, "grad_norm": 0.4126337766647339, "learning_rate": 2.698e-05, "loss": 0.0321, "step": 1350 }, { "epoch": 1.820615796519411, "grad_norm": 0.6170799732208252, "learning_rate": 2.718e-05, "loss": 0.0369, "step": 1360 }, { "epoch": 1.8340026773761715, "grad_norm": 0.3480358421802521, "learning_rate": 2.738e-05, "loss": 0.0417, "step": 1370 }, { "epoch": 1.8473895582329316, "grad_norm": 0.40782880783081055, "learning_rate": 2.758e-05, "loss": 0.0304, "step": 1380 }, { "epoch": 1.8607764390896921, "grad_norm": 0.4395017623901367, "learning_rate": 2.778e-05, "loss": 0.0305, "step": 1390 }, { "epoch": 1.8741633199464525, "grad_norm": 0.58582603931427, "learning_rate": 2.798e-05, "loss": 0.0331, "step": 1400 }, { "epoch": 1.8875502008032128, "grad_norm": 0.6668860912322998, "learning_rate": 2.818e-05, "loss": 0.0351, "step": 1410 }, { "epoch": 1.9009370816599733, "grad_norm": 0.5585152506828308, "learning_rate": 2.8380000000000003e-05, "loss": 0.0342, "step": 1420 }, { "epoch": 1.9143239625167336, "grad_norm": 1.4036839008331299, "learning_rate": 2.858e-05, "loss": 0.0327, "step": 1430 }, { "epoch": 1.927710843373494, "grad_norm": 0.5699360370635986, "learning_rate": 2.8780000000000002e-05, "loss": 0.0323, "step": 1440 }, { "epoch": 1.9410977242302545, "grad_norm": 0.4498472511768341, "learning_rate": 2.898e-05, "loss": 0.0311, "step": 1450 }, { "epoch": 1.9544846050870146, "grad_norm": 0.5533521771430969, "learning_rate": 2.9180000000000002e-05, "loss": 0.0375, "step": 1460 }, { "epoch": 1.9678714859437751, "grad_norm": 0.8270938396453857, "learning_rate": 2.9380000000000003e-05, "loss": 0.033, "step": 1470 }, { "epoch": 1.9812583668005355, "grad_norm": 0.5519605875015259, "learning_rate": 2.958e-05, "loss": 0.0357, "step": 1480 }, { "epoch": 1.9946452476572958, "grad_norm": 0.4262024164199829, "learning_rate": 2.9780000000000003e-05, "loss": 0.0365, "step": 1490 }, { "epoch": 2.0080321285140563, "grad_norm": 0.49969595670700073, "learning_rate": 2.998e-05, "loss": 0.0318, "step": 1500 }, { "epoch": 2.0214190093708164, "grad_norm": 0.49290671944618225, "learning_rate": 3.0180000000000002e-05, "loss": 0.0332, "step": 1510 }, { "epoch": 2.034805890227577, "grad_norm": 1.474175214767456, "learning_rate": 3.0380000000000004e-05, "loss": 0.0313, "step": 1520 }, { "epoch": 2.0481927710843375, "grad_norm": 1.088426113128662, "learning_rate": 3.058e-05, "loss": 0.0376, "step": 1530 }, { "epoch": 2.0615796519410976, "grad_norm": 0.6640676856040955, "learning_rate": 3.078e-05, "loss": 0.0316, "step": 1540 }, { "epoch": 2.074966532797858, "grad_norm": 0.47039592266082764, "learning_rate": 3.0980000000000005e-05, "loss": 0.0372, "step": 1550 }, { "epoch": 2.0883534136546187, "grad_norm": 0.6119139194488525, "learning_rate": 3.118e-05, "loss": 0.0428, "step": 1560 }, { "epoch": 2.101740294511379, "grad_norm": 0.6293010115623474, "learning_rate": 3.138e-05, "loss": 0.0363, "step": 1570 }, { "epoch": 2.1151271753681393, "grad_norm": 0.4402483403682709, "learning_rate": 3.1580000000000006e-05, "loss": 0.0339, "step": 1580 }, { "epoch": 2.1285140562248994, "grad_norm": 0.6767235398292542, "learning_rate": 3.1780000000000004e-05, "loss": 0.033, "step": 1590 }, { "epoch": 2.14190093708166, "grad_norm": 0.5589588284492493, "learning_rate": 3.198e-05, "loss": 0.0339, "step": 1600 }, { "epoch": 2.1552878179384205, "grad_norm": 0.36218103766441345, "learning_rate": 3.218e-05, "loss": 0.0292, "step": 1610 }, { "epoch": 2.1686746987951806, "grad_norm": 0.4453073740005493, "learning_rate": 3.238e-05, "loss": 0.0291, "step": 1620 }, { "epoch": 2.182061579651941, "grad_norm": 0.45879265666007996, "learning_rate": 3.2579999999999996e-05, "loss": 0.0333, "step": 1630 }, { "epoch": 2.1954484605087012, "grad_norm": 0.5943646430969238, "learning_rate": 3.278e-05, "loss": 0.0298, "step": 1640 }, { "epoch": 2.208835341365462, "grad_norm": 0.5365813374519348, "learning_rate": 3.298e-05, "loss": 0.0305, "step": 1650 }, { "epoch": 2.2222222222222223, "grad_norm": 0.4441159963607788, "learning_rate": 3.318e-05, "loss": 0.0306, "step": 1660 }, { "epoch": 2.2356091030789824, "grad_norm": 0.5501639246940613, "learning_rate": 3.338e-05, "loss": 0.0291, "step": 1670 }, { "epoch": 2.248995983935743, "grad_norm": 0.2848358452320099, "learning_rate": 3.358e-05, "loss": 0.0316, "step": 1680 }, { "epoch": 2.2623828647925035, "grad_norm": 0.7831214070320129, "learning_rate": 3.378e-05, "loss": 0.0371, "step": 1690 }, { "epoch": 2.2757697456492636, "grad_norm": 0.32313355803489685, "learning_rate": 3.398e-05, "loss": 0.0328, "step": 1700 }, { "epoch": 2.289156626506024, "grad_norm": 0.3116050660610199, "learning_rate": 3.418e-05, "loss": 0.0347, "step": 1710 }, { "epoch": 2.3025435073627847, "grad_norm": 0.28335124254226685, "learning_rate": 3.438e-05, "loss": 0.0282, "step": 1720 }, { "epoch": 2.3159303882195448, "grad_norm": 0.3657524287700653, "learning_rate": 3.4580000000000004e-05, "loss": 0.0306, "step": 1730 }, { "epoch": 2.3293172690763053, "grad_norm": 0.3579058349132538, "learning_rate": 3.478e-05, "loss": 0.0269, "step": 1740 }, { "epoch": 2.3427041499330654, "grad_norm": 0.6250649690628052, "learning_rate": 3.498e-05, "loss": 0.0312, "step": 1750 }, { "epoch": 2.356091030789826, "grad_norm": 0.3707387447357178, "learning_rate": 3.518e-05, "loss": 0.0325, "step": 1760 }, { "epoch": 2.3694779116465865, "grad_norm": 0.30829742550849915, "learning_rate": 3.5380000000000003e-05, "loss": 0.0289, "step": 1770 }, { "epoch": 2.3828647925033466, "grad_norm": 0.799526572227478, "learning_rate": 3.558e-05, "loss": 0.0306, "step": 1780 }, { "epoch": 2.396251673360107, "grad_norm": 0.3945527672767639, "learning_rate": 3.578e-05, "loss": 0.0321, "step": 1790 }, { "epoch": 2.4096385542168672, "grad_norm": 0.6701476573944092, "learning_rate": 3.5980000000000004e-05, "loss": 0.0328, "step": 1800 }, { "epoch": 2.4230254350736278, "grad_norm": 0.7032196521759033, "learning_rate": 3.618e-05, "loss": 0.0337, "step": 1810 }, { "epoch": 2.4364123159303883, "grad_norm": 0.5313075184822083, "learning_rate": 3.638e-05, "loss": 0.0342, "step": 1820 }, { "epoch": 2.4497991967871484, "grad_norm": 0.5928793549537659, "learning_rate": 3.6580000000000006e-05, "loss": 0.0328, "step": 1830 }, { "epoch": 2.463186077643909, "grad_norm": 0.5612986087799072, "learning_rate": 3.6780000000000004e-05, "loss": 0.0317, "step": 1840 }, { "epoch": 2.4765729585006695, "grad_norm": 0.36505380272865295, "learning_rate": 3.698e-05, "loss": 0.0318, "step": 1850 }, { "epoch": 2.4899598393574296, "grad_norm": 1.2023345232009888, "learning_rate": 3.7180000000000007e-05, "loss": 0.0327, "step": 1860 }, { "epoch": 2.50334672021419, "grad_norm": 1.2761398553848267, "learning_rate": 3.7380000000000005e-05, "loss": 0.0283, "step": 1870 }, { "epoch": 2.5167336010709507, "grad_norm": 1.1124991178512573, "learning_rate": 3.758e-05, "loss": 0.0325, "step": 1880 }, { "epoch": 2.5301204819277108, "grad_norm": 0.40218469500541687, "learning_rate": 3.778000000000001e-05, "loss": 0.0335, "step": 1890 }, { "epoch": 2.5435073627844713, "grad_norm": 0.5131334066390991, "learning_rate": 3.7980000000000006e-05, "loss": 0.0277, "step": 1900 }, { "epoch": 2.5568942436412314, "grad_norm": 0.8401867747306824, "learning_rate": 3.818e-05, "loss": 0.0335, "step": 1910 }, { "epoch": 2.570281124497992, "grad_norm": 0.38820406794548035, "learning_rate": 3.838e-05, "loss": 0.0276, "step": 1920 }, { "epoch": 2.5836680053547525, "grad_norm": 0.5476343035697937, "learning_rate": 3.858e-05, "loss": 0.0305, "step": 1930 }, { "epoch": 2.5970548862115126, "grad_norm": 0.49557268619537354, "learning_rate": 3.878e-05, "loss": 0.0377, "step": 1940 }, { "epoch": 2.610441767068273, "grad_norm": 0.39305806159973145, "learning_rate": 3.898e-05, "loss": 0.0325, "step": 1950 }, { "epoch": 2.6238286479250332, "grad_norm": 0.39029932022094727, "learning_rate": 3.918e-05, "loss": 0.036, "step": 1960 }, { "epoch": 2.6372155287817938, "grad_norm": 0.4531610608100891, "learning_rate": 3.938e-05, "loss": 0.0317, "step": 1970 }, { "epoch": 2.6506024096385543, "grad_norm": 0.43554210662841797, "learning_rate": 3.958e-05, "loss": 0.034, "step": 1980 }, { "epoch": 2.663989290495315, "grad_norm": 0.3571923077106476, "learning_rate": 3.978e-05, "loss": 0.0292, "step": 1990 }, { "epoch": 2.677376171352075, "grad_norm": 0.5475931167602539, "learning_rate": 3.998e-05, "loss": 0.0313, "step": 2000 }, { "epoch": 2.6907630522088355, "grad_norm": 0.48654893040657043, "learning_rate": 4.018e-05, "loss": 0.0288, "step": 2010 }, { "epoch": 2.7041499330655956, "grad_norm": 0.6027318239212036, "learning_rate": 4.038e-05, "loss": 0.0334, "step": 2020 }, { "epoch": 2.717536813922356, "grad_norm": 0.4515067934989929, "learning_rate": 4.058e-05, "loss": 0.0293, "step": 2030 }, { "epoch": 2.7309236947791167, "grad_norm": 0.6377482414245605, "learning_rate": 4.078e-05, "loss": 0.032, "step": 2040 }, { "epoch": 2.7443105756358768, "grad_norm": 0.4819904863834381, "learning_rate": 4.0980000000000004e-05, "loss": 0.0318, "step": 2050 }, { "epoch": 2.7576974564926373, "grad_norm": 0.7053554654121399, "learning_rate": 4.118e-05, "loss": 0.0287, "step": 2060 }, { "epoch": 2.7710843373493974, "grad_norm": 0.4567822813987732, "learning_rate": 4.138e-05, "loss": 0.032, "step": 2070 }, { "epoch": 2.784471218206158, "grad_norm": 0.33414727449417114, "learning_rate": 4.1580000000000005e-05, "loss": 0.0293, "step": 2080 }, { "epoch": 2.7978580990629185, "grad_norm": 0.5735210180282593, "learning_rate": 4.178e-05, "loss": 0.0321, "step": 2090 }, { "epoch": 2.8112449799196786, "grad_norm": 0.4800925850868225, "learning_rate": 4.198e-05, "loss": 0.0307, "step": 2100 }, { "epoch": 2.824631860776439, "grad_norm": 0.538004457950592, "learning_rate": 4.2180000000000006e-05, "loss": 0.0268, "step": 2110 }, { "epoch": 2.8380187416331992, "grad_norm": 0.3366444706916809, "learning_rate": 4.2380000000000004e-05, "loss": 0.0268, "step": 2120 }, { "epoch": 2.8514056224899598, "grad_norm": 0.3467133343219757, "learning_rate": 4.258e-05, "loss": 0.0321, "step": 2130 }, { "epoch": 2.8647925033467203, "grad_norm": 0.5078033208847046, "learning_rate": 4.278e-05, "loss": 0.0276, "step": 2140 }, { "epoch": 2.878179384203481, "grad_norm": 0.5245142579078674, "learning_rate": 4.2980000000000005e-05, "loss": 0.0292, "step": 2150 }, { "epoch": 2.891566265060241, "grad_norm": 0.5201922655105591, "learning_rate": 4.318e-05, "loss": 0.0264, "step": 2160 }, { "epoch": 2.9049531459170015, "grad_norm": 0.7199699282646179, "learning_rate": 4.338e-05, "loss": 0.0235, "step": 2170 }, { "epoch": 2.9183400267737616, "grad_norm": 0.33862927556037903, "learning_rate": 4.3580000000000006e-05, "loss": 0.0266, "step": 2180 }, { "epoch": 2.931726907630522, "grad_norm": 0.6050364971160889, "learning_rate": 4.3780000000000004e-05, "loss": 0.0275, "step": 2190 }, { "epoch": 2.9451137884872827, "grad_norm": 0.4943307042121887, "learning_rate": 4.398e-05, "loss": 0.0325, "step": 2200 }, { "epoch": 2.9585006693440428, "grad_norm": 0.5588234663009644, "learning_rate": 4.418000000000001e-05, "loss": 0.029, "step": 2210 }, { "epoch": 2.9718875502008033, "grad_norm": 0.38576632738113403, "learning_rate": 4.438e-05, "loss": 0.0312, "step": 2220 }, { "epoch": 2.9852744310575634, "grad_norm": 0.33135977387428284, "learning_rate": 4.458e-05, "loss": 0.0277, "step": 2230 }, { "epoch": 2.998661311914324, "grad_norm": 0.3903142809867859, "learning_rate": 4.478e-05, "loss": 0.0273, "step": 2240 }, { "epoch": 3.0120481927710845, "grad_norm": 0.40142399072647095, "learning_rate": 4.498e-05, "loss": 0.0319, "step": 2250 }, { "epoch": 3.0254350736278446, "grad_norm": 0.5654523372650146, "learning_rate": 4.518e-05, "loss": 0.0298, "step": 2260 }, { "epoch": 3.038821954484605, "grad_norm": 0.4361765682697296, "learning_rate": 4.538e-05, "loss": 0.0267, "step": 2270 }, { "epoch": 3.0522088353413657, "grad_norm": 0.7305195927619934, "learning_rate": 4.558e-05, "loss": 0.0291, "step": 2280 }, { "epoch": 3.0655957161981258, "grad_norm": 0.5301432013511658, "learning_rate": 4.578e-05, "loss": 0.0289, "step": 2290 }, { "epoch": 3.0789825970548863, "grad_norm": 0.5859612822532654, "learning_rate": 4.5980000000000004e-05, "loss": 0.0312, "step": 2300 }, { "epoch": 3.0923694779116464, "grad_norm": 0.35726794600486755, "learning_rate": 4.618e-05, "loss": 0.0306, "step": 2310 }, { "epoch": 3.105756358768407, "grad_norm": 0.9081512093544006, "learning_rate": 4.638e-05, "loss": 0.0305, "step": 2320 }, { "epoch": 3.1191432396251675, "grad_norm": 0.7240023016929626, "learning_rate": 4.6580000000000005e-05, "loss": 0.0343, "step": 2330 }, { "epoch": 3.1325301204819276, "grad_norm": 0.5154154896736145, "learning_rate": 4.678e-05, "loss": 0.0317, "step": 2340 }, { "epoch": 3.145917001338688, "grad_norm": 0.2752417325973511, "learning_rate": 4.698e-05, "loss": 0.0324, "step": 2350 }, { "epoch": 3.1593038821954487, "grad_norm": 0.3282206952571869, "learning_rate": 4.718e-05, "loss": 0.0306, "step": 2360 }, { "epoch": 3.1726907630522088, "grad_norm": 0.5892446637153625, "learning_rate": 4.7380000000000004e-05, "loss": 0.0298, "step": 2370 }, { "epoch": 3.1860776439089693, "grad_norm": 0.35505232214927673, "learning_rate": 4.758e-05, "loss": 0.0291, "step": 2380 }, { "epoch": 3.1994645247657294, "grad_norm": 0.3648695647716522, "learning_rate": 4.778e-05, "loss": 0.0285, "step": 2390 }, { "epoch": 3.21285140562249, "grad_norm": 0.36381983757019043, "learning_rate": 4.7980000000000005e-05, "loss": 0.0265, "step": 2400 }, { "epoch": 3.2262382864792505, "grad_norm": 0.7317960858345032, "learning_rate": 4.818e-05, "loss": 0.0291, "step": 2410 }, { "epoch": 3.2396251673360106, "grad_norm": 1.3959763050079346, "learning_rate": 4.838e-05, "loss": 0.0374, "step": 2420 }, { "epoch": 3.253012048192771, "grad_norm": 0.4136962294578552, "learning_rate": 4.8580000000000006e-05, "loss": 0.0285, "step": 2430 }, { "epoch": 3.266398929049531, "grad_norm": 0.8105162978172302, "learning_rate": 4.8780000000000004e-05, "loss": 0.0332, "step": 2440 }, { "epoch": 3.2797858099062918, "grad_norm": 0.30481186509132385, "learning_rate": 4.898e-05, "loss": 0.0309, "step": 2450 }, { "epoch": 3.2931726907630523, "grad_norm": 0.5565037131309509, "learning_rate": 4.918000000000001e-05, "loss": 0.0296, "step": 2460 }, { "epoch": 3.3065595716198124, "grad_norm": 0.3817591965198517, "learning_rate": 4.9380000000000005e-05, "loss": 0.0321, "step": 2470 }, { "epoch": 3.319946452476573, "grad_norm": 0.8669394850730896, "learning_rate": 4.958e-05, "loss": 0.0369, "step": 2480 }, { "epoch": 3.3333333333333335, "grad_norm": 0.33075788617134094, "learning_rate": 4.978e-05, "loss": 0.0371, "step": 2490 }, { "epoch": 3.3467202141900936, "grad_norm": 0.3888695538043976, "learning_rate": 4.9980000000000006e-05, "loss": 0.0277, "step": 2500 }, { "epoch": 3.360107095046854, "grad_norm": 0.5870815515518188, "learning_rate": 5.0180000000000004e-05, "loss": 0.0296, "step": 2510 }, { "epoch": 3.3734939759036147, "grad_norm": 0.6338012218475342, "learning_rate": 5.038e-05, "loss": 0.0295, "step": 2520 }, { "epoch": 3.3868808567603748, "grad_norm": 0.7033401131629944, "learning_rate": 5.058000000000001e-05, "loss": 0.0319, "step": 2530 }, { "epoch": 3.4002677376171353, "grad_norm": 0.5548107624053955, "learning_rate": 5.0780000000000005e-05, "loss": 0.0316, "step": 2540 }, { "epoch": 3.4136546184738954, "grad_norm": 0.2527974247932434, "learning_rate": 5.098e-05, "loss": 0.0316, "step": 2550 }, { "epoch": 3.427041499330656, "grad_norm": 0.4245985448360443, "learning_rate": 5.118000000000001e-05, "loss": 0.0297, "step": 2560 }, { "epoch": 3.4404283801874165, "grad_norm": 0.6474616527557373, "learning_rate": 5.1380000000000006e-05, "loss": 0.0289, "step": 2570 }, { "epoch": 3.4538152610441766, "grad_norm": 0.3241284489631653, "learning_rate": 5.1580000000000004e-05, "loss": 0.0266, "step": 2580 }, { "epoch": 3.467202141900937, "grad_norm": 0.3123779296875, "learning_rate": 5.178000000000001e-05, "loss": 0.025, "step": 2590 }, { "epoch": 3.480589022757697, "grad_norm": 0.3686521351337433, "learning_rate": 5.198000000000001e-05, "loss": 0.033, "step": 2600 }, { "epoch": 3.4939759036144578, "grad_norm": 0.31303754448890686, "learning_rate": 5.2180000000000005e-05, "loss": 0.0294, "step": 2610 }, { "epoch": 3.5073627844712183, "grad_norm": 0.2545417845249176, "learning_rate": 5.238000000000001e-05, "loss": 0.0277, "step": 2620 }, { "epoch": 3.520749665327979, "grad_norm": 0.3321932852268219, "learning_rate": 5.258000000000001e-05, "loss": 0.0285, "step": 2630 }, { "epoch": 3.534136546184739, "grad_norm": 0.45128682255744934, "learning_rate": 5.2780000000000006e-05, "loss": 0.0328, "step": 2640 }, { "epoch": 3.5475234270414995, "grad_norm": 0.2729658782482147, "learning_rate": 5.2980000000000004e-05, "loss": 0.031, "step": 2650 }, { "epoch": 3.5609103078982596, "grad_norm": 0.35712629556655884, "learning_rate": 5.318000000000001e-05, "loss": 0.028, "step": 2660 }, { "epoch": 3.57429718875502, "grad_norm": 0.6495566964149475, "learning_rate": 5.338000000000001e-05, "loss": 0.0314, "step": 2670 }, { "epoch": 3.5876840696117807, "grad_norm": 0.49356216192245483, "learning_rate": 5.3580000000000005e-05, "loss": 0.0302, "step": 2680 }, { "epoch": 3.6010709504685408, "grad_norm": 0.47037452459335327, "learning_rate": 5.378e-05, "loss": 0.0283, "step": 2690 }, { "epoch": 3.6144578313253013, "grad_norm": 0.6284288763999939, "learning_rate": 5.3979999999999995e-05, "loss": 0.0271, "step": 2700 }, { "epoch": 3.6278447121820614, "grad_norm": 0.45488160848617554, "learning_rate": 5.418e-05, "loss": 0.0269, "step": 2710 }, { "epoch": 3.641231593038822, "grad_norm": 0.25134581327438354, "learning_rate": 5.438e-05, "loss": 0.0255, "step": 2720 }, { "epoch": 3.6546184738955825, "grad_norm": 0.3824189007282257, "learning_rate": 5.4579999999999996e-05, "loss": 0.0307, "step": 2730 }, { "epoch": 3.6680053547523426, "grad_norm": 0.5142388343811035, "learning_rate": 5.478e-05, "loss": 0.026, "step": 2740 }, { "epoch": 3.681392235609103, "grad_norm": 0.42099976539611816, "learning_rate": 5.498e-05, "loss": 0.0282, "step": 2750 }, { "epoch": 3.694779116465863, "grad_norm": 0.6228603720664978, "learning_rate": 5.518e-05, "loss": 0.0286, "step": 2760 }, { "epoch": 3.7081659973226238, "grad_norm": 0.6350533366203308, "learning_rate": 5.538e-05, "loss": 0.0298, "step": 2770 }, { "epoch": 3.7215528781793843, "grad_norm": 2.494633197784424, "learning_rate": 5.558e-05, "loss": 0.0281, "step": 2780 }, { "epoch": 3.734939759036145, "grad_norm": 0.3075539767742157, "learning_rate": 5.578e-05, "loss": 0.0253, "step": 2790 }, { "epoch": 3.748326639892905, "grad_norm": 0.32669520378112793, "learning_rate": 5.5979999999999996e-05, "loss": 0.0303, "step": 2800 }, { "epoch": 3.7617135207496655, "grad_norm": 0.4572426974773407, "learning_rate": 5.618e-05, "loss": 0.0288, "step": 2810 }, { "epoch": 3.7751004016064256, "grad_norm": 0.29279372096061707, "learning_rate": 5.638e-05, "loss": 0.0259, "step": 2820 }, { "epoch": 3.788487282463186, "grad_norm": 0.4309578835964203, "learning_rate": 5.658e-05, "loss": 0.025, "step": 2830 }, { "epoch": 3.8018741633199467, "grad_norm": 0.5688090920448303, "learning_rate": 5.678e-05, "loss": 0.029, "step": 2840 }, { "epoch": 3.8152610441767068, "grad_norm": 0.6632748246192932, "learning_rate": 5.698e-05, "loss": 0.0283, "step": 2850 }, { "epoch": 3.8286479250334673, "grad_norm": 0.3793449401855469, "learning_rate": 5.718e-05, "loss": 0.0281, "step": 2860 }, { "epoch": 3.8420348058902274, "grad_norm": 0.4066178500652313, "learning_rate": 5.738e-05, "loss": 0.0291, "step": 2870 }, { "epoch": 3.855421686746988, "grad_norm": 0.47767719626426697, "learning_rate": 5.758e-05, "loss": 0.0279, "step": 2880 }, { "epoch": 3.8688085676037485, "grad_norm": 0.56046462059021, "learning_rate": 5.778e-05, "loss": 0.0293, "step": 2890 }, { "epoch": 3.8821954484605086, "grad_norm": 0.5077753067016602, "learning_rate": 5.7980000000000004e-05, "loss": 0.0296, "step": 2900 }, { "epoch": 3.895582329317269, "grad_norm": 0.32225728034973145, "learning_rate": 5.818e-05, "loss": 0.0252, "step": 2910 }, { "epoch": 3.908969210174029, "grad_norm": 0.4174984097480774, "learning_rate": 5.838e-05, "loss": 0.0297, "step": 2920 }, { "epoch": 3.9223560910307897, "grad_norm": 0.41778525710105896, "learning_rate": 5.858e-05, "loss": 0.0293, "step": 2930 }, { "epoch": 3.9357429718875503, "grad_norm": 0.260854035615921, "learning_rate": 5.878e-05, "loss": 0.0318, "step": 2940 }, { "epoch": 3.949129852744311, "grad_norm": 0.44900771975517273, "learning_rate": 5.898e-05, "loss": 0.0284, "step": 2950 }, { "epoch": 3.962516733601071, "grad_norm": 0.41540950536727905, "learning_rate": 5.918e-05, "loss": 0.0258, "step": 2960 }, { "epoch": 3.9759036144578315, "grad_norm": 0.2509534955024719, "learning_rate": 5.9380000000000004e-05, "loss": 0.0269, "step": 2970 }, { "epoch": 3.9892904953145916, "grad_norm": 0.28206729888916016, "learning_rate": 5.958e-05, "loss": 0.0235, "step": 2980 }, { "epoch": 4.002677376171352, "grad_norm": 1.5676348209381104, "learning_rate": 5.978e-05, "loss": 0.0246, "step": 2990 }, { "epoch": 4.016064257028113, "grad_norm": 0.5232904553413391, "learning_rate": 5.9980000000000005e-05, "loss": 0.028, "step": 3000 }, { "epoch": 4.029451137884873, "grad_norm": 0.33352938294410706, "learning_rate": 6.018e-05, "loss": 0.0264, "step": 3010 }, { "epoch": 4.042838018741633, "grad_norm": 0.38555508852005005, "learning_rate": 6.038e-05, "loss": 0.0255, "step": 3020 }, { "epoch": 4.056224899598393, "grad_norm": 0.5562551021575928, "learning_rate": 6.0580000000000006e-05, "loss": 0.0241, "step": 3030 }, { "epoch": 4.069611780455154, "grad_norm": 0.5333731174468994, "learning_rate": 6.0780000000000004e-05, "loss": 0.0281, "step": 3040 }, { "epoch": 4.0829986613119145, "grad_norm": 0.4716704785823822, "learning_rate": 6.098e-05, "loss": 0.0256, "step": 3050 }, { "epoch": 4.096385542168675, "grad_norm": 0.4097311198711395, "learning_rate": 6.118000000000001e-05, "loss": 0.0246, "step": 3060 }, { "epoch": 4.109772423025435, "grad_norm": 0.47474926710128784, "learning_rate": 6.138e-05, "loss": 0.0281, "step": 3070 }, { "epoch": 4.123159303882195, "grad_norm": 0.29303738474845886, "learning_rate": 6.158e-05, "loss": 0.0247, "step": 3080 }, { "epoch": 4.136546184738956, "grad_norm": 0.5339012145996094, "learning_rate": 6.178000000000001e-05, "loss": 0.0269, "step": 3090 }, { "epoch": 4.149933065595716, "grad_norm": 0.567791759967804, "learning_rate": 6.198e-05, "loss": 0.0268, "step": 3100 }, { "epoch": 4.163319946452477, "grad_norm": 0.3173453211784363, "learning_rate": 6.218e-05, "loss": 0.0278, "step": 3110 }, { "epoch": 4.176706827309237, "grad_norm": 0.2991514503955841, "learning_rate": 6.238000000000001e-05, "loss": 0.0254, "step": 3120 }, { "epoch": 4.190093708165997, "grad_norm": 0.21947303414344788, "learning_rate": 6.258e-05, "loss": 0.0267, "step": 3130 }, { "epoch": 4.203480589022758, "grad_norm": 0.29646021127700806, "learning_rate": 6.278e-05, "loss": 0.0274, "step": 3140 }, { "epoch": 4.216867469879518, "grad_norm": 0.4813656210899353, "learning_rate": 6.298000000000001e-05, "loss": 0.0266, "step": 3150 }, { "epoch": 4.230254350736279, "grad_norm": 0.23003296554088593, "learning_rate": 6.318e-05, "loss": 0.0255, "step": 3160 }, { "epoch": 4.243641231593039, "grad_norm": 0.45569080114364624, "learning_rate": 6.338e-05, "loss": 0.0286, "step": 3170 }, { "epoch": 4.257028112449799, "grad_norm": 0.9497738480567932, "learning_rate": 6.358000000000001e-05, "loss": 0.025, "step": 3180 }, { "epoch": 4.270414993306559, "grad_norm": 0.4662807881832123, "learning_rate": 6.378e-05, "loss": 0.0254, "step": 3190 }, { "epoch": 4.28380187416332, "grad_norm": 0.3247736692428589, "learning_rate": 6.398000000000001e-05, "loss": 0.0322, "step": 3200 }, { "epoch": 4.2971887550200805, "grad_norm": 0.3928516209125519, "learning_rate": 6.418000000000001e-05, "loss": 0.0263, "step": 3210 }, { "epoch": 4.310575635876841, "grad_norm": 0.5050843358039856, "learning_rate": 6.438e-05, "loss": 0.0238, "step": 3220 }, { "epoch": 4.323962516733601, "grad_norm": 0.3523464500904083, "learning_rate": 6.458000000000001e-05, "loss": 0.0225, "step": 3230 }, { "epoch": 4.337349397590361, "grad_norm": 3.9522781372070312, "learning_rate": 6.478000000000001e-05, "loss": 0.0243, "step": 3240 }, { "epoch": 4.350736278447122, "grad_norm": 0.31297212839126587, "learning_rate": 6.498e-05, "loss": 0.026, "step": 3250 }, { "epoch": 4.364123159303882, "grad_norm": 0.40168115496635437, "learning_rate": 6.518000000000001e-05, "loss": 0.0262, "step": 3260 }, { "epoch": 4.377510040160643, "grad_norm": 0.34187689423561096, "learning_rate": 6.538000000000001e-05, "loss": 0.0275, "step": 3270 }, { "epoch": 4.3908969210174025, "grad_norm": 0.45271188020706177, "learning_rate": 6.558e-05, "loss": 0.027, "step": 3280 }, { "epoch": 4.404283801874163, "grad_norm": 0.4623757600784302, "learning_rate": 6.578000000000001e-05, "loss": 0.0285, "step": 3290 }, { "epoch": 4.417670682730924, "grad_norm": 0.32504862546920776, "learning_rate": 6.598e-05, "loss": 0.0249, "step": 3300 }, { "epoch": 4.431057563587684, "grad_norm": 0.47496286034584045, "learning_rate": 6.618e-05, "loss": 0.0239, "step": 3310 }, { "epoch": 4.444444444444445, "grad_norm": 0.3830445110797882, "learning_rate": 6.638e-05, "loss": 0.026, "step": 3320 }, { "epoch": 4.457831325301205, "grad_norm": 0.39752885699272156, "learning_rate": 6.658e-05, "loss": 0.0278, "step": 3330 }, { "epoch": 4.471218206157965, "grad_norm": 0.7040726542472839, "learning_rate": 6.678e-05, "loss": 0.0256, "step": 3340 }, { "epoch": 4.484605087014725, "grad_norm": 0.42450830340385437, "learning_rate": 6.698e-05, "loss": 0.0229, "step": 3350 }, { "epoch": 4.497991967871486, "grad_norm": 0.4042768180370331, "learning_rate": 6.718e-05, "loss": 0.0261, "step": 3360 }, { "epoch": 4.5113788487282465, "grad_norm": 0.42032092809677124, "learning_rate": 6.738e-05, "loss": 0.0273, "step": 3370 }, { "epoch": 4.524765729585007, "grad_norm": 0.32114091515541077, "learning_rate": 6.758e-05, "loss": 0.0241, "step": 3380 }, { "epoch": 4.538152610441767, "grad_norm": 0.213352769613266, "learning_rate": 6.778e-05, "loss": 0.0224, "step": 3390 }, { "epoch": 4.551539491298527, "grad_norm": 0.27077633142471313, "learning_rate": 6.798e-05, "loss": 0.0234, "step": 3400 }, { "epoch": 4.564926372155288, "grad_norm": 0.17766185104846954, "learning_rate": 6.818e-05, "loss": 0.0255, "step": 3410 }, { "epoch": 4.578313253012048, "grad_norm": 0.6031275987625122, "learning_rate": 6.838e-05, "loss": 0.0233, "step": 3420 }, { "epoch": 4.591700133868809, "grad_norm": 0.2986176609992981, "learning_rate": 6.858e-05, "loss": 0.0248, "step": 3430 }, { "epoch": 4.605087014725569, "grad_norm": 0.483426570892334, "learning_rate": 6.878e-05, "loss": 0.0277, "step": 3440 }, { "epoch": 4.618473895582329, "grad_norm": 0.34878110885620117, "learning_rate": 6.898e-05, "loss": 0.0308, "step": 3450 }, { "epoch": 4.6318607764390896, "grad_norm": 0.2797463536262512, "learning_rate": 6.918e-05, "loss": 0.0277, "step": 3460 }, { "epoch": 4.64524765729585, "grad_norm": 0.34507322311401367, "learning_rate": 6.938e-05, "loss": 0.027, "step": 3470 }, { "epoch": 4.658634538152611, "grad_norm": 0.6012392640113831, "learning_rate": 6.958e-05, "loss": 0.0271, "step": 3480 }, { "epoch": 4.672021419009371, "grad_norm": 0.22588500380516052, "learning_rate": 6.978e-05, "loss": 0.0268, "step": 3490 }, { "epoch": 4.685408299866131, "grad_norm": 0.361601859331131, "learning_rate": 6.998e-05, "loss": 0.0261, "step": 3500 }, { "epoch": 4.698795180722891, "grad_norm": 0.3032180964946747, "learning_rate": 7.018e-05, "loss": 0.0244, "step": 3510 }, { "epoch": 4.712182061579652, "grad_norm": 0.41927042603492737, "learning_rate": 7.038e-05, "loss": 0.0235, "step": 3520 }, { "epoch": 4.7255689424364125, "grad_norm": 0.35093963146209717, "learning_rate": 7.058e-05, "loss": 0.0279, "step": 3530 }, { "epoch": 4.738955823293173, "grad_norm": 0.8468965291976929, "learning_rate": 7.078e-05, "loss": 0.0257, "step": 3540 }, { "epoch": 4.7523427041499335, "grad_norm": 0.3216632902622223, "learning_rate": 7.098e-05, "loss": 0.0238, "step": 3550 }, { "epoch": 4.765729585006693, "grad_norm": 0.3029676675796509, "learning_rate": 7.118e-05, "loss": 0.0268, "step": 3560 }, { "epoch": 4.779116465863454, "grad_norm": 0.5512823462486267, "learning_rate": 7.138e-05, "loss": 0.0256, "step": 3570 }, { "epoch": 4.792503346720214, "grad_norm": 0.3555910587310791, "learning_rate": 7.158e-05, "loss": 0.0268, "step": 3580 }, { "epoch": 4.805890227576975, "grad_norm": 0.34082505106925964, "learning_rate": 7.178000000000001e-05, "loss": 0.0267, "step": 3590 }, { "epoch": 4.8192771084337345, "grad_norm": 0.361375093460083, "learning_rate": 7.198e-05, "loss": 0.0267, "step": 3600 }, { "epoch": 4.832663989290495, "grad_norm": 0.2521323561668396, "learning_rate": 7.218e-05, "loss": 0.0234, "step": 3610 }, { "epoch": 4.8460508701472556, "grad_norm": 0.35989004373550415, "learning_rate": 7.238000000000001e-05, "loss": 0.0235, "step": 3620 }, { "epoch": 4.859437751004016, "grad_norm": 0.2901705503463745, "learning_rate": 7.258e-05, "loss": 0.0238, "step": 3630 }, { "epoch": 4.872824631860777, "grad_norm": 0.4676411747932434, "learning_rate": 7.278e-05, "loss": 0.025, "step": 3640 }, { "epoch": 4.886211512717537, "grad_norm": 0.27701494097709656, "learning_rate": 7.298000000000001e-05, "loss": 0.0229, "step": 3650 }, { "epoch": 4.899598393574297, "grad_norm": 0.53192138671875, "learning_rate": 7.318e-05, "loss": 0.0249, "step": 3660 }, { "epoch": 4.912985274431057, "grad_norm": 0.398557186126709, "learning_rate": 7.338e-05, "loss": 0.0277, "step": 3670 }, { "epoch": 4.926372155287818, "grad_norm": 0.25634875893592834, "learning_rate": 7.358000000000001e-05, "loss": 0.0227, "step": 3680 }, { "epoch": 4.9397590361445785, "grad_norm": 0.271856427192688, "learning_rate": 7.378e-05, "loss": 0.0242, "step": 3690 }, { "epoch": 4.953145917001339, "grad_norm": 0.43850207328796387, "learning_rate": 7.398e-05, "loss": 0.0263, "step": 3700 }, { "epoch": 4.966532797858099, "grad_norm": 0.4283751845359802, "learning_rate": 7.418000000000001e-05, "loss": 0.0242, "step": 3710 }, { "epoch": 4.979919678714859, "grad_norm": 0.42630720138549805, "learning_rate": 7.438e-05, "loss": 0.0279, "step": 3720 }, { "epoch": 4.99330655957162, "grad_norm": 0.3594653010368347, "learning_rate": 7.458000000000001e-05, "loss": 0.0242, "step": 3730 }, { "epoch": 5.00669344042838, "grad_norm": 0.3213673532009125, "learning_rate": 7.478e-05, "loss": 0.0246, "step": 3740 }, { "epoch": 5.020080321285141, "grad_norm": 0.5568256378173828, "learning_rate": 7.498e-05, "loss": 0.0288, "step": 3750 }, { "epoch": 5.033467202141901, "grad_norm": 0.4741436541080475, "learning_rate": 7.518000000000001e-05, "loss": 0.0231, "step": 3760 }, { "epoch": 5.046854082998661, "grad_norm": 0.8884077668190002, "learning_rate": 7.538e-05, "loss": 0.0235, "step": 3770 }, { "epoch": 5.0602409638554215, "grad_norm": 0.38060975074768066, "learning_rate": 7.558e-05, "loss": 0.0239, "step": 3780 }, { "epoch": 5.073627844712182, "grad_norm": 0.9360460042953491, "learning_rate": 7.578000000000001e-05, "loss": 0.0254, "step": 3790 }, { "epoch": 5.087014725568943, "grad_norm": 0.46830788254737854, "learning_rate": 7.598e-05, "loss": 0.0284, "step": 3800 }, { "epoch": 5.100401606425703, "grad_norm": 0.33688411116600037, "learning_rate": 7.618e-05, "loss": 0.0273, "step": 3810 }, { "epoch": 5.113788487282463, "grad_norm": 0.30218562483787537, "learning_rate": 7.638000000000001e-05, "loss": 0.0269, "step": 3820 }, { "epoch": 5.127175368139223, "grad_norm": 0.33785828948020935, "learning_rate": 7.658e-05, "loss": 0.0277, "step": 3830 }, { "epoch": 5.140562248995984, "grad_norm": 0.4405739903450012, "learning_rate": 7.678000000000001e-05, "loss": 0.0257, "step": 3840 }, { "epoch": 5.1539491298527444, "grad_norm": 0.3405136168003082, "learning_rate": 7.698000000000001e-05, "loss": 0.0253, "step": 3850 }, { "epoch": 5.167336010709505, "grad_norm": 0.2938759922981262, "learning_rate": 7.718e-05, "loss": 0.025, "step": 3860 }, { "epoch": 5.180722891566265, "grad_norm": 0.2377338707447052, "learning_rate": 7.738000000000001e-05, "loss": 0.0259, "step": 3870 }, { "epoch": 5.194109772423025, "grad_norm": 0.3697676658630371, "learning_rate": 7.758000000000001e-05, "loss": 0.0276, "step": 3880 }, { "epoch": 5.207496653279786, "grad_norm": 0.40296420454978943, "learning_rate": 7.778e-05, "loss": 0.0291, "step": 3890 }, { "epoch": 5.220883534136546, "grad_norm": 0.35574638843536377, "learning_rate": 7.798000000000001e-05, "loss": 0.0288, "step": 3900 }, { "epoch": 5.234270414993307, "grad_norm": 1.513964295387268, "learning_rate": 7.818000000000001e-05, "loss": 0.0273, "step": 3910 }, { "epoch": 5.247657295850067, "grad_norm": 0.33365699648857117, "learning_rate": 7.838e-05, "loss": 0.029, "step": 3920 }, { "epoch": 5.261044176706827, "grad_norm": 0.43158286809921265, "learning_rate": 7.858000000000001e-05, "loss": 0.0275, "step": 3930 }, { "epoch": 5.2744310575635875, "grad_norm": 0.34941309690475464, "learning_rate": 7.878e-05, "loss": 0.0262, "step": 3940 }, { "epoch": 5.287817938420348, "grad_norm": 0.348500519990921, "learning_rate": 7.897999999999999e-05, "loss": 0.028, "step": 3950 }, { "epoch": 5.301204819277109, "grad_norm": 0.4680522382259369, "learning_rate": 7.918e-05, "loss": 0.0265, "step": 3960 }, { "epoch": 5.314591700133869, "grad_norm": 0.4879295825958252, "learning_rate": 7.938e-05, "loss": 0.0281, "step": 3970 }, { "epoch": 5.327978580990629, "grad_norm": 0.7061047554016113, "learning_rate": 7.958e-05, "loss": 0.0349, "step": 3980 }, { "epoch": 5.341365461847389, "grad_norm": 0.5388431549072266, "learning_rate": 7.978e-05, "loss": 0.0305, "step": 3990 }, { "epoch": 5.35475234270415, "grad_norm": 0.33446481823921204, "learning_rate": 7.998e-05, "loss": 0.0267, "step": 4000 }, { "epoch": 5.3681392235609104, "grad_norm": 0.6163093447685242, "learning_rate": 8.018e-05, "loss": 0.0273, "step": 4010 }, { "epoch": 5.381526104417671, "grad_norm": 0.32405781745910645, "learning_rate": 8.038e-05, "loss": 0.0247, "step": 4020 }, { "epoch": 5.394912985274431, "grad_norm": 0.311591774225235, "learning_rate": 8.058e-05, "loss": 0.0225, "step": 4030 }, { "epoch": 5.408299866131191, "grad_norm": 0.3569055497646332, "learning_rate": 8.078e-05, "loss": 0.0234, "step": 4040 }, { "epoch": 5.421686746987952, "grad_norm": 0.36045756936073303, "learning_rate": 8.098e-05, "loss": 0.0233, "step": 4050 }, { "epoch": 5.435073627844712, "grad_norm": 0.33300286531448364, "learning_rate": 8.118e-05, "loss": 0.0243, "step": 4060 }, { "epoch": 5.448460508701473, "grad_norm": 0.3248492181301117, "learning_rate": 8.138e-05, "loss": 0.0249, "step": 4070 }, { "epoch": 5.461847389558233, "grad_norm": 0.3006216883659363, "learning_rate": 8.158e-05, "loss": 0.0251, "step": 4080 }, { "epoch": 5.475234270414993, "grad_norm": 0.3839801251888275, "learning_rate": 8.178e-05, "loss": 0.0226, "step": 4090 }, { "epoch": 5.4886211512717535, "grad_norm": 0.26727837324142456, "learning_rate": 8.198e-05, "loss": 0.0209, "step": 4100 }, { "epoch": 5.502008032128514, "grad_norm": 0.4104100167751312, "learning_rate": 8.218e-05, "loss": 0.0226, "step": 4110 }, { "epoch": 5.515394912985275, "grad_norm": 0.8125231266021729, "learning_rate": 8.238000000000001e-05, "loss": 0.0231, "step": 4120 }, { "epoch": 5.528781793842035, "grad_norm": 0.5097118020057678, "learning_rate": 8.258e-05, "loss": 0.0212, "step": 4130 }, { "epoch": 5.542168674698795, "grad_norm": 0.36725330352783203, "learning_rate": 8.278e-05, "loss": 0.0265, "step": 4140 }, { "epoch": 5.555555555555555, "grad_norm": 0.34032171964645386, "learning_rate": 8.298000000000001e-05, "loss": 0.0194, "step": 4150 }, { "epoch": 5.568942436412316, "grad_norm": 0.3972896337509155, "learning_rate": 8.318e-05, "loss": 0.0264, "step": 4160 }, { "epoch": 5.582329317269076, "grad_norm": 0.32940852642059326, "learning_rate": 8.338e-05, "loss": 0.0214, "step": 4170 }, { "epoch": 5.595716198125837, "grad_norm": 0.3801056146621704, "learning_rate": 8.358e-05, "loss": 0.0231, "step": 4180 }, { "epoch": 5.6091030789825975, "grad_norm": 0.5233598351478577, "learning_rate": 8.378e-05, "loss": 0.0211, "step": 4190 }, { "epoch": 5.622489959839357, "grad_norm": 0.27911245822906494, "learning_rate": 8.398e-05, "loss": 0.0244, "step": 4200 }, { "epoch": 5.635876840696118, "grad_norm": 0.27777108550071716, "learning_rate": 8.418e-05, "loss": 0.0197, "step": 4210 }, { "epoch": 5.649263721552878, "grad_norm": 0.3151960074901581, "learning_rate": 8.438e-05, "loss": 0.0205, "step": 4220 }, { "epoch": 5.662650602409639, "grad_norm": 0.32723698019981384, "learning_rate": 8.458e-05, "loss": 0.024, "step": 4230 }, { "epoch": 5.6760374832663985, "grad_norm": 0.3100205361843109, "learning_rate": 8.478e-05, "loss": 0.0216, "step": 4240 }, { "epoch": 5.689424364123159, "grad_norm": 0.370474249124527, "learning_rate": 8.498e-05, "loss": 0.0275, "step": 4250 }, { "epoch": 5.7028112449799195, "grad_norm": 0.27281197905540466, "learning_rate": 8.518000000000001e-05, "loss": 0.0232, "step": 4260 }, { "epoch": 5.71619812583668, "grad_norm": 0.3006538152694702, "learning_rate": 8.538e-05, "loss": 0.0243, "step": 4270 }, { "epoch": 5.729585006693441, "grad_norm": 0.34175536036491394, "learning_rate": 8.558e-05, "loss": 0.023, "step": 4280 }, { "epoch": 5.742971887550201, "grad_norm": 0.2790347635746002, "learning_rate": 8.578000000000001e-05, "loss": 0.0216, "step": 4290 }, { "epoch": 5.756358768406961, "grad_norm": 1.3794137239456177, "learning_rate": 8.598e-05, "loss": 0.0221, "step": 4300 }, { "epoch": 5.769745649263721, "grad_norm": 0.3085281252861023, "learning_rate": 8.618e-05, "loss": 0.026, "step": 4310 }, { "epoch": 5.783132530120482, "grad_norm": 0.19296589493751526, "learning_rate": 8.638000000000001e-05, "loss": 0.025, "step": 4320 }, { "epoch": 5.796519410977242, "grad_norm": 0.1806168407201767, "learning_rate": 8.658e-05, "loss": 0.022, "step": 4330 }, { "epoch": 5.809906291834003, "grad_norm": 0.30470237135887146, "learning_rate": 8.678e-05, "loss": 0.0238, "step": 4340 }, { "epoch": 5.823293172690763, "grad_norm": 0.35208219289779663, "learning_rate": 8.698000000000001e-05, "loss": 0.0262, "step": 4350 }, { "epoch": 5.836680053547523, "grad_norm": 0.33472466468811035, "learning_rate": 8.718e-05, "loss": 0.0273, "step": 4360 }, { "epoch": 5.850066934404284, "grad_norm": 0.3481844663619995, "learning_rate": 8.738000000000001e-05, "loss": 0.03, "step": 4370 }, { "epoch": 5.863453815261044, "grad_norm": 0.3978363573551178, "learning_rate": 8.758000000000001e-05, "loss": 0.0246, "step": 4380 }, { "epoch": 5.876840696117805, "grad_norm": 0.5215231776237488, "learning_rate": 8.778e-05, "loss": 0.0255, "step": 4390 }, { "epoch": 5.890227576974565, "grad_norm": 1.1153380870819092, "learning_rate": 8.798000000000001e-05, "loss": 0.0274, "step": 4400 }, { "epoch": 5.903614457831325, "grad_norm": 0.8833138942718506, "learning_rate": 8.818000000000001e-05, "loss": 0.0263, "step": 4410 }, { "epoch": 5.9170013386880855, "grad_norm": 0.31609559059143066, "learning_rate": 8.838e-05, "loss": 0.0273, "step": 4420 }, { "epoch": 5.930388219544846, "grad_norm": 0.3090187609195709, "learning_rate": 8.858000000000001e-05, "loss": 0.0266, "step": 4430 }, { "epoch": 5.943775100401607, "grad_norm": 0.3256443738937378, "learning_rate": 8.878000000000001e-05, "loss": 0.0228, "step": 4440 }, { "epoch": 5.957161981258367, "grad_norm": 0.4660596549510956, "learning_rate": 8.898e-05, "loss": 0.0254, "step": 4450 }, { "epoch": 5.970548862115127, "grad_norm": 0.40815967321395874, "learning_rate": 8.918000000000001e-05, "loss": 0.0292, "step": 4460 }, { "epoch": 5.983935742971887, "grad_norm": 0.4219057857990265, "learning_rate": 8.938e-05, "loss": 0.0255, "step": 4470 }, { "epoch": 5.997322623828648, "grad_norm": 0.3461907207965851, "learning_rate": 8.958e-05, "loss": 0.0238, "step": 4480 }, { "epoch": 6.010709504685408, "grad_norm": 0.3458860218524933, "learning_rate": 8.978000000000001e-05, "loss": 0.0229, "step": 4490 }, { "epoch": 6.024096385542169, "grad_norm": 0.28998249769210815, "learning_rate": 8.998e-05, "loss": 0.0274, "step": 4500 }, { "epoch": 6.037483266398929, "grad_norm": 0.46222221851348877, "learning_rate": 9.018000000000001e-05, "loss": 0.0242, "step": 4510 }, { "epoch": 6.050870147255689, "grad_norm": 0.34611383080482483, "learning_rate": 9.038000000000001e-05, "loss": 0.0243, "step": 4520 }, { "epoch": 6.06425702811245, "grad_norm": 0.3044336140155792, "learning_rate": 9.058e-05, "loss": 0.0227, "step": 4530 }, { "epoch": 6.07764390896921, "grad_norm": 0.5119031071662903, "learning_rate": 9.078000000000001e-05, "loss": 0.0266, "step": 4540 }, { "epoch": 6.091030789825971, "grad_norm": 0.253508597612381, "learning_rate": 9.098000000000001e-05, "loss": 0.0201, "step": 4550 }, { "epoch": 6.104417670682731, "grad_norm": 0.8504990935325623, "learning_rate": 9.118e-05, "loss": 0.0246, "step": 4560 }, { "epoch": 6.117804551539491, "grad_norm": 0.9967617392539978, "learning_rate": 9.138e-05, "loss": 0.0209, "step": 4570 }, { "epoch": 6.1311914323962515, "grad_norm": 0.24610720574855804, "learning_rate": 9.158e-05, "loss": 0.0283, "step": 4580 }, { "epoch": 6.144578313253012, "grad_norm": 0.6535239815711975, "learning_rate": 9.178e-05, "loss": 0.0274, "step": 4590 }, { "epoch": 6.157965194109773, "grad_norm": 0.4480728507041931, "learning_rate": 9.198e-05, "loss": 0.0239, "step": 4600 }, { "epoch": 6.171352074966533, "grad_norm": 0.38666656613349915, "learning_rate": 9.218e-05, "loss": 0.0235, "step": 4610 }, { "epoch": 6.184738955823293, "grad_norm": 0.2891569137573242, "learning_rate": 9.238e-05, "loss": 0.0236, "step": 4620 }, { "epoch": 6.198125836680053, "grad_norm": 0.29950466752052307, "learning_rate": 9.258e-05, "loss": 0.0203, "step": 4630 }, { "epoch": 6.211512717536814, "grad_norm": 0.28779658675193787, "learning_rate": 9.278e-05, "loss": 0.02, "step": 4640 }, { "epoch": 6.224899598393574, "grad_norm": 0.5233101844787598, "learning_rate": 9.298e-05, "loss": 0.0196, "step": 4650 }, { "epoch": 6.238286479250335, "grad_norm": 0.38271644711494446, "learning_rate": 9.318e-05, "loss": 0.0231, "step": 4660 }, { "epoch": 6.251673360107095, "grad_norm": 0.4077036678791046, "learning_rate": 9.338e-05, "loss": 0.0233, "step": 4670 }, { "epoch": 6.265060240963855, "grad_norm": 0.3430933952331543, "learning_rate": 9.358e-05, "loss": 0.0209, "step": 4680 }, { "epoch": 6.278447121820616, "grad_norm": 0.40982070565223694, "learning_rate": 9.378e-05, "loss": 0.0264, "step": 4690 }, { "epoch": 6.291834002677376, "grad_norm": 0.40560463070869446, "learning_rate": 9.398e-05, "loss": 0.0253, "step": 4700 }, { "epoch": 6.305220883534137, "grad_norm": 0.3927428424358368, "learning_rate": 9.418e-05, "loss": 0.0242, "step": 4710 }, { "epoch": 6.318607764390897, "grad_norm": 0.4643862843513489, "learning_rate": 9.438e-05, "loss": 0.0254, "step": 4720 }, { "epoch": 6.331994645247657, "grad_norm": 0.3309985101222992, "learning_rate": 9.458e-05, "loss": 0.0254, "step": 4730 }, { "epoch": 6.3453815261044175, "grad_norm": 0.32817134261131287, "learning_rate": 9.478e-05, "loss": 0.0238, "step": 4740 }, { "epoch": 6.358768406961178, "grad_norm": 0.35388660430908203, "learning_rate": 9.498e-05, "loss": 0.0256, "step": 4750 }, { "epoch": 6.372155287817939, "grad_norm": 0.2524287700653076, "learning_rate": 9.518000000000001e-05, "loss": 0.0265, "step": 4760 }, { "epoch": 6.385542168674699, "grad_norm": 0.303363174200058, "learning_rate": 9.538e-05, "loss": 0.0229, "step": 4770 }, { "epoch": 6.398929049531459, "grad_norm": 0.33565661311149597, "learning_rate": 9.558e-05, "loss": 0.0261, "step": 4780 }, { "epoch": 6.412315930388219, "grad_norm": 0.2781584560871124, "learning_rate": 9.578000000000001e-05, "loss": 0.0255, "step": 4790 }, { "epoch": 6.42570281124498, "grad_norm": 0.18126459419727325, "learning_rate": 9.598e-05, "loss": 0.022, "step": 4800 }, { "epoch": 6.43908969210174, "grad_norm": 0.2545143663883209, "learning_rate": 9.618e-05, "loss": 0.0251, "step": 4810 }, { "epoch": 6.452476572958501, "grad_norm": 0.4660956561565399, "learning_rate": 9.638000000000001e-05, "loss": 0.0283, "step": 4820 }, { "epoch": 6.4658634538152615, "grad_norm": 0.39580127596855164, "learning_rate": 9.658e-05, "loss": 0.024, "step": 4830 }, { "epoch": 6.479250334672021, "grad_norm": 0.40582677721977234, "learning_rate": 9.678e-05, "loss": 0.0278, "step": 4840 }, { "epoch": 6.492637215528782, "grad_norm": 0.21322797238826752, "learning_rate": 9.698000000000001e-05, "loss": 0.0267, "step": 4850 }, { "epoch": 6.506024096385542, "grad_norm": 0.35335084795951843, "learning_rate": 9.718e-05, "loss": 0.0275, "step": 4860 }, { "epoch": 6.519410977242303, "grad_norm": 0.41027355194091797, "learning_rate": 9.738e-05, "loss": 0.0243, "step": 4870 }, { "epoch": 6.532797858099062, "grad_norm": 0.5281573534011841, "learning_rate": 9.758000000000001e-05, "loss": 0.0249, "step": 4880 }, { "epoch": 6.546184738955823, "grad_norm": 0.3980945348739624, "learning_rate": 9.778e-05, "loss": 0.0225, "step": 4890 }, { "epoch": 6.5595716198125835, "grad_norm": 0.4215918183326721, "learning_rate": 9.798000000000001e-05, "loss": 0.0232, "step": 4900 }, { "epoch": 6.572958500669344, "grad_norm": 0.35187003016471863, "learning_rate": 9.818000000000001e-05, "loss": 0.023, "step": 4910 }, { "epoch": 6.586345381526105, "grad_norm": 0.2777998745441437, "learning_rate": 9.838e-05, "loss": 0.0233, "step": 4920 }, { "epoch": 6.599732262382865, "grad_norm": 0.38700345158576965, "learning_rate": 9.858000000000001e-05, "loss": 0.0289, "step": 4930 }, { "epoch": 6.613119143239625, "grad_norm": 0.4227716028690338, "learning_rate": 9.878e-05, "loss": 0.024, "step": 4940 }, { "epoch": 6.626506024096385, "grad_norm": 0.47916266322135925, "learning_rate": 9.898e-05, "loss": 0.0272, "step": 4950 }, { "epoch": 6.639892904953146, "grad_norm": 0.3218872845172882, "learning_rate": 9.918000000000001e-05, "loss": 0.0281, "step": 4960 }, { "epoch": 6.653279785809906, "grad_norm": 0.20785541832447052, "learning_rate": 9.938e-05, "loss": 0.023, "step": 4970 }, { "epoch": 6.666666666666667, "grad_norm": 0.29337701201438904, "learning_rate": 9.958e-05, "loss": 0.026, "step": 4980 }, { "epoch": 6.680053547523427, "grad_norm": 0.39317750930786133, "learning_rate": 9.978000000000001e-05, "loss": 0.0238, "step": 4990 }, { "epoch": 6.693440428380187, "grad_norm": 0.3909324109554291, "learning_rate": 9.998e-05, "loss": 0.0238, "step": 5000 }, { "epoch": 6.706827309236948, "grad_norm": 0.2834399342536926, "learning_rate": 9.999999778549045e-05, "loss": 0.0232, "step": 5010 }, { "epoch": 6.720214190093708, "grad_norm": 0.24843740463256836, "learning_rate": 9.999999013039593e-05, "loss": 0.0232, "step": 5020 }, { "epoch": 6.733601070950469, "grad_norm": 0.44027194380760193, "learning_rate": 9.999997700737766e-05, "loss": 0.0245, "step": 5030 }, { "epoch": 6.746987951807229, "grad_norm": 0.20299236476421356, "learning_rate": 9.999995841643709e-05, "loss": 0.0263, "step": 5040 }, { "epoch": 6.760374832663989, "grad_norm": 0.18285918235778809, "learning_rate": 9.999993435757623e-05, "loss": 0.0239, "step": 5050 }, { "epoch": 6.7737617135207495, "grad_norm": 0.5096527338027954, "learning_rate": 9.999990483079773e-05, "loss": 0.0254, "step": 5060 }, { "epoch": 6.78714859437751, "grad_norm": 0.45981094241142273, "learning_rate": 9.999986983610481e-05, "loss": 0.0247, "step": 5070 }, { "epoch": 6.800535475234271, "grad_norm": 0.21177181601524353, "learning_rate": 9.99998293735013e-05, "loss": 0.023, "step": 5080 }, { "epoch": 6.813922356091031, "grad_norm": 0.5241486430168152, "learning_rate": 9.999978344299161e-05, "loss": 0.0251, "step": 5090 }, { "epoch": 6.827309236947791, "grad_norm": 0.32289955019950867, "learning_rate": 9.99997320445808e-05, "loss": 0.0235, "step": 5100 }, { "epoch": 6.840696117804551, "grad_norm": 0.3331092596054077, "learning_rate": 9.999967517827444e-05, "loss": 0.0277, "step": 5110 }, { "epoch": 6.854082998661312, "grad_norm": 0.2813813090324402, "learning_rate": 9.999961284407879e-05, "loss": 0.0259, "step": 5120 }, { "epoch": 6.867469879518072, "grad_norm": 0.22564518451690674, "learning_rate": 9.999954504200067e-05, "loss": 0.0254, "step": 5130 }, { "epoch": 6.880856760374833, "grad_norm": 0.2737979292869568, "learning_rate": 9.999947177204744e-05, "loss": 0.0238, "step": 5140 }, { "epoch": 6.8942436412315935, "grad_norm": 0.39670711755752563, "learning_rate": 9.999939303422718e-05, "loss": 0.0286, "step": 5150 }, { "epoch": 6.907630522088353, "grad_norm": 0.24273259937763214, "learning_rate": 9.999930882854847e-05, "loss": 0.0236, "step": 5160 }, { "epoch": 6.921017402945114, "grad_norm": 0.4295795261859894, "learning_rate": 9.999921915502051e-05, "loss": 0.0335, "step": 5170 }, { "epoch": 6.934404283801874, "grad_norm": 0.3779001235961914, "learning_rate": 9.99991240136531e-05, "loss": 0.0281, "step": 5180 }, { "epoch": 6.947791164658635, "grad_norm": 0.25678643584251404, "learning_rate": 9.999902340445668e-05, "loss": 0.0267, "step": 5190 }, { "epoch": 6.961178045515394, "grad_norm": 0.2889895439147949, "learning_rate": 9.999891732744224e-05, "loss": 0.0279, "step": 5200 }, { "epoch": 6.974564926372155, "grad_norm": 0.36115068197250366, "learning_rate": 9.999880578262135e-05, "loss": 0.035, "step": 5210 }, { "epoch": 6.9879518072289155, "grad_norm": 0.38910403847694397, "learning_rate": 9.999868877000624e-05, "loss": 0.0314, "step": 5220 }, { "epoch": 7.001338688085676, "grad_norm": 0.2935853898525238, "learning_rate": 9.99985662896097e-05, "loss": 0.027, "step": 5230 }, { "epoch": 7.014725568942437, "grad_norm": 0.4407844841480255, "learning_rate": 9.999843834144513e-05, "loss": 0.0268, "step": 5240 }, { "epoch": 7.028112449799197, "grad_norm": 0.6856471300125122, "learning_rate": 9.99983049255265e-05, "loss": 0.0259, "step": 5250 }, { "epoch": 7.041499330655957, "grad_norm": 0.24441684782505035, "learning_rate": 9.999816604186843e-05, "loss": 0.0289, "step": 5260 }, { "epoch": 7.054886211512717, "grad_norm": 0.28608137369155884, "learning_rate": 9.999802169048609e-05, "loss": 0.0301, "step": 5270 }, { "epoch": 7.068273092369478, "grad_norm": 0.3028070330619812, "learning_rate": 9.999787187139527e-05, "loss": 0.0261, "step": 5280 }, { "epoch": 7.081659973226238, "grad_norm": 0.3120931386947632, "learning_rate": 9.999771658461234e-05, "loss": 0.0267, "step": 5290 }, { "epoch": 7.095046854082999, "grad_norm": 0.2831306755542755, "learning_rate": 9.999755583015431e-05, "loss": 0.024, "step": 5300 }, { "epoch": 7.108433734939759, "grad_norm": 0.25376415252685547, "learning_rate": 9.999738960803874e-05, "loss": 0.0219, "step": 5310 }, { "epoch": 7.121820615796519, "grad_norm": 0.20148849487304688, "learning_rate": 9.99972179182838e-05, "loss": 0.0224, "step": 5320 }, { "epoch": 7.13520749665328, "grad_norm": 1.3556430339813232, "learning_rate": 9.99970407609083e-05, "loss": 0.0241, "step": 5330 }, { "epoch": 7.14859437751004, "grad_norm": 0.24522621929645538, "learning_rate": 9.999685813593159e-05, "loss": 0.0239, "step": 5340 }, { "epoch": 7.161981258366801, "grad_norm": 0.4029282331466675, "learning_rate": 9.999667004337362e-05, "loss": 0.0281, "step": 5350 }, { "epoch": 7.175368139223561, "grad_norm": 0.29282984137535095, "learning_rate": 9.9996476483255e-05, "loss": 0.0292, "step": 5360 }, { "epoch": 7.188755020080321, "grad_norm": 0.28943967819213867, "learning_rate": 9.999627745559688e-05, "loss": 0.0217, "step": 5370 }, { "epoch": 7.2021419009370815, "grad_norm": 0.45234090089797974, "learning_rate": 9.999607296042101e-05, "loss": 0.0233, "step": 5380 }, { "epoch": 7.215528781793842, "grad_norm": 0.271823525428772, "learning_rate": 9.99958629977498e-05, "loss": 0.0226, "step": 5390 }, { "epoch": 7.228915662650603, "grad_norm": 0.17379288375377655, "learning_rate": 9.999564756760615e-05, "loss": 0.0228, "step": 5400 }, { "epoch": 7.242302543507363, "grad_norm": 0.24620985984802246, "learning_rate": 9.999542667001366e-05, "loss": 0.0207, "step": 5410 }, { "epoch": 7.255689424364123, "grad_norm": 0.27572986483573914, "learning_rate": 9.999520030499647e-05, "loss": 0.0262, "step": 5420 }, { "epoch": 7.269076305220883, "grad_norm": 0.468960702419281, "learning_rate": 9.999496847257936e-05, "loss": 0.0237, "step": 5430 }, { "epoch": 7.282463186077644, "grad_norm": 0.44337499141693115, "learning_rate": 9.999473117278764e-05, "loss": 0.0212, "step": 5440 }, { "epoch": 7.295850066934404, "grad_norm": 0.45084112882614136, "learning_rate": 9.999448840564731e-05, "loss": 0.0209, "step": 5450 }, { "epoch": 7.309236947791165, "grad_norm": 0.48062583804130554, "learning_rate": 9.999424017118488e-05, "loss": 0.0242, "step": 5460 }, { "epoch": 7.3226238286479255, "grad_norm": 0.2587013244628906, "learning_rate": 9.999398646942751e-05, "loss": 0.0221, "step": 5470 }, { "epoch": 7.336010709504685, "grad_norm": 0.29550689458847046, "learning_rate": 9.999372730040296e-05, "loss": 0.0207, "step": 5480 }, { "epoch": 7.349397590361446, "grad_norm": 0.5307635068893433, "learning_rate": 9.999346266413953e-05, "loss": 0.022, "step": 5490 }, { "epoch": 7.362784471218206, "grad_norm": 0.27674639225006104, "learning_rate": 9.99931925606662e-05, "loss": 0.0236, "step": 5500 }, { "epoch": 7.376171352074967, "grad_norm": 0.31229567527770996, "learning_rate": 9.99929169900125e-05, "loss": 0.0232, "step": 5510 }, { "epoch": 7.389558232931727, "grad_norm": 0.24530723690986633, "learning_rate": 9.999263595220855e-05, "loss": 0.0253, "step": 5520 }, { "epoch": 7.402945113788487, "grad_norm": 0.3317316174507141, "learning_rate": 9.99923494472851e-05, "loss": 0.0226, "step": 5530 }, { "epoch": 7.4163319946452475, "grad_norm": 0.27915093302726746, "learning_rate": 9.999205747527348e-05, "loss": 0.0218, "step": 5540 }, { "epoch": 7.429718875502008, "grad_norm": 0.24965204298496246, "learning_rate": 9.999176003620561e-05, "loss": 0.0221, "step": 5550 }, { "epoch": 7.443105756358769, "grad_norm": 0.36555948853492737, "learning_rate": 9.999145713011405e-05, "loss": 0.0262, "step": 5560 }, { "epoch": 7.456492637215529, "grad_norm": 0.3121846616268158, "learning_rate": 9.999114875703186e-05, "loss": 0.0229, "step": 5570 }, { "epoch": 7.469879518072289, "grad_norm": 0.3219899833202362, "learning_rate": 9.999083491699281e-05, "loss": 0.0272, "step": 5580 }, { "epoch": 7.483266398929049, "grad_norm": 0.46703943610191345, "learning_rate": 9.999051561003123e-05, "loss": 0.0238, "step": 5590 }, { "epoch": 7.49665327978581, "grad_norm": 0.2729679048061371, "learning_rate": 9.999019083618202e-05, "loss": 0.0228, "step": 5600 }, { "epoch": 7.51004016064257, "grad_norm": 0.27691650390625, "learning_rate": 9.99898605954807e-05, "loss": 0.0244, "step": 5610 }, { "epoch": 7.523427041499331, "grad_norm": 0.23693884909152985, "learning_rate": 9.998952488796338e-05, "loss": 0.0235, "step": 5620 }, { "epoch": 7.536813922356091, "grad_norm": 0.3024663031101227, "learning_rate": 9.998918371366676e-05, "loss": 0.0252, "step": 5630 }, { "epoch": 7.550200803212851, "grad_norm": 0.2323358952999115, "learning_rate": 9.99888370726282e-05, "loss": 0.022, "step": 5640 }, { "epoch": 7.563587684069612, "grad_norm": 0.38761770725250244, "learning_rate": 9.998848496488556e-05, "loss": 0.0238, "step": 5650 }, { "epoch": 7.576974564926372, "grad_norm": 0.36184096336364746, "learning_rate": 9.998812739047736e-05, "loss": 0.0249, "step": 5660 }, { "epoch": 7.590361445783133, "grad_norm": 0.3343724012374878, "learning_rate": 9.99877643494427e-05, "loss": 0.0226, "step": 5670 }, { "epoch": 7.603748326639893, "grad_norm": 0.2162223607301712, "learning_rate": 9.998739584182128e-05, "loss": 0.0235, "step": 5680 }, { "epoch": 7.617135207496653, "grad_norm": 0.3495326340198517, "learning_rate": 9.998702186765342e-05, "loss": 0.0265, "step": 5690 }, { "epoch": 7.6305220883534135, "grad_norm": 0.3426989018917084, "learning_rate": 9.998664242698e-05, "loss": 0.0215, "step": 5700 }, { "epoch": 7.643908969210174, "grad_norm": 0.44529709219932556, "learning_rate": 9.998625751984251e-05, "loss": 0.0216, "step": 5710 }, { "epoch": 7.657295850066935, "grad_norm": 0.557045578956604, "learning_rate": 9.998586714628307e-05, "loss": 0.024, "step": 5720 }, { "epoch": 7.670682730923695, "grad_norm": 0.317541241645813, "learning_rate": 9.998547130634432e-05, "loss": 0.0238, "step": 5730 }, { "epoch": 7.684069611780455, "grad_norm": 0.3327717185020447, "learning_rate": 9.99850700000696e-05, "loss": 0.0252, "step": 5740 }, { "epoch": 7.697456492637215, "grad_norm": 0.27602618932724, "learning_rate": 9.998466322750278e-05, "loss": 0.0228, "step": 5750 }, { "epoch": 7.710843373493976, "grad_norm": 0.17282915115356445, "learning_rate": 9.998425098868834e-05, "loss": 0.021, "step": 5760 }, { "epoch": 7.724230254350736, "grad_norm": 0.30519095063209534, "learning_rate": 9.998383328367136e-05, "loss": 0.0238, "step": 5770 }, { "epoch": 7.737617135207497, "grad_norm": 0.2900030016899109, "learning_rate": 9.99834101124975e-05, "loss": 0.0225, "step": 5780 }, { "epoch": 7.7510040160642575, "grad_norm": 0.21800728142261505, "learning_rate": 9.998298147521309e-05, "loss": 0.0207, "step": 5790 }, { "epoch": 7.764390896921017, "grad_norm": 0.22136147320270538, "learning_rate": 9.998254737186496e-05, "loss": 0.0206, "step": 5800 }, { "epoch": 7.777777777777778, "grad_norm": 0.8132770657539368, "learning_rate": 9.99821078025006e-05, "loss": 0.0225, "step": 5810 }, { "epoch": 7.791164658634538, "grad_norm": 0.6801872849464417, "learning_rate": 9.998166276716807e-05, "loss": 0.0252, "step": 5820 }, { "epoch": 7.804551539491299, "grad_norm": 0.5249801278114319, "learning_rate": 9.998121226591606e-05, "loss": 0.0224, "step": 5830 }, { "epoch": 7.817938420348058, "grad_norm": 0.23327824473381042, "learning_rate": 9.998075629879382e-05, "loss": 0.024, "step": 5840 }, { "epoch": 7.831325301204819, "grad_norm": 0.24490536749362946, "learning_rate": 9.99802948658512e-05, "loss": 0.0264, "step": 5850 }, { "epoch": 7.8447121820615795, "grad_norm": 0.33781930804252625, "learning_rate": 9.99798279671387e-05, "loss": 0.0267, "step": 5860 }, { "epoch": 7.85809906291834, "grad_norm": 0.22902335226535797, "learning_rate": 9.997935560270734e-05, "loss": 0.0234, "step": 5870 }, { "epoch": 7.871485943775101, "grad_norm": 0.3029695749282837, "learning_rate": 9.997887777260879e-05, "loss": 0.0239, "step": 5880 }, { "epoch": 7.884872824631861, "grad_norm": 0.2534645199775696, "learning_rate": 9.997839447689532e-05, "loss": 0.0218, "step": 5890 }, { "epoch": 7.898259705488621, "grad_norm": 0.15101848542690277, "learning_rate": 9.997790571561978e-05, "loss": 0.0219, "step": 5900 }, { "epoch": 7.911646586345381, "grad_norm": 0.27729031443595886, "learning_rate": 9.99774114888356e-05, "loss": 0.0225, "step": 5910 }, { "epoch": 7.925033467202142, "grad_norm": 0.3550078868865967, "learning_rate": 9.997691179659684e-05, "loss": 0.0272, "step": 5920 }, { "epoch": 7.938420348058902, "grad_norm": 0.319739431142807, "learning_rate": 9.997640663895815e-05, "loss": 0.0284, "step": 5930 }, { "epoch": 7.951807228915663, "grad_norm": 0.30274805426597595, "learning_rate": 9.997589601597477e-05, "loss": 0.0272, "step": 5940 }, { "epoch": 7.965194109772423, "grad_norm": 0.24146364629268646, "learning_rate": 9.997537992770252e-05, "loss": 0.0235, "step": 5950 }, { "epoch": 7.978580990629183, "grad_norm": 0.37982890009880066, "learning_rate": 9.997485837419788e-05, "loss": 0.0237, "step": 5960 }, { "epoch": 7.991967871485944, "grad_norm": 0.26710939407348633, "learning_rate": 9.997433135551786e-05, "loss": 0.0201, "step": 5970 }, { "epoch": 8.005354752342704, "grad_norm": 0.4361531436443329, "learning_rate": 9.997379887172009e-05, "loss": 0.0228, "step": 5980 }, { "epoch": 8.018741633199465, "grad_norm": 0.35674068331718445, "learning_rate": 9.997326092286281e-05, "loss": 0.0242, "step": 5990 }, { "epoch": 8.032128514056225, "grad_norm": 0.28570520877838135, "learning_rate": 9.997271750900486e-05, "loss": 0.0222, "step": 6000 }, { "epoch": 8.045515394912986, "grad_norm": 0.2872113287448883, "learning_rate": 9.997216863020565e-05, "loss": 0.0198, "step": 6010 }, { "epoch": 8.058902275769746, "grad_norm": 0.24372072517871857, "learning_rate": 9.99716142865252e-05, "loss": 0.0229, "step": 6020 }, { "epoch": 8.072289156626505, "grad_norm": 0.2558363676071167, "learning_rate": 9.997105447802415e-05, "loss": 0.0202, "step": 6030 }, { "epoch": 8.085676037483266, "grad_norm": 0.3118811845779419, "learning_rate": 9.997048920476373e-05, "loss": 0.021, "step": 6040 }, { "epoch": 8.099062918340026, "grad_norm": 0.411066472530365, "learning_rate": 9.996991846680572e-05, "loss": 0.0216, "step": 6050 }, { "epoch": 8.112449799196787, "grad_norm": 0.3228205442428589, "learning_rate": 9.996934226421257e-05, "loss": 0.0233, "step": 6060 }, { "epoch": 8.125836680053547, "grad_norm": 0.31248629093170166, "learning_rate": 9.996876059704726e-05, "loss": 0.0248, "step": 6070 }, { "epoch": 8.139223560910308, "grad_norm": 0.950209379196167, "learning_rate": 9.996817346537343e-05, "loss": 0.0228, "step": 6080 }, { "epoch": 8.152610441767068, "grad_norm": 0.5372652411460876, "learning_rate": 9.996758086925526e-05, "loss": 0.0255, "step": 6090 }, { "epoch": 8.165997322623829, "grad_norm": 0.3421483039855957, "learning_rate": 9.996698280875759e-05, "loss": 0.0243, "step": 6100 }, { "epoch": 8.17938420348059, "grad_norm": 0.5312336683273315, "learning_rate": 9.99663792839458e-05, "loss": 0.0254, "step": 6110 }, { "epoch": 8.19277108433735, "grad_norm": 0.30150359869003296, "learning_rate": 9.99657702948859e-05, "loss": 0.0259, "step": 6120 }, { "epoch": 8.20615796519411, "grad_norm": 0.349402517080307, "learning_rate": 9.996515584164448e-05, "loss": 0.0259, "step": 6130 }, { "epoch": 8.21954484605087, "grad_norm": 0.21285264194011688, "learning_rate": 9.996453592428873e-05, "loss": 0.0217, "step": 6140 }, { "epoch": 8.23293172690763, "grad_norm": 0.5140072107315063, "learning_rate": 9.996391054288646e-05, "loss": 0.0209, "step": 6150 }, { "epoch": 8.24631860776439, "grad_norm": 0.3016437590122223, "learning_rate": 9.996327969750605e-05, "loss": 0.0185, "step": 6160 }, { "epoch": 8.259705488621151, "grad_norm": 0.2234225869178772, "learning_rate": 9.996264338821649e-05, "loss": 0.0193, "step": 6170 }, { "epoch": 8.273092369477911, "grad_norm": 0.3816507160663605, "learning_rate": 9.996200161508735e-05, "loss": 0.0229, "step": 6180 }, { "epoch": 8.286479250334672, "grad_norm": 0.3511485159397125, "learning_rate": 9.996135437818885e-05, "loss": 0.0183, "step": 6190 }, { "epoch": 8.299866131191433, "grad_norm": 0.22758659720420837, "learning_rate": 9.996070167759175e-05, "loss": 0.02, "step": 6200 }, { "epoch": 8.313253012048193, "grad_norm": 0.1647326499223709, "learning_rate": 9.996004351336743e-05, "loss": 0.0204, "step": 6210 }, { "epoch": 8.326639892904954, "grad_norm": 0.3108470141887665, "learning_rate": 9.995937988558785e-05, "loss": 0.0229, "step": 6220 }, { "epoch": 8.340026773761714, "grad_norm": 0.35291072726249695, "learning_rate": 9.995871079432561e-05, "loss": 0.0205, "step": 6230 }, { "epoch": 8.353413654618475, "grad_norm": 0.16245238482952118, "learning_rate": 9.995803623965389e-05, "loss": 0.0193, "step": 6240 }, { "epoch": 8.366800535475234, "grad_norm": 0.335728257894516, "learning_rate": 9.995735622164641e-05, "loss": 0.0202, "step": 6250 }, { "epoch": 8.380187416331994, "grad_norm": 0.33988887071609497, "learning_rate": 9.995667074037758e-05, "loss": 0.0169, "step": 6260 }, { "epoch": 8.393574297188755, "grad_norm": 0.285746306180954, "learning_rate": 9.995597979592232e-05, "loss": 0.0197, "step": 6270 }, { "epoch": 8.406961178045515, "grad_norm": 0.16346196830272675, "learning_rate": 9.995528338835625e-05, "loss": 0.0196, "step": 6280 }, { "epoch": 8.420348058902276, "grad_norm": 0.2483651489019394, "learning_rate": 9.995458151775547e-05, "loss": 0.0195, "step": 6290 }, { "epoch": 8.433734939759036, "grad_norm": 0.16983209550380707, "learning_rate": 9.995387418419677e-05, "loss": 0.0191, "step": 6300 }, { "epoch": 8.447121820615797, "grad_norm": 0.20587603747844696, "learning_rate": 9.99531613877575e-05, "loss": 0.0166, "step": 6310 }, { "epoch": 8.460508701472557, "grad_norm": 0.2709047496318817, "learning_rate": 9.995244312851559e-05, "loss": 0.0201, "step": 6320 }, { "epoch": 8.473895582329318, "grad_norm": 0.20796483755111694, "learning_rate": 9.995171940654961e-05, "loss": 0.0194, "step": 6330 }, { "epoch": 8.487282463186078, "grad_norm": 0.1657964289188385, "learning_rate": 9.995099022193871e-05, "loss": 0.0206, "step": 6340 }, { "epoch": 8.500669344042837, "grad_norm": 0.36185142397880554, "learning_rate": 9.995025557476261e-05, "loss": 0.0223, "step": 6350 }, { "epoch": 8.514056224899598, "grad_norm": 0.20145148038864136, "learning_rate": 9.994951546510165e-05, "loss": 0.0207, "step": 6360 }, { "epoch": 8.527443105756358, "grad_norm": 0.2887599468231201, "learning_rate": 9.994876989303679e-05, "loss": 0.0204, "step": 6370 }, { "epoch": 8.540829986613119, "grad_norm": 0.3220275640487671, "learning_rate": 9.994801885864955e-05, "loss": 0.0203, "step": 6380 }, { "epoch": 8.55421686746988, "grad_norm": 0.3457286059856415, "learning_rate": 9.994726236202205e-05, "loss": 0.0189, "step": 6390 }, { "epoch": 8.56760374832664, "grad_norm": 0.2487928867340088, "learning_rate": 9.994650040323704e-05, "loss": 0.025, "step": 6400 }, { "epoch": 8.5809906291834, "grad_norm": 1.7030549049377441, "learning_rate": 9.994573298237784e-05, "loss": 0.0236, "step": 6410 }, { "epoch": 8.594377510040161, "grad_norm": 0.2442339062690735, "learning_rate": 9.994496009952837e-05, "loss": 0.023, "step": 6420 }, { "epoch": 8.607764390896921, "grad_norm": 0.3672276437282562, "learning_rate": 9.994418175477316e-05, "loss": 0.0228, "step": 6430 }, { "epoch": 8.621151271753682, "grad_norm": 0.2867424786090851, "learning_rate": 9.994339794819733e-05, "loss": 0.0222, "step": 6440 }, { "epoch": 8.634538152610443, "grad_norm": 0.35675251483917236, "learning_rate": 9.994260867988658e-05, "loss": 0.0221, "step": 6450 }, { "epoch": 8.647925033467201, "grad_norm": 0.36941251158714294, "learning_rate": 9.994181394992723e-05, "loss": 0.019, "step": 6460 }, { "epoch": 8.661311914323962, "grad_norm": 0.24012073874473572, "learning_rate": 9.994101375840618e-05, "loss": 0.0219, "step": 6470 }, { "epoch": 8.674698795180722, "grad_norm": 0.2285100817680359, "learning_rate": 9.994020810541098e-05, "loss": 0.0191, "step": 6480 }, { "epoch": 8.688085676037483, "grad_norm": 0.326668918132782, "learning_rate": 9.99393969910297e-05, "loss": 0.02, "step": 6490 }, { "epoch": 8.701472556894243, "grad_norm": 0.22297553718090057, "learning_rate": 9.993858041535104e-05, "loss": 0.0202, "step": 6500 }, { "epoch": 8.714859437751004, "grad_norm": 0.715004026889801, "learning_rate": 9.99377583784643e-05, "loss": 0.0243, "step": 6510 }, { "epoch": 8.728246318607765, "grad_norm": 0.4060622453689575, "learning_rate": 9.993693088045939e-05, "loss": 0.0221, "step": 6520 }, { "epoch": 8.741633199464525, "grad_norm": 0.2538658082485199, "learning_rate": 9.99360979214268e-05, "loss": 0.0205, "step": 6530 }, { "epoch": 8.755020080321286, "grad_norm": 0.28581032156944275, "learning_rate": 9.99352595014576e-05, "loss": 0.0193, "step": 6540 }, { "epoch": 8.768406961178046, "grad_norm": 0.376010537147522, "learning_rate": 9.993441562064354e-05, "loss": 0.0208, "step": 6550 }, { "epoch": 8.781793842034805, "grad_norm": 0.29643580317497253, "learning_rate": 9.993356627907685e-05, "loss": 0.0212, "step": 6560 }, { "epoch": 8.795180722891565, "grad_norm": 0.31445780396461487, "learning_rate": 9.99327114768504e-05, "loss": 0.0233, "step": 6570 }, { "epoch": 8.808567603748326, "grad_norm": 0.18389198184013367, "learning_rate": 9.99318512140577e-05, "loss": 0.0195, "step": 6580 }, { "epoch": 8.821954484605087, "grad_norm": 0.21512706577777863, "learning_rate": 9.993098549079284e-05, "loss": 0.0205, "step": 6590 }, { "epoch": 8.835341365461847, "grad_norm": 0.14787006378173828, "learning_rate": 9.993011430715047e-05, "loss": 0.0194, "step": 6600 }, { "epoch": 8.848728246318608, "grad_norm": 0.17550991475582123, "learning_rate": 9.992923766322586e-05, "loss": 0.0269, "step": 6610 }, { "epoch": 8.862115127175368, "grad_norm": 0.536027729511261, "learning_rate": 9.99283555591149e-05, "loss": 0.0225, "step": 6620 }, { "epoch": 8.875502008032129, "grad_norm": 0.30213481187820435, "learning_rate": 9.992746799491404e-05, "loss": 0.0234, "step": 6630 }, { "epoch": 8.88888888888889, "grad_norm": 0.37450817227363586, "learning_rate": 9.992657497072033e-05, "loss": 0.0258, "step": 6640 }, { "epoch": 8.90227576974565, "grad_norm": 0.3938837945461273, "learning_rate": 9.992567648663147e-05, "loss": 0.026, "step": 6650 }, { "epoch": 8.91566265060241, "grad_norm": 0.3485872447490692, "learning_rate": 9.992477254274568e-05, "loss": 0.0232, "step": 6660 }, { "epoch": 8.929049531459171, "grad_norm": 0.32787463068962097, "learning_rate": 9.992386313916183e-05, "loss": 0.0227, "step": 6670 }, { "epoch": 8.94243641231593, "grad_norm": 0.2626693546772003, "learning_rate": 9.992294827597934e-05, "loss": 0.0206, "step": 6680 }, { "epoch": 8.95582329317269, "grad_norm": 0.2340930998325348, "learning_rate": 9.992202795329831e-05, "loss": 0.0197, "step": 6690 }, { "epoch": 8.96921017402945, "grad_norm": 0.15280868113040924, "learning_rate": 9.992110217121936e-05, "loss": 0.0219, "step": 6700 }, { "epoch": 8.982597054886211, "grad_norm": 0.1574825644493103, "learning_rate": 9.992017092984372e-05, "loss": 0.0198, "step": 6710 }, { "epoch": 8.995983935742972, "grad_norm": 0.23020701110363007, "learning_rate": 9.991923422927326e-05, "loss": 0.0221, "step": 6720 }, { "epoch": 9.009370816599732, "grad_norm": 0.39948830008506775, "learning_rate": 9.991829206961037e-05, "loss": 0.0231, "step": 6730 }, { "epoch": 9.022757697456493, "grad_norm": 0.4191053509712219, "learning_rate": 9.991734445095813e-05, "loss": 0.0227, "step": 6740 }, { "epoch": 9.036144578313253, "grad_norm": 0.28061914443969727, "learning_rate": 9.991639137342015e-05, "loss": 0.0197, "step": 6750 }, { "epoch": 9.049531459170014, "grad_norm": 0.32710957527160645, "learning_rate": 9.991543283710064e-05, "loss": 0.0253, "step": 6760 }, { "epoch": 9.062918340026775, "grad_norm": 0.3671044111251831, "learning_rate": 9.991446884210445e-05, "loss": 0.0192, "step": 6770 }, { "epoch": 9.076305220883533, "grad_norm": 0.23177185654640198, "learning_rate": 9.9913499388537e-05, "loss": 0.0226, "step": 6780 }, { "epoch": 9.089692101740294, "grad_norm": 0.3326265215873718, "learning_rate": 9.99125244765043e-05, "loss": 0.0202, "step": 6790 }, { "epoch": 9.103078982597054, "grad_norm": 0.4338203966617584, "learning_rate": 9.991154410611296e-05, "loss": 0.023, "step": 6800 }, { "epoch": 9.116465863453815, "grad_norm": 0.17652706801891327, "learning_rate": 9.99105582774702e-05, "loss": 0.0193, "step": 6810 }, { "epoch": 9.129852744310575, "grad_norm": 0.25114905834198, "learning_rate": 9.990956699068384e-05, "loss": 0.0229, "step": 6820 }, { "epoch": 9.143239625167336, "grad_norm": 0.5123518705368042, "learning_rate": 9.990857024586224e-05, "loss": 0.0202, "step": 6830 }, { "epoch": 9.156626506024097, "grad_norm": 0.7877902984619141, "learning_rate": 9.990756804311446e-05, "loss": 0.0196, "step": 6840 }, { "epoch": 9.170013386880857, "grad_norm": 0.3160996437072754, "learning_rate": 9.990656038255006e-05, "loss": 0.0218, "step": 6850 }, { "epoch": 9.183400267737618, "grad_norm": 0.6226359009742737, "learning_rate": 9.990554726427926e-05, "loss": 0.021, "step": 6860 }, { "epoch": 9.196787148594378, "grad_norm": 0.311543732881546, "learning_rate": 9.990452868841284e-05, "loss": 0.0206, "step": 6870 }, { "epoch": 9.210174029451139, "grad_norm": 0.29951420426368713, "learning_rate": 9.99035046550622e-05, "loss": 0.0215, "step": 6880 }, { "epoch": 9.223560910307897, "grad_norm": 0.3183573782444, "learning_rate": 9.99024751643393e-05, "loss": 0.0194, "step": 6890 }, { "epoch": 9.236947791164658, "grad_norm": 2.4099748134613037, "learning_rate": 9.990144021635677e-05, "loss": 0.0208, "step": 6900 }, { "epoch": 9.250334672021419, "grad_norm": 0.27045825123786926, "learning_rate": 9.990039981122775e-05, "loss": 0.0258, "step": 6910 }, { "epoch": 9.263721552878179, "grad_norm": 0.32180196046829224, "learning_rate": 9.989935394906602e-05, "loss": 0.0241, "step": 6920 }, { "epoch": 9.27710843373494, "grad_norm": 0.3160844147205353, "learning_rate": 9.989830262998598e-05, "loss": 0.0203, "step": 6930 }, { "epoch": 9.2904953145917, "grad_norm": 0.2881385385990143, "learning_rate": 9.989724585410259e-05, "loss": 0.0218, "step": 6940 }, { "epoch": 9.30388219544846, "grad_norm": 0.28545138239860535, "learning_rate": 9.989618362153139e-05, "loss": 0.0184, "step": 6950 }, { "epoch": 9.317269076305221, "grad_norm": 0.15262290835380554, "learning_rate": 9.989511593238859e-05, "loss": 0.0231, "step": 6960 }, { "epoch": 9.330655957161982, "grad_norm": 0.23986399173736572, "learning_rate": 9.98940427867909e-05, "loss": 0.0207, "step": 6970 }, { "epoch": 9.344042838018742, "grad_norm": 0.2778412997722626, "learning_rate": 9.989296418485573e-05, "loss": 0.0202, "step": 6980 }, { "epoch": 9.357429718875501, "grad_norm": 0.378902405500412, "learning_rate": 9.989188012670101e-05, "loss": 0.0185, "step": 6990 }, { "epoch": 9.370816599732262, "grad_norm": 0.3372129797935486, "learning_rate": 9.989079061244528e-05, "loss": 0.0211, "step": 7000 }, { "epoch": 9.384203480589022, "grad_norm": 0.3455948233604431, "learning_rate": 9.988969564220769e-05, "loss": 0.0197, "step": 7010 }, { "epoch": 9.397590361445783, "grad_norm": 0.2826843857765198, "learning_rate": 9.988859521610801e-05, "loss": 0.0203, "step": 7020 }, { "epoch": 9.410977242302543, "grad_norm": 0.24930432438850403, "learning_rate": 9.988748933426656e-05, "loss": 0.0198, "step": 7030 }, { "epoch": 9.424364123159304, "grad_norm": 0.30395829677581787, "learning_rate": 9.988637799680428e-05, "loss": 0.0195, "step": 7040 }, { "epoch": 9.437751004016064, "grad_norm": 0.3397486209869385, "learning_rate": 9.98852612038427e-05, "loss": 0.0195, "step": 7050 }, { "epoch": 9.451137884872825, "grad_norm": 0.34288567304611206, "learning_rate": 9.988413895550397e-05, "loss": 0.0205, "step": 7060 }, { "epoch": 9.464524765729585, "grad_norm": 0.359190970659256, "learning_rate": 9.98830112519108e-05, "loss": 0.0253, "step": 7070 }, { "epoch": 9.477911646586346, "grad_norm": 0.2562154531478882, "learning_rate": 9.98818780931865e-05, "loss": 0.0222, "step": 7080 }, { "epoch": 9.491298527443107, "grad_norm": 0.22686918079853058, "learning_rate": 9.988073947945502e-05, "loss": 0.0205, "step": 7090 }, { "epoch": 9.504685408299865, "grad_norm": 0.20271334052085876, "learning_rate": 9.987959541084087e-05, "loss": 0.023, "step": 7100 }, { "epoch": 9.518072289156626, "grad_norm": 0.34943723678588867, "learning_rate": 9.987844588746915e-05, "loss": 0.0193, "step": 7110 }, { "epoch": 9.531459170013386, "grad_norm": 0.4308094084262848, "learning_rate": 9.987729090946558e-05, "loss": 0.0233, "step": 7120 }, { "epoch": 9.544846050870147, "grad_norm": 0.4777730107307434, "learning_rate": 9.987613047695647e-05, "loss": 0.0212, "step": 7130 }, { "epoch": 9.558232931726907, "grad_norm": 0.2299499362707138, "learning_rate": 9.987496459006871e-05, "loss": 0.0199, "step": 7140 }, { "epoch": 9.571619812583668, "grad_norm": 0.1884339451789856, "learning_rate": 9.987379324892982e-05, "loss": 0.0227, "step": 7150 }, { "epoch": 9.585006693440429, "grad_norm": 0.23437385261058807, "learning_rate": 9.987261645366788e-05, "loss": 0.0189, "step": 7160 }, { "epoch": 9.598393574297189, "grad_norm": 0.26546522974967957, "learning_rate": 9.987143420441158e-05, "loss": 0.0203, "step": 7170 }, { "epoch": 9.61178045515395, "grad_norm": 0.2466980516910553, "learning_rate": 9.987024650129022e-05, "loss": 0.0202, "step": 7180 }, { "epoch": 9.62516733601071, "grad_norm": 0.6298662424087524, "learning_rate": 9.986905334443368e-05, "loss": 0.022, "step": 7190 }, { "epoch": 9.638554216867469, "grad_norm": 0.4233785569667816, "learning_rate": 9.986785473397245e-05, "loss": 0.0223, "step": 7200 }, { "epoch": 9.65194109772423, "grad_norm": 0.39249688386917114, "learning_rate": 9.98666506700376e-05, "loss": 0.0205, "step": 7210 }, { "epoch": 9.66532797858099, "grad_norm": 1.0795083045959473, "learning_rate": 9.986544115276081e-05, "loss": 0.0201, "step": 7220 }, { "epoch": 9.67871485943775, "grad_norm": 0.3895474374294281, "learning_rate": 9.986422618227433e-05, "loss": 0.0182, "step": 7230 }, { "epoch": 9.692101740294511, "grad_norm": 0.2714416980743408, "learning_rate": 9.986300575871106e-05, "loss": 0.0237, "step": 7240 }, { "epoch": 9.705488621151272, "grad_norm": 0.2509737014770508, "learning_rate": 9.986177988220444e-05, "loss": 0.0216, "step": 7250 }, { "epoch": 9.718875502008032, "grad_norm": 0.24178212881088257, "learning_rate": 9.986054855288856e-05, "loss": 0.0192, "step": 7260 }, { "epoch": 9.732262382864793, "grad_norm": 0.40692585706710815, "learning_rate": 9.985931177089802e-05, "loss": 0.0193, "step": 7270 }, { "epoch": 9.745649263721553, "grad_norm": 0.1825643628835678, "learning_rate": 9.985806953636814e-05, "loss": 0.0201, "step": 7280 }, { "epoch": 9.759036144578314, "grad_norm": 0.27013465762138367, "learning_rate": 9.985682184943471e-05, "loss": 0.0215, "step": 7290 }, { "epoch": 9.772423025435074, "grad_norm": 0.23322878777980804, "learning_rate": 9.98555687102342e-05, "loss": 0.0179, "step": 7300 }, { "epoch": 9.785809906291835, "grad_norm": 0.21651948988437653, "learning_rate": 9.985431011890367e-05, "loss": 0.021, "step": 7310 }, { "epoch": 9.799196787148594, "grad_norm": 0.23862656950950623, "learning_rate": 9.985304607558075e-05, "loss": 0.0201, "step": 7320 }, { "epoch": 9.812583668005354, "grad_norm": 0.9474694728851318, "learning_rate": 9.985177658040364e-05, "loss": 0.0223, "step": 7330 }, { "epoch": 9.825970548862115, "grad_norm": 0.301957368850708, "learning_rate": 9.985050163351119e-05, "loss": 0.0197, "step": 7340 }, { "epoch": 9.839357429718875, "grad_norm": 0.2969225347042084, "learning_rate": 9.984922123504286e-05, "loss": 0.0229, "step": 7350 }, { "epoch": 9.852744310575636, "grad_norm": 0.2634296417236328, "learning_rate": 9.984793538513862e-05, "loss": 0.0232, "step": 7360 }, { "epoch": 9.866131191432396, "grad_norm": 0.36580270528793335, "learning_rate": 9.984664408393912e-05, "loss": 0.0191, "step": 7370 }, { "epoch": 9.879518072289157, "grad_norm": 0.22180905938148499, "learning_rate": 9.984534733158556e-05, "loss": 0.0224, "step": 7380 }, { "epoch": 9.892904953145917, "grad_norm": 0.2095687985420227, "learning_rate": 9.984404512821977e-05, "loss": 0.0228, "step": 7390 }, { "epoch": 9.906291834002678, "grad_norm": 0.38737085461616516, "learning_rate": 9.984273747398411e-05, "loss": 0.0199, "step": 7400 }, { "epoch": 9.919678714859439, "grad_norm": 0.2970012128353119, "learning_rate": 9.984142436902165e-05, "loss": 0.0174, "step": 7410 }, { "epoch": 9.933065595716197, "grad_norm": 0.3260557949542999, "learning_rate": 9.984010581347596e-05, "loss": 0.0199, "step": 7420 }, { "epoch": 9.946452476572958, "grad_norm": 0.1472177505493164, "learning_rate": 9.983878180749121e-05, "loss": 0.0199, "step": 7430 }, { "epoch": 9.959839357429718, "grad_norm": 0.622674822807312, "learning_rate": 9.983745235121222e-05, "loss": 0.021, "step": 7440 }, { "epoch": 9.973226238286479, "grad_norm": 0.2997741997241974, "learning_rate": 9.983611744478438e-05, "loss": 0.0208, "step": 7450 }, { "epoch": 9.98661311914324, "grad_norm": 0.6222604513168335, "learning_rate": 9.983477708835365e-05, "loss": 0.0193, "step": 7460 }, { "epoch": 10.0, "grad_norm": 0.29468122124671936, "learning_rate": 9.983343128206664e-05, "loss": 0.0239, "step": 7470 }, { "epoch": 10.01338688085676, "grad_norm": 0.221283957362175, "learning_rate": 9.983208002607049e-05, "loss": 0.0238, "step": 7480 }, { "epoch": 10.026773761713521, "grad_norm": 0.13462701439857483, "learning_rate": 9.9830723320513e-05, "loss": 0.02, "step": 7490 }, { "epoch": 10.040160642570282, "grad_norm": 0.295163631439209, "learning_rate": 9.982936116554254e-05, "loss": 0.0212, "step": 7500 }, { "epoch": 10.053547523427042, "grad_norm": 0.3392113149166107, "learning_rate": 9.982799356130803e-05, "loss": 0.0208, "step": 7510 }, { "epoch": 10.066934404283803, "grad_norm": 0.20246422290802002, "learning_rate": 9.982662050795908e-05, "loss": 0.0204, "step": 7520 }, { "epoch": 10.080321285140561, "grad_norm": 0.20394930243492126, "learning_rate": 9.982524200564583e-05, "loss": 0.0188, "step": 7530 }, { "epoch": 10.093708165997322, "grad_norm": 0.21694763004779816, "learning_rate": 9.982385805451901e-05, "loss": 0.0198, "step": 7540 }, { "epoch": 10.107095046854083, "grad_norm": 0.2619397044181824, "learning_rate": 9.982246865472998e-05, "loss": 0.0191, "step": 7550 }, { "epoch": 10.120481927710843, "grad_norm": 0.20197276771068573, "learning_rate": 9.982107380643069e-05, "loss": 0.0199, "step": 7560 }, { "epoch": 10.133868808567604, "grad_norm": 0.19355207681655884, "learning_rate": 9.981967350977368e-05, "loss": 0.0238, "step": 7570 }, { "epoch": 10.147255689424364, "grad_norm": 0.3536096215248108, "learning_rate": 9.981826776491208e-05, "loss": 0.0191, "step": 7580 }, { "epoch": 10.160642570281125, "grad_norm": 0.7228317856788635, "learning_rate": 9.98168565719996e-05, "loss": 0.0172, "step": 7590 }, { "epoch": 10.174029451137885, "grad_norm": 0.18418022990226746, "learning_rate": 9.98154399311906e-05, "loss": 0.0188, "step": 7600 }, { "epoch": 10.187416331994646, "grad_norm": 0.2462201863527298, "learning_rate": 9.981401784263997e-05, "loss": 0.0181, "step": 7610 }, { "epoch": 10.200803212851406, "grad_norm": 0.3159240782260895, "learning_rate": 9.981259030650326e-05, "loss": 0.0192, "step": 7620 }, { "epoch": 10.214190093708165, "grad_norm": 0.3027210533618927, "learning_rate": 9.981115732293655e-05, "loss": 0.0214, "step": 7630 }, { "epoch": 10.227576974564926, "grad_norm": 0.19143404066562653, "learning_rate": 9.980971889209659e-05, "loss": 0.021, "step": 7640 }, { "epoch": 10.240963855421686, "grad_norm": 0.2977609634399414, "learning_rate": 9.980827501414064e-05, "loss": 0.0211, "step": 7650 }, { "epoch": 10.254350736278447, "grad_norm": 0.2706032395362854, "learning_rate": 9.980682568922663e-05, "loss": 0.0198, "step": 7660 }, { "epoch": 10.267737617135207, "grad_norm": 0.33562013506889343, "learning_rate": 9.980537091751304e-05, "loss": 0.0226, "step": 7670 }, { "epoch": 10.281124497991968, "grad_norm": 0.40685829520225525, "learning_rate": 9.980391069915897e-05, "loss": 0.0205, "step": 7680 }, { "epoch": 10.294511378848728, "grad_norm": 0.4151432514190674, "learning_rate": 9.98024450343241e-05, "loss": 0.0179, "step": 7690 }, { "epoch": 10.307898259705489, "grad_norm": 0.3738119602203369, "learning_rate": 9.980097392316872e-05, "loss": 0.0203, "step": 7700 }, { "epoch": 10.32128514056225, "grad_norm": 0.5733925700187683, "learning_rate": 9.97994973658537e-05, "loss": 0.0234, "step": 7710 }, { "epoch": 10.33467202141901, "grad_norm": 0.38172492384910583, "learning_rate": 9.979801536254054e-05, "loss": 0.0224, "step": 7720 }, { "epoch": 10.34805890227577, "grad_norm": 0.4093788266181946, "learning_rate": 9.979652791339127e-05, "loss": 0.0237, "step": 7730 }, { "epoch": 10.36144578313253, "grad_norm": 0.4307501018047333, "learning_rate": 9.97950350185686e-05, "loss": 0.0198, "step": 7740 }, { "epoch": 10.37483266398929, "grad_norm": 0.2657404839992523, "learning_rate": 9.979353667823574e-05, "loss": 0.0211, "step": 7750 }, { "epoch": 10.38821954484605, "grad_norm": 0.5100812911987305, "learning_rate": 9.979203289255658e-05, "loss": 0.0206, "step": 7760 }, { "epoch": 10.401606425702811, "grad_norm": 0.2847917675971985, "learning_rate": 9.979052366169557e-05, "loss": 0.0223, "step": 7770 }, { "epoch": 10.414993306559571, "grad_norm": 0.3979837894439697, "learning_rate": 9.978900898581775e-05, "loss": 0.0184, "step": 7780 }, { "epoch": 10.428380187416332, "grad_norm": 0.22378142178058624, "learning_rate": 9.978748886508875e-05, "loss": 0.0194, "step": 7790 }, { "epoch": 10.441767068273093, "grad_norm": 0.23060336709022522, "learning_rate": 9.978596329967484e-05, "loss": 0.0179, "step": 7800 }, { "epoch": 10.455153949129853, "grad_norm": 0.38435542583465576, "learning_rate": 9.978443228974284e-05, "loss": 0.0186, "step": 7810 }, { "epoch": 10.468540829986614, "grad_norm": 0.2924882769584656, "learning_rate": 9.978289583546015e-05, "loss": 0.019, "step": 7820 }, { "epoch": 10.481927710843374, "grad_norm": 0.23482327163219452, "learning_rate": 9.978135393699484e-05, "loss": 0.0238, "step": 7830 }, { "epoch": 10.495314591700135, "grad_norm": 0.3500645160675049, "learning_rate": 9.977980659451548e-05, "loss": 0.0209, "step": 7840 }, { "epoch": 10.508701472556893, "grad_norm": 0.2628474831581116, "learning_rate": 9.977825380819135e-05, "loss": 0.0206, "step": 7850 }, { "epoch": 10.522088353413654, "grad_norm": 0.16692160069942474, "learning_rate": 9.97766955781922e-05, "loss": 0.0202, "step": 7860 }, { "epoch": 10.535475234270415, "grad_norm": 0.28524190187454224, "learning_rate": 9.977513190468848e-05, "loss": 0.0189, "step": 7870 }, { "epoch": 10.548862115127175, "grad_norm": 0.2183331400156021, "learning_rate": 9.977356278785116e-05, "loss": 0.0188, "step": 7880 }, { "epoch": 10.562248995983936, "grad_norm": 0.3754710853099823, "learning_rate": 9.977198822785184e-05, "loss": 0.0207, "step": 7890 }, { "epoch": 10.575635876840696, "grad_norm": 0.34538981318473816, "learning_rate": 9.977040822486273e-05, "loss": 0.0197, "step": 7900 }, { "epoch": 10.589022757697457, "grad_norm": 0.33826398849487305, "learning_rate": 9.97688227790566e-05, "loss": 0.018, "step": 7910 }, { "epoch": 10.602409638554217, "grad_norm": 0.28578299283981323, "learning_rate": 9.976723189060684e-05, "loss": 0.0203, "step": 7920 }, { "epoch": 10.615796519410978, "grad_norm": 0.31935518980026245, "learning_rate": 9.976563555968742e-05, "loss": 0.0218, "step": 7930 }, { "epoch": 10.629183400267738, "grad_norm": 0.25848257541656494, "learning_rate": 9.976403378647292e-05, "loss": 0.0195, "step": 7940 }, { "epoch": 10.642570281124499, "grad_norm": 1.7542800903320312, "learning_rate": 9.97624265711385e-05, "loss": 0.0241, "step": 7950 }, { "epoch": 10.655957161981258, "grad_norm": 0.3620746433734894, "learning_rate": 9.976081391385993e-05, "loss": 0.0241, "step": 7960 }, { "epoch": 10.669344042838018, "grad_norm": 0.21337364614009857, "learning_rate": 9.975919581481356e-05, "loss": 0.0255, "step": 7970 }, { "epoch": 10.682730923694779, "grad_norm": 0.24319782853126526, "learning_rate": 9.975757227417634e-05, "loss": 0.0208, "step": 7980 }, { "epoch": 10.69611780455154, "grad_norm": 0.2700795531272888, "learning_rate": 9.975594329212586e-05, "loss": 0.0227, "step": 7990 }, { "epoch": 10.7095046854083, "grad_norm": 0.3208879828453064, "learning_rate": 9.97543088688402e-05, "loss": 0.0178, "step": 8000 }, { "epoch": 10.72289156626506, "grad_norm": 0.4355655312538147, "learning_rate": 9.975266900449814e-05, "loss": 0.0194, "step": 8010 }, { "epoch": 10.736278447121821, "grad_norm": 0.25261566042900085, "learning_rate": 9.975102369927898e-05, "loss": 0.0174, "step": 8020 }, { "epoch": 10.749665327978581, "grad_norm": 0.1939247101545334, "learning_rate": 9.974937295336269e-05, "loss": 0.0203, "step": 8030 }, { "epoch": 10.763052208835342, "grad_norm": 0.25619304180145264, "learning_rate": 9.974771676692975e-05, "loss": 0.0188, "step": 8040 }, { "epoch": 10.776439089692103, "grad_norm": 0.3491668701171875, "learning_rate": 9.974605514016131e-05, "loss": 0.0213, "step": 8050 }, { "epoch": 10.789825970548861, "grad_norm": 0.24376115202903748, "learning_rate": 9.974438807323907e-05, "loss": 0.0218, "step": 8060 }, { "epoch": 10.803212851405622, "grad_norm": 0.279751181602478, "learning_rate": 9.974271556634535e-05, "loss": 0.0227, "step": 8070 }, { "epoch": 10.816599732262382, "grad_norm": 0.32077863812446594, "learning_rate": 9.974103761966302e-05, "loss": 0.0195, "step": 8080 }, { "epoch": 10.829986613119143, "grad_norm": 0.5643482804298401, "learning_rate": 9.973935423337563e-05, "loss": 0.0209, "step": 8090 }, { "epoch": 10.843373493975903, "grad_norm": 0.40347275137901306, "learning_rate": 9.973766540766722e-05, "loss": 0.0251, "step": 8100 }, { "epoch": 10.856760374832664, "grad_norm": 0.220279723405838, "learning_rate": 9.97359711427225e-05, "loss": 0.0201, "step": 8110 }, { "epoch": 10.870147255689425, "grad_norm": 0.24241286516189575, "learning_rate": 9.973427143872677e-05, "loss": 0.0178, "step": 8120 }, { "epoch": 10.883534136546185, "grad_norm": 0.2349054366350174, "learning_rate": 9.973256629586589e-05, "loss": 0.0221, "step": 8130 }, { "epoch": 10.896921017402946, "grad_norm": 0.21950332820415497, "learning_rate": 9.973085571432632e-05, "loss": 0.0198, "step": 8140 }, { "epoch": 10.910307898259706, "grad_norm": 0.26989591121673584, "learning_rate": 9.972913969429513e-05, "loss": 0.0198, "step": 8150 }, { "epoch": 10.923694779116467, "grad_norm": 0.347705602645874, "learning_rate": 9.972741823596e-05, "loss": 0.0198, "step": 8160 }, { "epoch": 10.937081659973225, "grad_norm": 0.2678275406360626, "learning_rate": 9.972569133950917e-05, "loss": 0.0169, "step": 8170 }, { "epoch": 10.950468540829986, "grad_norm": 0.5307143926620483, "learning_rate": 9.972395900513151e-05, "loss": 0.0233, "step": 8180 }, { "epoch": 10.963855421686747, "grad_norm": 0.3081524074077606, "learning_rate": 9.972222123301645e-05, "loss": 0.0206, "step": 8190 }, { "epoch": 10.977242302543507, "grad_norm": 0.21207092702388763, "learning_rate": 9.972047802335403e-05, "loss": 0.0211, "step": 8200 }, { "epoch": 10.990629183400268, "grad_norm": 0.29545465111732483, "learning_rate": 9.971872937633488e-05, "loss": 0.0211, "step": 8210 }, { "epoch": 11.004016064257028, "grad_norm": 0.3222421705722809, "learning_rate": 9.971697529215024e-05, "loss": 0.0205, "step": 8220 }, { "epoch": 11.017402945113789, "grad_norm": 0.31549277901649475, "learning_rate": 9.971521577099192e-05, "loss": 0.0182, "step": 8230 }, { "epoch": 11.03078982597055, "grad_norm": 0.2268773317337036, "learning_rate": 9.971345081305236e-05, "loss": 0.0176, "step": 8240 }, { "epoch": 11.04417670682731, "grad_norm": 0.20218466222286224, "learning_rate": 9.971168041852456e-05, "loss": 0.0192, "step": 8250 }, { "epoch": 11.05756358768407, "grad_norm": 0.21784308552742004, "learning_rate": 9.970990458760215e-05, "loss": 0.0198, "step": 8260 }, { "epoch": 11.070950468540829, "grad_norm": 0.3277576267719269, "learning_rate": 9.970812332047929e-05, "loss": 0.0197, "step": 8270 }, { "epoch": 11.08433734939759, "grad_norm": 0.28151193261146545, "learning_rate": 9.97063366173508e-05, "loss": 0.0193, "step": 8280 }, { "epoch": 11.09772423025435, "grad_norm": 0.39086779952049255, "learning_rate": 9.970454447841207e-05, "loss": 0.0195, "step": 8290 }, { "epoch": 11.11111111111111, "grad_norm": 0.3111569285392761, "learning_rate": 9.970274690385909e-05, "loss": 0.0217, "step": 8300 }, { "epoch": 11.124497991967871, "grad_norm": 0.33569541573524475, "learning_rate": 9.970094389388844e-05, "loss": 0.0227, "step": 8310 }, { "epoch": 11.137884872824632, "grad_norm": 0.29616016149520874, "learning_rate": 9.969913544869728e-05, "loss": 0.0221, "step": 8320 }, { "epoch": 11.151271753681392, "grad_norm": 0.20347073674201965, "learning_rate": 9.96973215684834e-05, "loss": 0.0178, "step": 8330 }, { "epoch": 11.164658634538153, "grad_norm": 0.19260157644748688, "learning_rate": 9.969550225344513e-05, "loss": 0.019, "step": 8340 }, { "epoch": 11.178045515394913, "grad_norm": 0.4753871560096741, "learning_rate": 9.969367750378147e-05, "loss": 0.0191, "step": 8350 }, { "epoch": 11.191432396251674, "grad_norm": 0.24015405774116516, "learning_rate": 9.969184731969194e-05, "loss": 0.0221, "step": 8360 }, { "epoch": 11.204819277108435, "grad_norm": 0.24290168285369873, "learning_rate": 9.96900117013767e-05, "loss": 0.0191, "step": 8370 }, { "epoch": 11.218206157965193, "grad_norm": 0.19939252734184265, "learning_rate": 9.96881706490365e-05, "loss": 0.0171, "step": 8380 }, { "epoch": 11.231593038821954, "grad_norm": 0.2266586571931839, "learning_rate": 9.968632416287265e-05, "loss": 0.0177, "step": 8390 }, { "epoch": 11.244979919678714, "grad_norm": 0.23296195268630981, "learning_rate": 9.96844722430871e-05, "loss": 0.0193, "step": 8400 }, { "epoch": 11.258366800535475, "grad_norm": 0.27310556173324585, "learning_rate": 9.968261488988235e-05, "loss": 0.0229, "step": 8410 }, { "epoch": 11.271753681392235, "grad_norm": 0.5468508005142212, "learning_rate": 9.968075210346155e-05, "loss": 0.0207, "step": 8420 }, { "epoch": 11.285140562248996, "grad_norm": 0.32895317673683167, "learning_rate": 9.967888388402839e-05, "loss": 0.0207, "step": 8430 }, { "epoch": 11.298527443105757, "grad_norm": 0.34210050106048584, "learning_rate": 9.967701023178717e-05, "loss": 0.0195, "step": 8440 }, { "epoch": 11.311914323962517, "grad_norm": 0.2635754346847534, "learning_rate": 9.967513114694282e-05, "loss": 0.0203, "step": 8450 }, { "epoch": 11.325301204819278, "grad_norm": 0.6341689229011536, "learning_rate": 9.967324662970079e-05, "loss": 0.0239, "step": 8460 }, { "epoch": 11.338688085676038, "grad_norm": 0.9386224746704102, "learning_rate": 9.96713566802672e-05, "loss": 0.0181, "step": 8470 }, { "epoch": 11.352074966532799, "grad_norm": 0.2325306236743927, "learning_rate": 9.966946129884873e-05, "loss": 0.0192, "step": 8480 }, { "epoch": 11.365461847389557, "grad_norm": 0.24127978086471558, "learning_rate": 9.966756048565265e-05, "loss": 0.0233, "step": 8490 }, { "epoch": 11.378848728246318, "grad_norm": 0.22810013592243195, "learning_rate": 9.966565424088681e-05, "loss": 0.0166, "step": 8500 }, { "epoch": 11.392235609103079, "grad_norm": 0.279922217130661, "learning_rate": 9.96637425647597e-05, "loss": 0.0222, "step": 8510 }, { "epoch": 11.405622489959839, "grad_norm": 0.24281013011932373, "learning_rate": 9.966182545748038e-05, "loss": 0.02, "step": 8520 }, { "epoch": 11.4190093708166, "grad_norm": 0.3766328692436218, "learning_rate": 9.96599029192585e-05, "loss": 0.019, "step": 8530 }, { "epoch": 11.43239625167336, "grad_norm": 0.26764923334121704, "learning_rate": 9.965797495030428e-05, "loss": 0.0192, "step": 8540 }, { "epoch": 11.44578313253012, "grad_norm": 0.18024525046348572, "learning_rate": 9.96560415508286e-05, "loss": 0.0196, "step": 8550 }, { "epoch": 11.459170013386881, "grad_norm": 0.4600423574447632, "learning_rate": 9.965410272104286e-05, "loss": 0.0192, "step": 8560 }, { "epoch": 11.472556894243642, "grad_norm": 0.282204270362854, "learning_rate": 9.96521584611591e-05, "loss": 0.021, "step": 8570 }, { "epoch": 11.485943775100402, "grad_norm": 0.42792248725891113, "learning_rate": 9.965020877138994e-05, "loss": 0.0236, "step": 8580 }, { "epoch": 11.499330655957163, "grad_norm": 0.23073525726795197, "learning_rate": 9.964825365194861e-05, "loss": 0.0195, "step": 8590 }, { "epoch": 11.512717536813922, "grad_norm": 0.25790736079216003, "learning_rate": 9.96462931030489e-05, "loss": 0.0177, "step": 8600 }, { "epoch": 11.526104417670682, "grad_norm": 0.2635025084018707, "learning_rate": 9.96443271249052e-05, "loss": 0.018, "step": 8610 }, { "epoch": 11.539491298527443, "grad_norm": 0.23839715123176575, "learning_rate": 9.964235571773255e-05, "loss": 0.0186, "step": 8620 }, { "epoch": 11.552878179384203, "grad_norm": 0.26998376846313477, "learning_rate": 9.96403788817465e-05, "loss": 0.0201, "step": 8630 }, { "epoch": 11.566265060240964, "grad_norm": 0.2737628221511841, "learning_rate": 9.963839661716325e-05, "loss": 0.0186, "step": 8640 }, { "epoch": 11.579651941097724, "grad_norm": 0.33973339200019836, "learning_rate": 9.963640892419958e-05, "loss": 0.0186, "step": 8650 }, { "epoch": 11.593038821954485, "grad_norm": 0.1816876232624054, "learning_rate": 9.963441580307286e-05, "loss": 0.0191, "step": 8660 }, { "epoch": 11.606425702811245, "grad_norm": 0.17476855218410492, "learning_rate": 9.963241725400104e-05, "loss": 0.0182, "step": 8670 }, { "epoch": 11.619812583668006, "grad_norm": 0.4330703020095825, "learning_rate": 9.963041327720271e-05, "loss": 0.0228, "step": 8680 }, { "epoch": 11.633199464524766, "grad_norm": 0.3932592570781708, "learning_rate": 9.962840387289697e-05, "loss": 0.0235, "step": 8690 }, { "epoch": 11.646586345381525, "grad_norm": 0.34588801860809326, "learning_rate": 9.962638904130363e-05, "loss": 0.0171, "step": 8700 }, { "epoch": 11.659973226238286, "grad_norm": 0.22707563638687134, "learning_rate": 9.962436878264298e-05, "loss": 0.0179, "step": 8710 }, { "epoch": 11.673360107095046, "grad_norm": 0.2873688340187073, "learning_rate": 9.962234309713598e-05, "loss": 0.0198, "step": 8720 }, { "epoch": 11.686746987951807, "grad_norm": 0.16917447745800018, "learning_rate": 9.962031198500414e-05, "loss": 0.0165, "step": 8730 }, { "epoch": 11.700133868808567, "grad_norm": 0.28593865036964417, "learning_rate": 9.961827544646958e-05, "loss": 0.0181, "step": 8740 }, { "epoch": 11.713520749665328, "grad_norm": 0.328218549489975, "learning_rate": 9.961623348175501e-05, "loss": 0.0193, "step": 8750 }, { "epoch": 11.726907630522089, "grad_norm": 0.285862535238266, "learning_rate": 9.961418609108377e-05, "loss": 0.0148, "step": 8760 }, { "epoch": 11.740294511378849, "grad_norm": 0.23223668336868286, "learning_rate": 9.961213327467971e-05, "loss": 0.0167, "step": 8770 }, { "epoch": 11.75368139223561, "grad_norm": 0.2557608485221863, "learning_rate": 9.961007503276736e-05, "loss": 0.0213, "step": 8780 }, { "epoch": 11.76706827309237, "grad_norm": 0.16925902664661407, "learning_rate": 9.960801136557179e-05, "loss": 0.0181, "step": 8790 }, { "epoch": 11.78045515394913, "grad_norm": 1.2366642951965332, "learning_rate": 9.960594227331866e-05, "loss": 0.0204, "step": 8800 }, { "epoch": 11.79384203480589, "grad_norm": 0.3596659004688263, "learning_rate": 9.960386775623429e-05, "loss": 0.0164, "step": 8810 }, { "epoch": 11.80722891566265, "grad_norm": 0.5558513402938843, "learning_rate": 9.96017878145455e-05, "loss": 0.0193, "step": 8820 }, { "epoch": 11.82061579651941, "grad_norm": 0.31854352355003357, "learning_rate": 9.959970244847977e-05, "loss": 0.0195, "step": 8830 }, { "epoch": 11.834002677376171, "grad_norm": 0.46039071679115295, "learning_rate": 9.959761165826518e-05, "loss": 0.0222, "step": 8840 }, { "epoch": 11.847389558232932, "grad_norm": 0.43662771582603455, "learning_rate": 9.959551544413033e-05, "loss": 0.0201, "step": 8850 }, { "epoch": 11.860776439089692, "grad_norm": 0.2865435481071472, "learning_rate": 9.959341380630448e-05, "loss": 0.0241, "step": 8860 }, { "epoch": 11.874163319946453, "grad_norm": 0.4078998863697052, "learning_rate": 9.959130674501746e-05, "loss": 0.0191, "step": 8870 }, { "epoch": 11.887550200803213, "grad_norm": 0.20494545996189117, "learning_rate": 9.958919426049968e-05, "loss": 0.0218, "step": 8880 }, { "epoch": 11.900937081659974, "grad_norm": 0.3534897267818451, "learning_rate": 9.958707635298219e-05, "loss": 0.0205, "step": 8890 }, { "epoch": 11.914323962516734, "grad_norm": 0.24215039610862732, "learning_rate": 9.958495302269657e-05, "loss": 0.0193, "step": 8900 }, { "epoch": 11.927710843373493, "grad_norm": 0.27808767557144165, "learning_rate": 9.958282426987503e-05, "loss": 0.019, "step": 8910 }, { "epoch": 11.941097724230254, "grad_norm": 0.6683477759361267, "learning_rate": 9.95806900947504e-05, "loss": 0.0182, "step": 8920 }, { "epoch": 11.954484605087014, "grad_norm": 0.4169679284095764, "learning_rate": 9.957855049755604e-05, "loss": 0.0216, "step": 8930 }, { "epoch": 11.967871485943775, "grad_norm": 0.18976542353630066, "learning_rate": 9.957640547852593e-05, "loss": 0.0211, "step": 8940 }, { "epoch": 11.981258366800535, "grad_norm": 0.22903086245059967, "learning_rate": 9.957425503789466e-05, "loss": 0.0189, "step": 8950 }, { "epoch": 11.994645247657296, "grad_norm": 0.23730523884296417, "learning_rate": 9.957209917589738e-05, "loss": 0.0204, "step": 8960 }, { "epoch": 12.008032128514056, "grad_norm": 0.2915104031562805, "learning_rate": 9.956993789276987e-05, "loss": 0.0225, "step": 8970 }, { "epoch": 12.021419009370817, "grad_norm": 0.2969241142272949, "learning_rate": 9.956777118874847e-05, "loss": 0.0197, "step": 8980 }, { "epoch": 12.034805890227577, "grad_norm": 0.20273692905902863, "learning_rate": 9.956559906407016e-05, "loss": 0.018, "step": 8990 }, { "epoch": 12.048192771084338, "grad_norm": 0.20086808502674103, "learning_rate": 9.956342151897245e-05, "loss": 0.0209, "step": 9000 }, { "epoch": 12.061579651941098, "grad_norm": 0.2803514897823334, "learning_rate": 9.956123855369346e-05, "loss": 0.0213, "step": 9010 }, { "epoch": 12.074966532797857, "grad_norm": 0.22226445376873016, "learning_rate": 9.955905016847196e-05, "loss": 0.0197, "step": 9020 }, { "epoch": 12.088353413654618, "grad_norm": 0.6852798461914062, "learning_rate": 9.955685636354723e-05, "loss": 0.0225, "step": 9030 }, { "epoch": 12.101740294511378, "grad_norm": 0.22552883625030518, "learning_rate": 9.95546571391592e-05, "loss": 0.0223, "step": 9040 }, { "epoch": 12.115127175368139, "grad_norm": 1.0456491708755493, "learning_rate": 9.955245249554837e-05, "loss": 0.0187, "step": 9050 }, { "epoch": 12.1285140562249, "grad_norm": 0.30096811056137085, "learning_rate": 9.955024243295582e-05, "loss": 0.0171, "step": 9060 }, { "epoch": 12.14190093708166, "grad_norm": 0.4315531551837921, "learning_rate": 9.954802695162328e-05, "loss": 0.0173, "step": 9070 }, { "epoch": 12.15528781793842, "grad_norm": 0.28275027871131897, "learning_rate": 9.954580605179302e-05, "loss": 0.0182, "step": 9080 }, { "epoch": 12.168674698795181, "grad_norm": 0.22482627630233765, "learning_rate": 9.954357973370788e-05, "loss": 0.0158, "step": 9090 }, { "epoch": 12.182061579651942, "grad_norm": 0.3098059892654419, "learning_rate": 9.954134799761135e-05, "loss": 0.0185, "step": 9100 }, { "epoch": 12.195448460508702, "grad_norm": 0.6468265056610107, "learning_rate": 9.953911084374748e-05, "loss": 0.0156, "step": 9110 }, { "epoch": 12.208835341365463, "grad_norm": 0.24526795744895935, "learning_rate": 9.953686827236093e-05, "loss": 0.0172, "step": 9120 }, { "epoch": 12.222222222222221, "grad_norm": 0.22665637731552124, "learning_rate": 9.953462028369695e-05, "loss": 0.0195, "step": 9130 }, { "epoch": 12.235609103078982, "grad_norm": 0.2692811191082001, "learning_rate": 9.953236687800136e-05, "loss": 0.02, "step": 9140 }, { "epoch": 12.248995983935743, "grad_norm": 0.24607963860034943, "learning_rate": 9.95301080555206e-05, "loss": 0.0161, "step": 9150 }, { "epoch": 12.262382864792503, "grad_norm": 0.25311797857284546, "learning_rate": 9.952784381650171e-05, "loss": 0.0208, "step": 9160 }, { "epoch": 12.275769745649264, "grad_norm": 0.2803364098072052, "learning_rate": 9.952557416119226e-05, "loss": 0.0156, "step": 9170 }, { "epoch": 12.289156626506024, "grad_norm": 0.3879629075527191, "learning_rate": 9.95232990898405e-05, "loss": 0.0185, "step": 9180 }, { "epoch": 12.302543507362785, "grad_norm": 0.2148362398147583, "learning_rate": 9.95210186026952e-05, "loss": 0.0175, "step": 9190 }, { "epoch": 12.315930388219545, "grad_norm": 0.5059884190559387, "learning_rate": 9.951873270000576e-05, "loss": 0.0198, "step": 9200 }, { "epoch": 12.329317269076306, "grad_norm": 0.3031889498233795, "learning_rate": 9.951644138202216e-05, "loss": 0.0223, "step": 9210 }, { "epoch": 12.342704149933066, "grad_norm": 0.5824258923530579, "learning_rate": 9.951414464899498e-05, "loss": 0.0177, "step": 9220 }, { "epoch": 12.356091030789827, "grad_norm": 0.21879027783870697, "learning_rate": 9.951184250117538e-05, "loss": 0.0186, "step": 9230 }, { "epoch": 12.369477911646586, "grad_norm": 0.21553708612918854, "learning_rate": 9.950953493881513e-05, "loss": 0.0184, "step": 9240 }, { "epoch": 12.382864792503346, "grad_norm": 0.28692182898521423, "learning_rate": 9.950722196216658e-05, "loss": 0.0178, "step": 9250 }, { "epoch": 12.396251673360107, "grad_norm": 0.3250883221626282, "learning_rate": 9.950490357148265e-05, "loss": 0.0209, "step": 9260 }, { "epoch": 12.409638554216867, "grad_norm": 0.2115074098110199, "learning_rate": 9.950257976701692e-05, "loss": 0.0203, "step": 9270 }, { "epoch": 12.423025435073628, "grad_norm": 0.2682338356971741, "learning_rate": 9.950025054902348e-05, "loss": 0.0229, "step": 9280 }, { "epoch": 12.436412315930388, "grad_norm": 0.38536298274993896, "learning_rate": 9.949791591775706e-05, "loss": 0.0209, "step": 9290 }, { "epoch": 12.449799196787149, "grad_norm": 0.4177658259868622, "learning_rate": 9.949557587347298e-05, "loss": 0.0192, "step": 9300 }, { "epoch": 12.46318607764391, "grad_norm": 0.21393859386444092, "learning_rate": 9.949323041642713e-05, "loss": 0.0224, "step": 9310 }, { "epoch": 12.47657295850067, "grad_norm": 0.5310548543930054, "learning_rate": 9.949087954687602e-05, "loss": 0.0216, "step": 9320 }, { "epoch": 12.48995983935743, "grad_norm": 0.2455093264579773, "learning_rate": 9.948852326507672e-05, "loss": 0.0196, "step": 9330 }, { "epoch": 12.50334672021419, "grad_norm": 0.26638224720954895, "learning_rate": 9.948616157128694e-05, "loss": 0.0224, "step": 9340 }, { "epoch": 12.51673360107095, "grad_norm": 0.25116074085235596, "learning_rate": 9.948379446576493e-05, "loss": 0.0251, "step": 9350 }, { "epoch": 12.53012048192771, "grad_norm": 0.29785144329071045, "learning_rate": 9.948142194876952e-05, "loss": 0.0214, "step": 9360 }, { "epoch": 12.54350736278447, "grad_norm": 0.43517211079597473, "learning_rate": 9.947904402056024e-05, "loss": 0.0174, "step": 9370 }, { "epoch": 12.556894243641231, "grad_norm": 0.6741633415222168, "learning_rate": 9.947666068139708e-05, "loss": 0.0175, "step": 9380 }, { "epoch": 12.570281124497992, "grad_norm": 0.4893437922000885, "learning_rate": 9.947427193154071e-05, "loss": 0.0179, "step": 9390 }, { "epoch": 12.583668005354752, "grad_norm": 0.25091785192489624, "learning_rate": 9.947187777125233e-05, "loss": 0.0194, "step": 9400 }, { "epoch": 12.597054886211513, "grad_norm": 0.5760458111763, "learning_rate": 9.946947820079377e-05, "loss": 0.0213, "step": 9410 }, { "epoch": 12.610441767068274, "grad_norm": 0.3899858891963959, "learning_rate": 9.946707322042747e-05, "loss": 0.0178, "step": 9420 }, { "epoch": 12.623828647925034, "grad_norm": 0.24175721406936646, "learning_rate": 9.94646628304164e-05, "loss": 0.0161, "step": 9430 }, { "epoch": 12.637215528781795, "grad_norm": 0.33354800939559937, "learning_rate": 9.946224703102418e-05, "loss": 0.0168, "step": 9440 }, { "epoch": 12.650602409638553, "grad_norm": 0.19526804983615875, "learning_rate": 9.945982582251498e-05, "loss": 0.0174, "step": 9450 }, { "epoch": 12.663989290495314, "grad_norm": 0.3223598003387451, "learning_rate": 9.94573992051536e-05, "loss": 0.018, "step": 9460 }, { "epoch": 12.677376171352075, "grad_norm": 0.5423722863197327, "learning_rate": 9.94549671792054e-05, "loss": 0.0218, "step": 9470 }, { "epoch": 12.690763052208835, "grad_norm": 0.3056928813457489, "learning_rate": 9.945252974493635e-05, "loss": 0.0198, "step": 9480 }, { "epoch": 12.704149933065596, "grad_norm": 0.35069262981414795, "learning_rate": 9.9450086902613e-05, "loss": 0.0185, "step": 9490 }, { "epoch": 12.717536813922356, "grad_norm": 0.18980860710144043, "learning_rate": 9.944763865250248e-05, "loss": 0.015, "step": 9500 }, { "epoch": 12.730923694779117, "grad_norm": 0.5088186860084534, "learning_rate": 9.944518499487254e-05, "loss": 0.0149, "step": 9510 }, { "epoch": 12.744310575635877, "grad_norm": 0.3039194643497467, "learning_rate": 9.944272592999151e-05, "loss": 0.017, "step": 9520 }, { "epoch": 12.757697456492638, "grad_norm": 0.15645731985569, "learning_rate": 9.94402614581283e-05, "loss": 0.0197, "step": 9530 }, { "epoch": 12.771084337349398, "grad_norm": 0.21862560510635376, "learning_rate": 9.943779157955244e-05, "loss": 0.0165, "step": 9540 }, { "epoch": 12.784471218206157, "grad_norm": 0.22642667591571808, "learning_rate": 9.943531629453403e-05, "loss": 0.0177, "step": 9550 }, { "epoch": 12.797858099062918, "grad_norm": 0.3213987648487091, "learning_rate": 9.943283560334375e-05, "loss": 0.0183, "step": 9560 }, { "epoch": 12.811244979919678, "grad_norm": 0.26343944668769836, "learning_rate": 9.943034950625288e-05, "loss": 0.0178, "step": 9570 }, { "epoch": 12.824631860776439, "grad_norm": 0.21412791311740875, "learning_rate": 9.942785800353332e-05, "loss": 0.0159, "step": 9580 }, { "epoch": 12.8380187416332, "grad_norm": 0.21009358763694763, "learning_rate": 9.942536109545751e-05, "loss": 0.0191, "step": 9590 }, { "epoch": 12.85140562248996, "grad_norm": 0.32826316356658936, "learning_rate": 9.942285878229853e-05, "loss": 0.017, "step": 9600 }, { "epoch": 12.86479250334672, "grad_norm": 0.29559192061424255, "learning_rate": 9.942035106433001e-05, "loss": 0.0169, "step": 9610 }, { "epoch": 12.87817938420348, "grad_norm": 0.261893093585968, "learning_rate": 9.94178379418262e-05, "loss": 0.0169, "step": 9620 }, { "epoch": 12.891566265060241, "grad_norm": 0.2633693516254425, "learning_rate": 9.941531941506194e-05, "loss": 0.0182, "step": 9630 }, { "epoch": 12.904953145917002, "grad_norm": 0.24273455142974854, "learning_rate": 9.941279548431263e-05, "loss": 0.0177, "step": 9640 }, { "epoch": 12.918340026773762, "grad_norm": 0.2046523243188858, "learning_rate": 9.941026614985431e-05, "loss": 0.0203, "step": 9650 }, { "epoch": 12.931726907630523, "grad_norm": 0.6674339175224304, "learning_rate": 9.940773141196357e-05, "loss": 0.0219, "step": 9660 }, { "epoch": 12.945113788487282, "grad_norm": 1.056938886642456, "learning_rate": 9.94051912709176e-05, "loss": 0.0173, "step": 9670 }, { "epoch": 12.958500669344042, "grad_norm": 0.24687854945659637, "learning_rate": 9.940264572699421e-05, "loss": 0.0199, "step": 9680 }, { "epoch": 12.971887550200803, "grad_norm": 1.4097120761871338, "learning_rate": 9.940009478047174e-05, "loss": 0.018, "step": 9690 }, { "epoch": 12.985274431057563, "grad_norm": 0.24821308255195618, "learning_rate": 9.939753843162918e-05, "loss": 0.0198, "step": 9700 }, { "epoch": 12.998661311914324, "grad_norm": 0.4567914605140686, "learning_rate": 9.939497668074609e-05, "loss": 0.0219, "step": 9710 }, { "epoch": 13.012048192771084, "grad_norm": 0.22397929430007935, "learning_rate": 9.93924095281026e-05, "loss": 0.0188, "step": 9720 }, { "epoch": 13.025435073627845, "grad_norm": 0.29138076305389404, "learning_rate": 9.938983697397948e-05, "loss": 0.0203, "step": 9730 }, { "epoch": 13.038821954484606, "grad_norm": 0.24327270686626434, "learning_rate": 9.938725901865805e-05, "loss": 0.0167, "step": 9740 }, { "epoch": 13.052208835341366, "grad_norm": 0.5707051753997803, "learning_rate": 9.93846756624202e-05, "loss": 0.0241, "step": 9750 }, { "epoch": 13.065595716198127, "grad_norm": 0.23783756792545319, "learning_rate": 9.938208690554849e-05, "loss": 0.0184, "step": 9760 }, { "epoch": 13.078982597054885, "grad_norm": 0.18546143174171448, "learning_rate": 9.9379492748326e-05, "loss": 0.0177, "step": 9770 }, { "epoch": 13.092369477911646, "grad_norm": 0.3647522032260895, "learning_rate": 9.937689319103641e-05, "loss": 0.0165, "step": 9780 }, { "epoch": 13.105756358768406, "grad_norm": 0.25068917870521545, "learning_rate": 9.937428823396404e-05, "loss": 0.0215, "step": 9790 }, { "epoch": 13.119143239625167, "grad_norm": 0.2134212851524353, "learning_rate": 9.937167787739372e-05, "loss": 0.0199, "step": 9800 }, { "epoch": 13.132530120481928, "grad_norm": 0.2553845942020416, "learning_rate": 9.936906212161095e-05, "loss": 0.016, "step": 9810 }, { "epoch": 13.145917001338688, "grad_norm": 0.3372953236103058, "learning_rate": 9.936644096690176e-05, "loss": 0.0162, "step": 9820 }, { "epoch": 13.159303882195449, "grad_norm": 0.33426544070243835, "learning_rate": 9.936381441355282e-05, "loss": 0.0144, "step": 9830 }, { "epoch": 13.17269076305221, "grad_norm": 0.39472004771232605, "learning_rate": 9.936118246185136e-05, "loss": 0.0177, "step": 9840 }, { "epoch": 13.18607764390897, "grad_norm": 0.2301846295595169, "learning_rate": 9.935854511208518e-05, "loss": 0.0199, "step": 9850 }, { "epoch": 13.19946452476573, "grad_norm": 0.33551812171936035, "learning_rate": 9.935590236454272e-05, "loss": 0.0198, "step": 9860 }, { "epoch": 13.21285140562249, "grad_norm": 0.2269711047410965, "learning_rate": 9.935325421951298e-05, "loss": 0.0178, "step": 9870 }, { "epoch": 13.22623828647925, "grad_norm": 0.27811458706855774, "learning_rate": 9.935060067728557e-05, "loss": 0.0192, "step": 9880 }, { "epoch": 13.23962516733601, "grad_norm": 0.2934311032295227, "learning_rate": 9.934794173815067e-05, "loss": 0.0175, "step": 9890 }, { "epoch": 13.25301204819277, "grad_norm": 0.20336079597473145, "learning_rate": 9.934527740239906e-05, "loss": 0.0188, "step": 9900 }, { "epoch": 13.266398929049531, "grad_norm": 0.2290450930595398, "learning_rate": 9.934260767032209e-05, "loss": 0.0181, "step": 9910 }, { "epoch": 13.279785809906292, "grad_norm": 0.1790703684091568, "learning_rate": 9.933993254221172e-05, "loss": 0.0165, "step": 9920 }, { "epoch": 13.293172690763052, "grad_norm": 0.35878413915634155, "learning_rate": 9.933725201836053e-05, "loss": 0.0203, "step": 9930 }, { "epoch": 13.306559571619813, "grad_norm": 0.9082741737365723, "learning_rate": 9.933456609906162e-05, "loss": 0.0202, "step": 9940 }, { "epoch": 13.319946452476573, "grad_norm": 0.23891811072826385, "learning_rate": 9.933187478460875e-05, "loss": 0.0186, "step": 9950 }, { "epoch": 13.333333333333334, "grad_norm": 0.5497512221336365, "learning_rate": 9.93291780752962e-05, "loss": 0.0175, "step": 9960 }, { "epoch": 13.346720214190094, "grad_norm": 0.2570192515850067, "learning_rate": 9.932647597141893e-05, "loss": 0.0173, "step": 9970 }, { "epoch": 13.360107095046853, "grad_norm": 0.19951923191547394, "learning_rate": 9.932376847327239e-05, "loss": 0.0167, "step": 9980 }, { "epoch": 13.373493975903614, "grad_norm": 0.32901355624198914, "learning_rate": 9.932105558115268e-05, "loss": 0.0197, "step": 9990 }, { "epoch": 13.386880856760374, "grad_norm": 0.25658702850341797, "learning_rate": 9.931833729535651e-05, "loss": 0.0178, "step": 10000 }, { "epoch": 13.400267737617135, "grad_norm": 0.2332061380147934, "learning_rate": 9.931561361618111e-05, "loss": 0.0188, "step": 10010 }, { "epoch": 13.413654618473895, "grad_norm": 0.2431224137544632, "learning_rate": 9.931288454392435e-05, "loss": 0.0176, "step": 10020 }, { "epoch": 13.427041499330656, "grad_norm": 0.25236713886260986, "learning_rate": 9.931015007888467e-05, "loss": 0.0169, "step": 10030 }, { "epoch": 13.440428380187416, "grad_norm": 0.21126329898834229, "learning_rate": 9.930741022136112e-05, "loss": 0.0187, "step": 10040 }, { "epoch": 13.453815261044177, "grad_norm": 0.16579249501228333, "learning_rate": 9.930466497165333e-05, "loss": 0.0164, "step": 10050 }, { "epoch": 13.467202141900938, "grad_norm": 0.29690003395080566, "learning_rate": 9.93019143300615e-05, "loss": 0.0163, "step": 10060 }, { "epoch": 13.480589022757698, "grad_norm": 0.28076401352882385, "learning_rate": 9.929915829688644e-05, "loss": 0.015, "step": 10070 }, { "epoch": 13.493975903614459, "grad_norm": 0.19393910467624664, "learning_rate": 9.929639687242955e-05, "loss": 0.0167, "step": 10080 }, { "epoch": 13.507362784471217, "grad_norm": 0.288582444190979, "learning_rate": 9.929363005699281e-05, "loss": 0.0191, "step": 10090 }, { "epoch": 13.520749665327978, "grad_norm": 0.1995312124490738, "learning_rate": 9.92908578508788e-05, "loss": 0.0172, "step": 10100 }, { "epoch": 13.534136546184738, "grad_norm": 0.3212960958480835, "learning_rate": 9.928808025439069e-05, "loss": 0.0185, "step": 10110 }, { "epoch": 13.547523427041499, "grad_norm": 0.694835901260376, "learning_rate": 9.928529726783223e-05, "loss": 0.0168, "step": 10120 }, { "epoch": 13.56091030789826, "grad_norm": 0.19858577847480774, "learning_rate": 9.928250889150774e-05, "loss": 0.0157, "step": 10130 }, { "epoch": 13.57429718875502, "grad_norm": 0.1725478619337082, "learning_rate": 9.92797151257222e-05, "loss": 0.0154, "step": 10140 }, { "epoch": 13.58768406961178, "grad_norm": 0.21249403059482574, "learning_rate": 9.927691597078108e-05, "loss": 0.016, "step": 10150 }, { "epoch": 13.601070950468541, "grad_norm": 0.3101791739463806, "learning_rate": 9.927411142699053e-05, "loss": 0.0175, "step": 10160 }, { "epoch": 13.614457831325302, "grad_norm": 0.1781795173883438, "learning_rate": 9.927130149465725e-05, "loss": 0.0175, "step": 10170 }, { "epoch": 13.627844712182062, "grad_norm": 0.2784191966056824, "learning_rate": 9.92684861740885e-05, "loss": 0.0205, "step": 10180 }, { "epoch": 13.641231593038821, "grad_norm": 0.27612581849098206, "learning_rate": 9.926566546559217e-05, "loss": 0.0182, "step": 10190 }, { "epoch": 13.654618473895582, "grad_norm": 0.2819475829601288, "learning_rate": 9.926283936947673e-05, "loss": 0.015, "step": 10200 }, { "epoch": 13.668005354752342, "grad_norm": 0.29691892862319946, "learning_rate": 9.926000788605126e-05, "loss": 0.0133, "step": 10210 }, { "epoch": 13.681392235609103, "grad_norm": 1.0746318101882935, "learning_rate": 9.92571710156254e-05, "loss": 0.0172, "step": 10220 }, { "epoch": 13.694779116465863, "grad_norm": 0.35927948355674744, "learning_rate": 9.925432875850936e-05, "loss": 0.0165, "step": 10230 }, { "epoch": 13.708165997322624, "grad_norm": 0.4257536828517914, "learning_rate": 9.925148111501396e-05, "loss": 0.0163, "step": 10240 }, { "epoch": 13.721552878179384, "grad_norm": 0.634962797164917, "learning_rate": 9.924862808545066e-05, "loss": 0.0173, "step": 10250 }, { "epoch": 13.734939759036145, "grad_norm": 0.33713269233703613, "learning_rate": 9.924576967013141e-05, "loss": 0.0144, "step": 10260 }, { "epoch": 13.748326639892905, "grad_norm": 0.24455511569976807, "learning_rate": 9.924290586936887e-05, "loss": 0.0184, "step": 10270 }, { "epoch": 13.761713520749666, "grad_norm": 0.29870662093162537, "learning_rate": 9.924003668347614e-05, "loss": 0.0195, "step": 10280 }, { "epoch": 13.775100401606426, "grad_norm": 0.26899126172065735, "learning_rate": 9.923716211276704e-05, "loss": 0.0179, "step": 10290 }, { "epoch": 13.788487282463187, "grad_norm": 0.23501230776309967, "learning_rate": 9.923428215755594e-05, "loss": 0.0185, "step": 10300 }, { "epoch": 13.801874163319946, "grad_norm": 0.29251348972320557, "learning_rate": 9.923139681815775e-05, "loss": 0.0167, "step": 10310 }, { "epoch": 13.815261044176706, "grad_norm": 0.332487016916275, "learning_rate": 9.922850609488801e-05, "loss": 0.019, "step": 10320 }, { "epoch": 13.828647925033467, "grad_norm": 0.2148713767528534, "learning_rate": 9.922560998806287e-05, "loss": 0.0166, "step": 10330 }, { "epoch": 13.842034805890227, "grad_norm": 0.2817719578742981, "learning_rate": 9.922270849799905e-05, "loss": 0.019, "step": 10340 }, { "epoch": 13.855421686746988, "grad_norm": 0.23132416605949402, "learning_rate": 9.92198016250138e-05, "loss": 0.0173, "step": 10350 }, { "epoch": 13.868808567603748, "grad_norm": 0.19635313749313354, "learning_rate": 9.921688936942506e-05, "loss": 0.018, "step": 10360 }, { "epoch": 13.882195448460509, "grad_norm": 0.2807234227657318, "learning_rate": 9.921397173155129e-05, "loss": 0.0206, "step": 10370 }, { "epoch": 13.89558232931727, "grad_norm": 0.2643081247806549, "learning_rate": 9.921104871171157e-05, "loss": 0.0198, "step": 10380 }, { "epoch": 13.90896921017403, "grad_norm": 0.2869664430618286, "learning_rate": 9.920812031022554e-05, "loss": 0.0185, "step": 10390 }, { "epoch": 13.92235609103079, "grad_norm": 0.33274558186531067, "learning_rate": 9.920518652741348e-05, "loss": 0.0204, "step": 10400 }, { "epoch": 13.93574297188755, "grad_norm": 0.2979962229728699, "learning_rate": 9.920224736359618e-05, "loss": 0.0192, "step": 10410 }, { "epoch": 13.94912985274431, "grad_norm": 0.3126591145992279, "learning_rate": 9.91993028190951e-05, "loss": 0.0178, "step": 10420 }, { "epoch": 13.96251673360107, "grad_norm": 0.16999797523021698, "learning_rate": 9.919635289423222e-05, "loss": 0.0173, "step": 10430 }, { "epoch": 13.975903614457831, "grad_norm": 0.29944726824760437, "learning_rate": 9.919339758933015e-05, "loss": 0.0168, "step": 10440 }, { "epoch": 13.989290495314592, "grad_norm": 0.28780680894851685, "learning_rate": 9.919043690471209e-05, "loss": 0.0171, "step": 10450 }, { "epoch": 14.002677376171352, "grad_norm": 0.28365975618362427, "learning_rate": 9.91874708407018e-05, "loss": 0.0164, "step": 10460 }, { "epoch": 14.016064257028113, "grad_norm": 0.24466438591480255, "learning_rate": 9.918449939762367e-05, "loss": 0.017, "step": 10470 }, { "epoch": 14.029451137884873, "grad_norm": 0.2700306177139282, "learning_rate": 9.91815225758026e-05, "loss": 0.0189, "step": 10480 }, { "epoch": 14.042838018741634, "grad_norm": 0.21381980180740356, "learning_rate": 9.917854037556419e-05, "loss": 0.0174, "step": 10490 }, { "epoch": 14.056224899598394, "grad_norm": 0.26147621870040894, "learning_rate": 9.917555279723454e-05, "loss": 0.0168, "step": 10500 }, { "epoch": 14.069611780455155, "grad_norm": 0.3067070245742798, "learning_rate": 9.917255984114036e-05, "loss": 0.0217, "step": 10510 }, { "epoch": 14.082998661311914, "grad_norm": 0.27916887402534485, "learning_rate": 9.916956150760896e-05, "loss": 0.0155, "step": 10520 }, { "epoch": 14.096385542168674, "grad_norm": 0.2096703052520752, "learning_rate": 9.916655779696826e-05, "loss": 0.017, "step": 10530 }, { "epoch": 14.109772423025435, "grad_norm": 0.27258458733558655, "learning_rate": 9.916354870954671e-05, "loss": 0.0167, "step": 10540 }, { "epoch": 14.123159303882195, "grad_norm": 0.22636482119560242, "learning_rate": 9.91605342456734e-05, "loss": 0.0204, "step": 10550 }, { "epoch": 14.136546184738956, "grad_norm": 0.6406905055046082, "learning_rate": 9.915751440567795e-05, "loss": 0.0167, "step": 10560 }, { "epoch": 14.149933065595716, "grad_norm": 0.3608877658843994, "learning_rate": 9.915448918989066e-05, "loss": 0.0175, "step": 10570 }, { "epoch": 14.163319946452477, "grad_norm": 0.37367382645606995, "learning_rate": 9.915145859864232e-05, "loss": 0.0184, "step": 10580 }, { "epoch": 14.176706827309237, "grad_norm": 0.16432592272758484, "learning_rate": 9.914842263226437e-05, "loss": 0.0145, "step": 10590 }, { "epoch": 14.190093708165998, "grad_norm": 0.25311774015426636, "learning_rate": 9.914538129108882e-05, "loss": 0.017, "step": 10600 }, { "epoch": 14.203480589022758, "grad_norm": 0.17505666613578796, "learning_rate": 9.914233457544825e-05, "loss": 0.0159, "step": 10610 }, { "epoch": 14.216867469879517, "grad_norm": 0.2621474266052246, "learning_rate": 9.913928248567586e-05, "loss": 0.0198, "step": 10620 }, { "epoch": 14.230254350736278, "grad_norm": 0.7606595158576965, "learning_rate": 9.913622502210542e-05, "loss": 0.0208, "step": 10630 }, { "epoch": 14.243641231593038, "grad_norm": 0.8836305141448975, "learning_rate": 9.913316218507128e-05, "loss": 0.0186, "step": 10640 }, { "epoch": 14.257028112449799, "grad_norm": 0.3921626806259155, "learning_rate": 9.91300939749084e-05, "loss": 0.0205, "step": 10650 }, { "epoch": 14.27041499330656, "grad_norm": 0.20361407101154327, "learning_rate": 9.91270203919523e-05, "loss": 0.0199, "step": 10660 }, { "epoch": 14.28380187416332, "grad_norm": 0.5414855480194092, "learning_rate": 9.912394143653912e-05, "loss": 0.0153, "step": 10670 }, { "epoch": 14.29718875502008, "grad_norm": 0.28239333629608154, "learning_rate": 9.912085710900555e-05, "loss": 0.0176, "step": 10680 }, { "epoch": 14.310575635876841, "grad_norm": 0.23015441000461578, "learning_rate": 9.911776740968892e-05, "loss": 0.0214, "step": 10690 }, { "epoch": 14.323962516733602, "grad_norm": 0.24507403373718262, "learning_rate": 9.911467233892709e-05, "loss": 0.019, "step": 10700 }, { "epoch": 14.337349397590362, "grad_norm": 0.2901628315448761, "learning_rate": 9.911157189705853e-05, "loss": 0.0184, "step": 10710 }, { "epoch": 14.350736278447123, "grad_norm": 0.2919355630874634, "learning_rate": 9.910846608442229e-05, "loss": 0.0171, "step": 10720 }, { "epoch": 14.364123159303881, "grad_norm": 0.353199303150177, "learning_rate": 9.910535490135805e-05, "loss": 0.0193, "step": 10730 }, { "epoch": 14.377510040160642, "grad_norm": 0.2718732953071594, "learning_rate": 9.910223834820603e-05, "loss": 0.0206, "step": 10740 }, { "epoch": 14.390896921017402, "grad_norm": 0.2439352571964264, "learning_rate": 9.909911642530703e-05, "loss": 0.0168, "step": 10750 }, { "epoch": 14.404283801874163, "grad_norm": 0.2106187790632248, "learning_rate": 9.909598913300249e-05, "loss": 0.0198, "step": 10760 }, { "epoch": 14.417670682730924, "grad_norm": 0.2406354546546936, "learning_rate": 9.909285647163438e-05, "loss": 0.0243, "step": 10770 }, { "epoch": 14.431057563587684, "grad_norm": 0.3972150385379791, "learning_rate": 9.908971844154531e-05, "loss": 0.0212, "step": 10780 }, { "epoch": 14.444444444444445, "grad_norm": 0.24506226181983948, "learning_rate": 9.908657504307843e-05, "loss": 0.0216, "step": 10790 }, { "epoch": 14.457831325301205, "grad_norm": 0.3434704542160034, "learning_rate": 9.908342627657751e-05, "loss": 0.0216, "step": 10800 }, { "epoch": 14.471218206157966, "grad_norm": 0.32262077927589417, "learning_rate": 9.908027214238689e-05, "loss": 0.0219, "step": 10810 }, { "epoch": 14.484605087014726, "grad_norm": 0.22748786211013794, "learning_rate": 9.90771126408515e-05, "loss": 0.0209, "step": 10820 }, { "epoch": 14.497991967871485, "grad_norm": 0.3117317259311676, "learning_rate": 9.907394777231685e-05, "loss": 0.0231, "step": 10830 }, { "epoch": 14.511378848728246, "grad_norm": 0.34504544734954834, "learning_rate": 9.907077753712905e-05, "loss": 0.0195, "step": 10840 }, { "epoch": 14.524765729585006, "grad_norm": 0.6018182635307312, "learning_rate": 9.906760193563482e-05, "loss": 0.0194, "step": 10850 }, { "epoch": 14.538152610441767, "grad_norm": 0.28711724281311035, "learning_rate": 9.906442096818139e-05, "loss": 0.0213, "step": 10860 }, { "epoch": 14.551539491298527, "grad_norm": 0.33356213569641113, "learning_rate": 9.906123463511665e-05, "loss": 0.0216, "step": 10870 }, { "epoch": 14.564926372155288, "grad_norm": 0.1928839534521103, "learning_rate": 9.905804293678907e-05, "loss": 0.0195, "step": 10880 }, { "epoch": 14.578313253012048, "grad_norm": 0.37804341316223145, "learning_rate": 9.905484587354766e-05, "loss": 0.0192, "step": 10890 }, { "epoch": 14.591700133868809, "grad_norm": 0.2524521052837372, "learning_rate": 9.905164344574205e-05, "loss": 0.0208, "step": 10900 }, { "epoch": 14.60508701472557, "grad_norm": 0.231740340590477, "learning_rate": 9.904843565372248e-05, "loss": 0.0179, "step": 10910 }, { "epoch": 14.61847389558233, "grad_norm": 0.368939608335495, "learning_rate": 9.904522249783972e-05, "loss": 0.0228, "step": 10920 }, { "epoch": 14.63186077643909, "grad_norm": 0.3798445761203766, "learning_rate": 9.904200397844517e-05, "loss": 0.0202, "step": 10930 }, { "epoch": 14.645247657295851, "grad_norm": 0.28030848503112793, "learning_rate": 9.903878009589078e-05, "loss": 0.0199, "step": 10940 }, { "epoch": 14.65863453815261, "grad_norm": 0.25314855575561523, "learning_rate": 9.903555085052915e-05, "loss": 0.0185, "step": 10950 }, { "epoch": 14.67202141900937, "grad_norm": 0.2100222259759903, "learning_rate": 9.903231624271338e-05, "loss": 0.0189, "step": 10960 }, { "epoch": 14.68540829986613, "grad_norm": 0.2913297712802887, "learning_rate": 9.902907627279724e-05, "loss": 0.0183, "step": 10970 }, { "epoch": 14.698795180722891, "grad_norm": 0.41315460205078125, "learning_rate": 9.902583094113504e-05, "loss": 0.0214, "step": 10980 }, { "epoch": 14.712182061579652, "grad_norm": 0.2739650309085846, "learning_rate": 9.902258024808168e-05, "loss": 0.0187, "step": 10990 }, { "epoch": 14.725568942436412, "grad_norm": 0.3123878836631775, "learning_rate": 9.901932419399264e-05, "loss": 0.0206, "step": 11000 }, { "epoch": 14.738955823293173, "grad_norm": 0.19242413341999054, "learning_rate": 9.9016062779224e-05, "loss": 0.016, "step": 11010 }, { "epoch": 14.752342704149934, "grad_norm": 0.357168585062027, "learning_rate": 9.901279600413242e-05, "loss": 0.0186, "step": 11020 }, { "epoch": 14.765729585006694, "grad_norm": 0.24390320479869843, "learning_rate": 9.900952386907518e-05, "loss": 0.0173, "step": 11030 }, { "epoch": 14.779116465863455, "grad_norm": 1.3726338148117065, "learning_rate": 9.90062463744101e-05, "loss": 0.0187, "step": 11040 }, { "epoch": 14.792503346720213, "grad_norm": 0.23764634132385254, "learning_rate": 9.900296352049558e-05, "loss": 0.0208, "step": 11050 }, { "epoch": 14.805890227576974, "grad_norm": 0.23396968841552734, "learning_rate": 9.899967530769065e-05, "loss": 0.0221, "step": 11060 }, { "epoch": 14.819277108433734, "grad_norm": 0.21959319710731506, "learning_rate": 9.899638173635489e-05, "loss": 0.0187, "step": 11070 }, { "epoch": 14.832663989290495, "grad_norm": 0.6405588388442993, "learning_rate": 9.899308280684849e-05, "loss": 0.0165, "step": 11080 }, { "epoch": 14.846050870147256, "grad_norm": 0.3934445381164551, "learning_rate": 9.898977851953222e-05, "loss": 0.0171, "step": 11090 }, { "epoch": 14.859437751004016, "grad_norm": 0.18102611601352692, "learning_rate": 9.898646887476741e-05, "loss": 0.0156, "step": 11100 }, { "epoch": 14.872824631860777, "grad_norm": 0.19863373041152954, "learning_rate": 9.898315387291603e-05, "loss": 0.0176, "step": 11110 }, { "epoch": 14.886211512717537, "grad_norm": 0.2408813089132309, "learning_rate": 9.89798335143406e-05, "loss": 0.017, "step": 11120 }, { "epoch": 14.899598393574298, "grad_norm": 0.1780061572790146, "learning_rate": 9.897650779940419e-05, "loss": 0.0161, "step": 11130 }, { "epoch": 14.912985274431058, "grad_norm": 0.2594665586948395, "learning_rate": 9.897317672847054e-05, "loss": 0.0171, "step": 11140 }, { "epoch": 14.926372155287819, "grad_norm": 0.1709137111902237, "learning_rate": 9.89698403019039e-05, "loss": 0.0161, "step": 11150 }, { "epoch": 14.939759036144578, "grad_norm": 0.27670401334762573, "learning_rate": 9.896649852006917e-05, "loss": 0.0153, "step": 11160 }, { "epoch": 14.953145917001338, "grad_norm": 0.27632999420166016, "learning_rate": 9.896315138333177e-05, "loss": 0.0184, "step": 11170 }, { "epoch": 14.966532797858099, "grad_norm": 0.22678805887699127, "learning_rate": 9.895979889205774e-05, "loss": 0.0165, "step": 11180 }, { "epoch": 14.97991967871486, "grad_norm": 0.22718067467212677, "learning_rate": 9.895644104661372e-05, "loss": 0.0167, "step": 11190 }, { "epoch": 14.99330655957162, "grad_norm": 0.2179504781961441, "learning_rate": 9.895307784736691e-05, "loss": 0.0192, "step": 11200 }, { "epoch": 15.00669344042838, "grad_norm": 0.19309072196483612, "learning_rate": 9.894970929468512e-05, "loss": 0.0167, "step": 11210 }, { "epoch": 15.02008032128514, "grad_norm": 0.20803076028823853, "learning_rate": 9.89463353889367e-05, "loss": 0.0186, "step": 11220 }, { "epoch": 15.033467202141901, "grad_norm": 0.17333148419857025, "learning_rate": 9.894295613049065e-05, "loss": 0.0162, "step": 11230 }, { "epoch": 15.046854082998662, "grad_norm": 0.2551964223384857, "learning_rate": 9.893957151971649e-05, "loss": 0.0192, "step": 11240 }, { "epoch": 15.060240963855422, "grad_norm": 0.4418059289455414, "learning_rate": 9.893618155698436e-05, "loss": 0.0161, "step": 11250 }, { "epoch": 15.073627844712181, "grad_norm": 0.24989378452301025, "learning_rate": 9.8932786242665e-05, "loss": 0.0161, "step": 11260 }, { "epoch": 15.087014725568942, "grad_norm": 0.18504315614700317, "learning_rate": 9.89293855771297e-05, "loss": 0.0144, "step": 11270 }, { "epoch": 15.100401606425702, "grad_norm": 0.39167261123657227, "learning_rate": 9.892597956075036e-05, "loss": 0.0143, "step": 11280 }, { "epoch": 15.113788487282463, "grad_norm": 0.17720000445842743, "learning_rate": 9.892256819389947e-05, "loss": 0.0137, "step": 11290 }, { "epoch": 15.127175368139223, "grad_norm": 0.185441255569458, "learning_rate": 9.891915147695006e-05, "loss": 0.0147, "step": 11300 }, { "epoch": 15.140562248995984, "grad_norm": 0.34506678581237793, "learning_rate": 9.891572941027577e-05, "loss": 0.0129, "step": 11310 }, { "epoch": 15.153949129852744, "grad_norm": 0.3829083740711212, "learning_rate": 9.89123019942509e-05, "loss": 0.0158, "step": 11320 }, { "epoch": 15.167336010709505, "grad_norm": 0.19213157892227173, "learning_rate": 9.89088692292502e-05, "loss": 0.0147, "step": 11330 }, { "epoch": 15.180722891566266, "grad_norm": 0.5467573404312134, "learning_rate": 9.89054311156491e-05, "loss": 0.0153, "step": 11340 }, { "epoch": 15.194109772423026, "grad_norm": 0.15095074474811554, "learning_rate": 9.890198765382357e-05, "loss": 0.0163, "step": 11350 }, { "epoch": 15.207496653279787, "grad_norm": 0.6123149394989014, "learning_rate": 9.889853884415021e-05, "loss": 0.0168, "step": 11360 }, { "epoch": 15.220883534136545, "grad_norm": 0.3114851415157318, "learning_rate": 9.889508468700614e-05, "loss": 0.0177, "step": 11370 }, { "epoch": 15.234270414993306, "grad_norm": 0.4178839921951294, "learning_rate": 9.889162518276915e-05, "loss": 0.018, "step": 11380 }, { "epoch": 15.247657295850066, "grad_norm": 0.32714375853538513, "learning_rate": 9.888816033181752e-05, "loss": 0.0153, "step": 11390 }, { "epoch": 15.261044176706827, "grad_norm": 0.2682907283306122, "learning_rate": 9.888469013453018e-05, "loss": 0.0201, "step": 11400 }, { "epoch": 15.274431057563588, "grad_norm": 0.2720004916191101, "learning_rate": 9.888121459128663e-05, "loss": 0.0173, "step": 11410 }, { "epoch": 15.287817938420348, "grad_norm": 0.3262924253940582, "learning_rate": 9.887773370246693e-05, "loss": 0.0197, "step": 11420 }, { "epoch": 15.301204819277109, "grad_norm": 0.25784972310066223, "learning_rate": 9.887424746845177e-05, "loss": 0.0155, "step": 11430 }, { "epoch": 15.31459170013387, "grad_norm": 0.26340752840042114, "learning_rate": 9.887075588962239e-05, "loss": 0.0183, "step": 11440 }, { "epoch": 15.32797858099063, "grad_norm": 0.29800355434417725, "learning_rate": 9.88672589663606e-05, "loss": 0.0153, "step": 11450 }, { "epoch": 15.34136546184739, "grad_norm": 0.3594421446323395, "learning_rate": 9.886375669904886e-05, "loss": 0.0172, "step": 11460 }, { "epoch": 15.35475234270415, "grad_norm": 0.1513180136680603, "learning_rate": 9.886024908807014e-05, "loss": 0.0162, "step": 11470 }, { "epoch": 15.36813922356091, "grad_norm": 0.31139788031578064, "learning_rate": 9.885673613380806e-05, "loss": 0.0153, "step": 11480 }, { "epoch": 15.38152610441767, "grad_norm": 0.20258665084838867, "learning_rate": 9.885321783664676e-05, "loss": 0.0147, "step": 11490 }, { "epoch": 15.39491298527443, "grad_norm": 0.18537989258766174, "learning_rate": 9.884969419697101e-05, "loss": 0.0158, "step": 11500 }, { "epoch": 15.408299866131191, "grad_norm": 0.2054307609796524, "learning_rate": 9.884616521516614e-05, "loss": 0.0144, "step": 11510 }, { "epoch": 15.421686746987952, "grad_norm": 0.2938424348831177, "learning_rate": 9.88426308916181e-05, "loss": 0.0167, "step": 11520 }, { "epoch": 15.435073627844712, "grad_norm": 0.18843060731887817, "learning_rate": 9.883909122671335e-05, "loss": 0.0167, "step": 11530 }, { "epoch": 15.448460508701473, "grad_norm": 0.1979917287826538, "learning_rate": 9.883554622083904e-05, "loss": 0.0189, "step": 11540 }, { "epoch": 15.461847389558233, "grad_norm": 0.2677730917930603, "learning_rate": 9.88319958743828e-05, "loss": 0.018, "step": 11550 }, { "epoch": 15.475234270414994, "grad_norm": 0.8813316822052002, "learning_rate": 9.882844018773291e-05, "loss": 0.0168, "step": 11560 }, { "epoch": 15.488621151271754, "grad_norm": 0.1580258309841156, "learning_rate": 9.882487916127823e-05, "loss": 0.0161, "step": 11570 }, { "epoch": 15.502008032128515, "grad_norm": 0.1878429800271988, "learning_rate": 9.882131279540815e-05, "loss": 0.0159, "step": 11580 }, { "epoch": 15.515394912985274, "grad_norm": 0.27762261033058167, "learning_rate": 9.881774109051271e-05, "loss": 0.0184, "step": 11590 }, { "epoch": 15.528781793842034, "grad_norm": 0.3572506010532379, "learning_rate": 9.881416404698252e-05, "loss": 0.0165, "step": 11600 }, { "epoch": 15.542168674698795, "grad_norm": 0.283662885427475, "learning_rate": 9.881058166520873e-05, "loss": 0.0169, "step": 11610 }, { "epoch": 15.555555555555555, "grad_norm": 0.22521689534187317, "learning_rate": 9.880699394558311e-05, "loss": 0.0178, "step": 11620 }, { "epoch": 15.568942436412316, "grad_norm": 0.2634669244289398, "learning_rate": 9.880340088849801e-05, "loss": 0.0196, "step": 11630 }, { "epoch": 15.582329317269076, "grad_norm": 0.5869012475013733, "learning_rate": 9.879980249434637e-05, "loss": 0.0216, "step": 11640 }, { "epoch": 15.595716198125837, "grad_norm": 0.29146623611450195, "learning_rate": 9.879619876352168e-05, "loss": 0.0189, "step": 11650 }, { "epoch": 15.609103078982598, "grad_norm": 0.2711389660835266, "learning_rate": 9.879258969641809e-05, "loss": 0.0204, "step": 11660 }, { "epoch": 15.622489959839358, "grad_norm": 0.3154110014438629, "learning_rate": 9.878897529343023e-05, "loss": 0.0197, "step": 11670 }, { "epoch": 15.635876840696119, "grad_norm": 0.23146508634090424, "learning_rate": 9.878535555495338e-05, "loss": 0.017, "step": 11680 }, { "epoch": 15.649263721552877, "grad_norm": 0.4534495174884796, "learning_rate": 9.87817304813834e-05, "loss": 0.0181, "step": 11690 }, { "epoch": 15.662650602409638, "grad_norm": 0.38818252086639404, "learning_rate": 9.877810007311671e-05, "loss": 0.0186, "step": 11700 }, { "epoch": 15.676037483266398, "grad_norm": 0.1975741982460022, "learning_rate": 9.877446433055035e-05, "loss": 0.0191, "step": 11710 }, { "epoch": 15.689424364123159, "grad_norm": 0.283000648021698, "learning_rate": 9.877082325408191e-05, "loss": 0.0168, "step": 11720 }, { "epoch": 15.70281124497992, "grad_norm": 0.25122180581092834, "learning_rate": 9.876717684410954e-05, "loss": 0.015, "step": 11730 }, { "epoch": 15.71619812583668, "grad_norm": 0.26694339513778687, "learning_rate": 9.876352510103204e-05, "loss": 0.0187, "step": 11740 }, { "epoch": 15.72958500669344, "grad_norm": 0.17465735971927643, "learning_rate": 9.875986802524875e-05, "loss": 0.0191, "step": 11750 }, { "epoch": 15.742971887550201, "grad_norm": 0.244941845536232, "learning_rate": 9.87562056171596e-05, "loss": 0.0173, "step": 11760 }, { "epoch": 15.756358768406962, "grad_norm": 0.1948918104171753, "learning_rate": 9.875253787716511e-05, "loss": 0.0166, "step": 11770 }, { "epoch": 15.769745649263722, "grad_norm": 0.16202802956104279, "learning_rate": 9.874886480566637e-05, "loss": 0.0158, "step": 11780 }, { "epoch": 15.783132530120483, "grad_norm": 0.20784536004066467, "learning_rate": 9.874518640306507e-05, "loss": 0.0137, "step": 11790 }, { "epoch": 15.796519410977242, "grad_norm": 0.5281943082809448, "learning_rate": 9.874150266976347e-05, "loss": 0.0165, "step": 11800 }, { "epoch": 15.809906291834002, "grad_norm": 0.17951710522174835, "learning_rate": 9.873781360616443e-05, "loss": 0.0168, "step": 11810 }, { "epoch": 15.823293172690763, "grad_norm": 0.6201582551002502, "learning_rate": 9.873411921267137e-05, "loss": 0.0161, "step": 11820 }, { "epoch": 15.836680053547523, "grad_norm": 0.3671571910381317, "learning_rate": 9.873041948968829e-05, "loss": 0.0184, "step": 11830 }, { "epoch": 15.850066934404284, "grad_norm": 0.34562042355537415, "learning_rate": 9.872671443761981e-05, "loss": 0.0208, "step": 11840 }, { "epoch": 15.863453815261044, "grad_norm": 1.3140385150909424, "learning_rate": 9.872300405687109e-05, "loss": 0.0182, "step": 11850 }, { "epoch": 15.876840696117805, "grad_norm": 0.33778873085975647, "learning_rate": 9.871928834784792e-05, "loss": 0.0169, "step": 11860 }, { "epoch": 15.890227576974565, "grad_norm": 0.2385193109512329, "learning_rate": 9.871556731095661e-05, "loss": 0.0161, "step": 11870 }, { "epoch": 15.903614457831326, "grad_norm": 0.23568281531333923, "learning_rate": 9.871184094660411e-05, "loss": 0.0189, "step": 11880 }, { "epoch": 15.917001338688086, "grad_norm": 0.2685134708881378, "learning_rate": 9.870810925519791e-05, "loss": 0.0176, "step": 11890 }, { "epoch": 15.930388219544845, "grad_norm": 0.29239463806152344, "learning_rate": 9.870437223714612e-05, "loss": 0.0169, "step": 11900 }, { "epoch": 15.943775100401606, "grad_norm": 0.27208590507507324, "learning_rate": 9.87006298928574e-05, "loss": 0.0172, "step": 11910 }, { "epoch": 15.957161981258366, "grad_norm": 0.22798927128314972, "learning_rate": 9.869688222274103e-05, "loss": 0.0165, "step": 11920 }, { "epoch": 15.970548862115127, "grad_norm": 0.2055317759513855, "learning_rate": 9.869312922720681e-05, "loss": 0.017, "step": 11930 }, { "epoch": 15.983935742971887, "grad_norm": 0.2680474519729614, "learning_rate": 9.868937090666521e-05, "loss": 0.0202, "step": 11940 }, { "epoch": 15.997322623828648, "grad_norm": 0.3744042217731476, "learning_rate": 9.86856072615272e-05, "loss": 0.0198, "step": 11950 }, { "epoch": 16.01070950468541, "grad_norm": 0.14972497522830963, "learning_rate": 9.868183829220438e-05, "loss": 0.0171, "step": 11960 }, { "epoch": 16.02409638554217, "grad_norm": 0.2115458995103836, "learning_rate": 9.867806399910893e-05, "loss": 0.017, "step": 11970 }, { "epoch": 16.03748326639893, "grad_norm": 0.20491629838943481, "learning_rate": 9.867428438265356e-05, "loss": 0.0178, "step": 11980 }, { "epoch": 16.05087014725569, "grad_norm": 0.22127868235111237, "learning_rate": 9.867049944325165e-05, "loss": 0.018, "step": 11990 }, { "epoch": 16.06425702811245, "grad_norm": 0.39197611808776855, "learning_rate": 9.86667091813171e-05, "loss": 0.0177, "step": 12000 }, { "epoch": 16.07764390896921, "grad_norm": 0.1900569647550583, "learning_rate": 9.866291359726438e-05, "loss": 0.0186, "step": 12010 }, { "epoch": 16.09103078982597, "grad_norm": 0.2694603204727173, "learning_rate": 9.865911269150861e-05, "loss": 0.0174, "step": 12020 }, { "epoch": 16.104417670682732, "grad_norm": 0.3467347025871277, "learning_rate": 9.865530646446544e-05, "loss": 0.0178, "step": 12030 }, { "epoch": 16.117804551539493, "grad_norm": 0.2283557802438736, "learning_rate": 9.86514949165511e-05, "loss": 0.0191, "step": 12040 }, { "epoch": 16.131191432396253, "grad_norm": 0.2048892080783844, "learning_rate": 9.864767804818243e-05, "loss": 0.0166, "step": 12050 }, { "epoch": 16.14457831325301, "grad_norm": 0.1314937025308609, "learning_rate": 9.86438558597768e-05, "loss": 0.0168, "step": 12060 }, { "epoch": 16.15796519410977, "grad_norm": 0.13147445023059845, "learning_rate": 9.864002835175225e-05, "loss": 0.018, "step": 12070 }, { "epoch": 16.17135207496653, "grad_norm": 0.22492243349552155, "learning_rate": 9.863619552452734e-05, "loss": 0.0192, "step": 12080 }, { "epoch": 16.184738955823292, "grad_norm": 0.32960087060928345, "learning_rate": 9.863235737852119e-05, "loss": 0.019, "step": 12090 }, { "epoch": 16.198125836680052, "grad_norm": 0.2506030201911926, "learning_rate": 9.862851391415356e-05, "loss": 0.0179, "step": 12100 }, { "epoch": 16.211512717536813, "grad_norm": 0.7897889018058777, "learning_rate": 9.862466513184477e-05, "loss": 0.0177, "step": 12110 }, { "epoch": 16.224899598393574, "grad_norm": 0.19309774041175842, "learning_rate": 9.86208110320157e-05, "loss": 0.0178, "step": 12120 }, { "epoch": 16.238286479250334, "grad_norm": 0.20556873083114624, "learning_rate": 9.861695161508784e-05, "loss": 0.0163, "step": 12130 }, { "epoch": 16.251673360107095, "grad_norm": 0.3041475713253021, "learning_rate": 9.861308688148324e-05, "loss": 0.0155, "step": 12140 }, { "epoch": 16.265060240963855, "grad_norm": 0.2031349390745163, "learning_rate": 9.860921683162455e-05, "loss": 0.0156, "step": 12150 }, { "epoch": 16.278447121820616, "grad_norm": 0.4446941018104553, "learning_rate": 9.860534146593499e-05, "loss": 0.0162, "step": 12160 }, { "epoch": 16.291834002677376, "grad_norm": 1.2944144010543823, "learning_rate": 9.860146078483836e-05, "loss": 0.0191, "step": 12170 }, { "epoch": 16.305220883534137, "grad_norm": 0.20584586262702942, "learning_rate": 9.859757478875905e-05, "loss": 0.0161, "step": 12180 }, { "epoch": 16.318607764390897, "grad_norm": 0.2647968530654907, "learning_rate": 9.859368347812204e-05, "loss": 0.0165, "step": 12190 }, { "epoch": 16.331994645247658, "grad_norm": 0.21282833814620972, "learning_rate": 9.858978685335285e-05, "loss": 0.0164, "step": 12200 }, { "epoch": 16.34538152610442, "grad_norm": 0.2070441097021103, "learning_rate": 9.858588491487763e-05, "loss": 0.0193, "step": 12210 }, { "epoch": 16.35876840696118, "grad_norm": 0.21296720206737518, "learning_rate": 9.858197766312308e-05, "loss": 0.0184, "step": 12220 }, { "epoch": 16.37215528781794, "grad_norm": 0.22538433969020844, "learning_rate": 9.857806509851649e-05, "loss": 0.0175, "step": 12230 }, { "epoch": 16.3855421686747, "grad_norm": 0.26746365427970886, "learning_rate": 9.857414722148574e-05, "loss": 0.0182, "step": 12240 }, { "epoch": 16.39892904953146, "grad_norm": 0.3733600974082947, "learning_rate": 9.857022403245928e-05, "loss": 0.0193, "step": 12250 }, { "epoch": 16.41231593038822, "grad_norm": 0.23012441396713257, "learning_rate": 9.856629553186615e-05, "loss": 0.0148, "step": 12260 }, { "epoch": 16.42570281124498, "grad_norm": 0.21597641706466675, "learning_rate": 9.856236172013595e-05, "loss": 0.0148, "step": 12270 }, { "epoch": 16.43908969210174, "grad_norm": 0.2875555157661438, "learning_rate": 9.85584225976989e-05, "loss": 0.0179, "step": 12280 }, { "epoch": 16.4524765729585, "grad_norm": 0.6131165027618408, "learning_rate": 9.855447816498575e-05, "loss": 0.0166, "step": 12290 }, { "epoch": 16.46586345381526, "grad_norm": 0.5611546039581299, "learning_rate": 9.855052842242787e-05, "loss": 0.0142, "step": 12300 }, { "epoch": 16.47925033467202, "grad_norm": 0.3055652976036072, "learning_rate": 9.85465733704572e-05, "loss": 0.0186, "step": 12310 }, { "epoch": 16.49263721552878, "grad_norm": 0.34118586778640747, "learning_rate": 9.854261300950624e-05, "loss": 0.0194, "step": 12320 }, { "epoch": 16.50602409638554, "grad_norm": 0.26824748516082764, "learning_rate": 9.853864734000813e-05, "loss": 0.0157, "step": 12330 }, { "epoch": 16.519410977242302, "grad_norm": 0.19408759474754333, "learning_rate": 9.85346763623965e-05, "loss": 0.014, "step": 12340 }, { "epoch": 16.532797858099062, "grad_norm": 0.20975103974342346, "learning_rate": 9.853070007710564e-05, "loss": 0.0173, "step": 12350 }, { "epoch": 16.546184738955823, "grad_norm": 0.27445077896118164, "learning_rate": 9.85267184845704e-05, "loss": 0.0182, "step": 12360 }, { "epoch": 16.559571619812584, "grad_norm": 0.47994041442871094, "learning_rate": 9.852273158522616e-05, "loss": 0.0152, "step": 12370 }, { "epoch": 16.572958500669344, "grad_norm": 0.25040629506111145, "learning_rate": 9.851873937950896e-05, "loss": 0.0134, "step": 12380 }, { "epoch": 16.586345381526105, "grad_norm": 0.21168652176856995, "learning_rate": 9.851474186785537e-05, "loss": 0.0152, "step": 12390 }, { "epoch": 16.599732262382865, "grad_norm": 0.20125913619995117, "learning_rate": 9.851073905070254e-05, "loss": 0.0162, "step": 12400 }, { "epoch": 16.613119143239626, "grad_norm": 0.20306985080242157, "learning_rate": 9.850673092848824e-05, "loss": 0.0141, "step": 12410 }, { "epoch": 16.626506024096386, "grad_norm": 0.555189311504364, "learning_rate": 9.850271750165077e-05, "loss": 0.0139, "step": 12420 }, { "epoch": 16.639892904953147, "grad_norm": 0.582740068435669, "learning_rate": 9.849869877062902e-05, "loss": 0.0183, "step": 12430 }, { "epoch": 16.653279785809907, "grad_norm": 0.42308175563812256, "learning_rate": 9.849467473586252e-05, "loss": 0.019, "step": 12440 }, { "epoch": 16.666666666666668, "grad_norm": 0.20151346921920776, "learning_rate": 9.849064539779127e-05, "loss": 0.0134, "step": 12450 }, { "epoch": 16.68005354752343, "grad_norm": 0.18690825998783112, "learning_rate": 9.848661075685594e-05, "loss": 0.0157, "step": 12460 }, { "epoch": 16.69344042838019, "grad_norm": 0.2975409924983978, "learning_rate": 9.848257081349778e-05, "loss": 0.0131, "step": 12470 }, { "epoch": 16.70682730923695, "grad_norm": 0.3316382169723511, "learning_rate": 9.847852556815856e-05, "loss": 0.0181, "step": 12480 }, { "epoch": 16.720214190093706, "grad_norm": 0.7213724851608276, "learning_rate": 9.847447502128067e-05, "loss": 0.0199, "step": 12490 }, { "epoch": 16.733601070950467, "grad_norm": 0.21670959889888763, "learning_rate": 9.847041917330708e-05, "loss": 0.0186, "step": 12500 }, { "epoch": 16.746987951807228, "grad_norm": 0.32374224066734314, "learning_rate": 9.846635802468132e-05, "loss": 0.0154, "step": 12510 }, { "epoch": 16.760374832663988, "grad_norm": 0.2979527711868286, "learning_rate": 9.84622915758475e-05, "loss": 0.0172, "step": 12520 }, { "epoch": 16.77376171352075, "grad_norm": 0.2211829572916031, "learning_rate": 9.845821982725034e-05, "loss": 0.0144, "step": 12530 }, { "epoch": 16.78714859437751, "grad_norm": 0.37257352471351624, "learning_rate": 9.845414277933514e-05, "loss": 0.0143, "step": 12540 }, { "epoch": 16.80053547523427, "grad_norm": 0.2451143115758896, "learning_rate": 9.845006043254771e-05, "loss": 0.0159, "step": 12550 }, { "epoch": 16.81392235609103, "grad_norm": 0.19563058018684387, "learning_rate": 9.844597278733451e-05, "loss": 0.0139, "step": 12560 }, { "epoch": 16.82730923694779, "grad_norm": 0.6968286633491516, "learning_rate": 9.844187984414259e-05, "loss": 0.0177, "step": 12570 }, { "epoch": 16.84069611780455, "grad_norm": 0.2958356440067291, "learning_rate": 9.84377816034195e-05, "loss": 0.0156, "step": 12580 }, { "epoch": 16.854082998661312, "grad_norm": 0.20015014708042145, "learning_rate": 9.843367806561345e-05, "loss": 0.0222, "step": 12590 }, { "epoch": 16.867469879518072, "grad_norm": 0.16933320462703705, "learning_rate": 9.842956923117317e-05, "loss": 0.0162, "step": 12600 }, { "epoch": 16.880856760374833, "grad_norm": 0.22539490461349487, "learning_rate": 9.842545510054802e-05, "loss": 0.0158, "step": 12610 }, { "epoch": 16.894243641231594, "grad_norm": 0.5160669684410095, "learning_rate": 9.842133567418792e-05, "loss": 0.0158, "step": 12620 }, { "epoch": 16.907630522088354, "grad_norm": 0.24292828142642975, "learning_rate": 9.841721095254333e-05, "loss": 0.0144, "step": 12630 }, { "epoch": 16.921017402945115, "grad_norm": 0.30762261152267456, "learning_rate": 9.841308093606537e-05, "loss": 0.0176, "step": 12640 }, { "epoch": 16.934404283801875, "grad_norm": 0.18963487446308136, "learning_rate": 9.840894562520565e-05, "loss": 0.0177, "step": 12650 }, { "epoch": 16.947791164658636, "grad_norm": 0.1689358502626419, "learning_rate": 9.840480502041642e-05, "loss": 0.0168, "step": 12660 }, { "epoch": 16.961178045515396, "grad_norm": 0.2902960777282715, "learning_rate": 9.840065912215049e-05, "loss": 0.0176, "step": 12670 }, { "epoch": 16.974564926372157, "grad_norm": 0.1897760033607483, "learning_rate": 9.839650793086124e-05, "loss": 0.0153, "step": 12680 }, { "epoch": 16.987951807228917, "grad_norm": 0.16837826371192932, "learning_rate": 9.839235144700265e-05, "loss": 0.0171, "step": 12690 }, { "epoch": 17.001338688085674, "grad_norm": 0.29400044679641724, "learning_rate": 9.838818967102926e-05, "loss": 0.0188, "step": 12700 }, { "epoch": 17.014725568942435, "grad_norm": 0.18730315566062927, "learning_rate": 9.83840226033962e-05, "loss": 0.0154, "step": 12710 }, { "epoch": 17.028112449799195, "grad_norm": 0.2068309634923935, "learning_rate": 9.837985024455918e-05, "loss": 0.0133, "step": 12720 }, { "epoch": 17.041499330655956, "grad_norm": 0.27424973249435425, "learning_rate": 9.837567259497447e-05, "loss": 0.017, "step": 12730 }, { "epoch": 17.054886211512716, "grad_norm": 0.13100029528141022, "learning_rate": 9.837148965509894e-05, "loss": 0.0198, "step": 12740 }, { "epoch": 17.068273092369477, "grad_norm": 0.16344769299030304, "learning_rate": 9.836730142539001e-05, "loss": 0.014, "step": 12750 }, { "epoch": 17.081659973226238, "grad_norm": 0.33051082491874695, "learning_rate": 9.836310790630574e-05, "loss": 0.0201, "step": 12760 }, { "epoch": 17.095046854082998, "grad_norm": 0.2422577291727066, "learning_rate": 9.83589090983047e-05, "loss": 0.0204, "step": 12770 }, { "epoch": 17.10843373493976, "grad_norm": 0.3812709152698517, "learning_rate": 9.835470500184605e-05, "loss": 0.0172, "step": 12780 }, { "epoch": 17.12182061579652, "grad_norm": 0.2510240375995636, "learning_rate": 9.835049561738957e-05, "loss": 0.0159, "step": 12790 }, { "epoch": 17.13520749665328, "grad_norm": 0.5182105898857117, "learning_rate": 9.834628094539558e-05, "loss": 0.0193, "step": 12800 }, { "epoch": 17.14859437751004, "grad_norm": 0.44465509057044983, "learning_rate": 9.834206098632499e-05, "loss": 0.0162, "step": 12810 }, { "epoch": 17.1619812583668, "grad_norm": 0.24358157813549042, "learning_rate": 9.833783574063931e-05, "loss": 0.0153, "step": 12820 }, { "epoch": 17.17536813922356, "grad_norm": 0.2347715049982071, "learning_rate": 9.833360520880058e-05, "loss": 0.0173, "step": 12830 }, { "epoch": 17.188755020080322, "grad_norm": 0.33340805768966675, "learning_rate": 9.832936939127144e-05, "loss": 0.0144, "step": 12840 }, { "epoch": 17.202141900937082, "grad_norm": 0.19754306972026825, "learning_rate": 9.832512828851515e-05, "loss": 0.0138, "step": 12850 }, { "epoch": 17.215528781793843, "grad_norm": 0.2547762393951416, "learning_rate": 9.832088190099546e-05, "loss": 0.0176, "step": 12860 }, { "epoch": 17.228915662650603, "grad_norm": 0.36947622895240784, "learning_rate": 9.831663022917679e-05, "loss": 0.0185, "step": 12870 }, { "epoch": 17.242302543507364, "grad_norm": 0.21709905564785004, "learning_rate": 9.831237327352407e-05, "loss": 0.0185, "step": 12880 }, { "epoch": 17.255689424364125, "grad_norm": 0.283500999212265, "learning_rate": 9.830811103450286e-05, "loss": 0.0161, "step": 12890 }, { "epoch": 17.269076305220885, "grad_norm": 0.33789655566215515, "learning_rate": 9.830384351257924e-05, "loss": 0.0173, "step": 12900 }, { "epoch": 17.282463186077646, "grad_norm": 0.18047276139259338, "learning_rate": 9.829957070821993e-05, "loss": 0.0201, "step": 12910 }, { "epoch": 17.295850066934403, "grad_norm": 0.2350664883852005, "learning_rate": 9.829529262189218e-05, "loss": 0.0162, "step": 12920 }, { "epoch": 17.309236947791163, "grad_norm": 0.20072762668132782, "learning_rate": 9.829100925406385e-05, "loss": 0.0154, "step": 12930 }, { "epoch": 17.322623828647924, "grad_norm": 0.1164010539650917, "learning_rate": 9.828672060520333e-05, "loss": 0.0171, "step": 12940 }, { "epoch": 17.336010709504684, "grad_norm": 0.2572783827781677, "learning_rate": 9.828242667577966e-05, "loss": 0.0146, "step": 12950 }, { "epoch": 17.349397590361445, "grad_norm": 0.222123920917511, "learning_rate": 9.82781274662624e-05, "loss": 0.0165, "step": 12960 }, { "epoch": 17.362784471218205, "grad_norm": 0.7329559922218323, "learning_rate": 9.82738229771217e-05, "loss": 0.0174, "step": 12970 }, { "epoch": 17.376171352074966, "grad_norm": 0.15666459500789642, "learning_rate": 9.826951320882829e-05, "loss": 0.0136, "step": 12980 }, { "epoch": 17.389558232931726, "grad_norm": 0.15610887110233307, "learning_rate": 9.826519816185351e-05, "loss": 0.0134, "step": 12990 }, { "epoch": 17.402945113788487, "grad_norm": 0.17671097815036774, "learning_rate": 9.826087783666921e-05, "loss": 0.0158, "step": 13000 }, { "epoch": 17.416331994645248, "grad_norm": 0.20196092128753662, "learning_rate": 9.825655223374787e-05, "loss": 0.0197, "step": 13010 }, { "epoch": 17.429718875502008, "grad_norm": 0.4031938910484314, "learning_rate": 9.825222135356253e-05, "loss": 0.0137, "step": 13020 }, { "epoch": 17.44310575635877, "grad_norm": 0.16031500697135925, "learning_rate": 9.82478851965868e-05, "loss": 0.0157, "step": 13030 }, { "epoch": 17.45649263721553, "grad_norm": 0.2510172128677368, "learning_rate": 9.82435437632949e-05, "loss": 0.016, "step": 13040 }, { "epoch": 17.46987951807229, "grad_norm": 0.33196815848350525, "learning_rate": 9.823919705416158e-05, "loss": 0.0159, "step": 13050 }, { "epoch": 17.48326639892905, "grad_norm": 0.32359808683395386, "learning_rate": 9.82348450696622e-05, "loss": 0.0147, "step": 13060 }, { "epoch": 17.49665327978581, "grad_norm": 0.16346846520900726, "learning_rate": 9.823048781027268e-05, "loss": 0.0166, "step": 13070 }, { "epoch": 17.51004016064257, "grad_norm": 0.1864239126443863, "learning_rate": 9.822612527646953e-05, "loss": 0.0137, "step": 13080 }, { "epoch": 17.523427041499332, "grad_norm": 0.15533262491226196, "learning_rate": 9.822175746872984e-05, "loss": 0.0165, "step": 13090 }, { "epoch": 17.536813922356092, "grad_norm": 0.2489418238401413, "learning_rate": 9.821738438753123e-05, "loss": 0.0164, "step": 13100 }, { "epoch": 17.550200803212853, "grad_norm": 0.12960578501224518, "learning_rate": 9.821300603335196e-05, "loss": 0.0143, "step": 13110 }, { "epoch": 17.563587684069613, "grad_norm": 0.13922645151615143, "learning_rate": 9.820862240667085e-05, "loss": 0.0158, "step": 13120 }, { "epoch": 17.57697456492637, "grad_norm": 0.18650738894939423, "learning_rate": 9.820423350796726e-05, "loss": 0.0147, "step": 13130 }, { "epoch": 17.59036144578313, "grad_norm": 0.1896267533302307, "learning_rate": 9.819983933772118e-05, "loss": 0.0147, "step": 13140 }, { "epoch": 17.60374832663989, "grad_norm": 0.6531825661659241, "learning_rate": 9.819543989641314e-05, "loss": 0.0147, "step": 13150 }, { "epoch": 17.617135207496652, "grad_norm": 0.25973400473594666, "learning_rate": 9.819103518452423e-05, "loss": 0.0148, "step": 13160 }, { "epoch": 17.630522088353413, "grad_norm": 0.9621050953865051, "learning_rate": 9.818662520253618e-05, "loss": 0.0159, "step": 13170 }, { "epoch": 17.643908969210173, "grad_norm": 0.21709130704402924, "learning_rate": 9.818220995093126e-05, "loss": 0.015, "step": 13180 }, { "epoch": 17.657295850066934, "grad_norm": 0.17926453053951263, "learning_rate": 9.817778943019228e-05, "loss": 0.0144, "step": 13190 }, { "epoch": 17.670682730923694, "grad_norm": 0.16245950758457184, "learning_rate": 9.81733636408027e-05, "loss": 0.0163, "step": 13200 }, { "epoch": 17.684069611780455, "grad_norm": 0.17773640155792236, "learning_rate": 9.816893258324649e-05, "loss": 0.0155, "step": 13210 }, { "epoch": 17.697456492637215, "grad_norm": 0.22210729122161865, "learning_rate": 9.816449625800823e-05, "loss": 0.0165, "step": 13220 }, { "epoch": 17.710843373493976, "grad_norm": 0.1936355084180832, "learning_rate": 9.816005466557308e-05, "loss": 0.0153, "step": 13230 }, { "epoch": 17.724230254350736, "grad_norm": 0.2035146951675415, "learning_rate": 9.815560780642674e-05, "loss": 0.0185, "step": 13240 }, { "epoch": 17.737617135207497, "grad_norm": 0.21663375198841095, "learning_rate": 9.815115568105555e-05, "loss": 0.0133, "step": 13250 }, { "epoch": 17.751004016064257, "grad_norm": 0.19796165823936462, "learning_rate": 9.814669828994638e-05, "loss": 0.0149, "step": 13260 }, { "epoch": 17.764390896921018, "grad_norm": 0.21649663150310516, "learning_rate": 9.814223563358665e-05, "loss": 0.0159, "step": 13270 }, { "epoch": 17.77777777777778, "grad_norm": 0.24462516605854034, "learning_rate": 9.813776771246443e-05, "loss": 0.018, "step": 13280 }, { "epoch": 17.79116465863454, "grad_norm": 0.19487178325653076, "learning_rate": 9.813329452706829e-05, "loss": 0.0167, "step": 13290 }, { "epoch": 17.8045515394913, "grad_norm": 0.20111429691314697, "learning_rate": 9.812881607788744e-05, "loss": 0.0154, "step": 13300 }, { "epoch": 17.81793842034806, "grad_norm": 0.5912930965423584, "learning_rate": 9.812433236541163e-05, "loss": 0.0155, "step": 13310 }, { "epoch": 17.83132530120482, "grad_norm": 0.35051625967025757, "learning_rate": 9.811984339013116e-05, "loss": 0.0153, "step": 13320 }, { "epoch": 17.84471218206158, "grad_norm": 0.4613930583000183, "learning_rate": 9.811534915253698e-05, "loss": 0.0142, "step": 13330 }, { "epoch": 17.858099062918342, "grad_norm": 0.29184117913246155, "learning_rate": 9.811084965312056e-05, "loss": 0.02, "step": 13340 }, { "epoch": 17.8714859437751, "grad_norm": 0.29247525334358215, "learning_rate": 9.810634489237396e-05, "loss": 0.0153, "step": 13350 }, { "epoch": 17.88487282463186, "grad_norm": 0.2570517957210541, "learning_rate": 9.81018348707898e-05, "loss": 0.0162, "step": 13360 }, { "epoch": 17.89825970548862, "grad_norm": 0.9819782972335815, "learning_rate": 9.809731958886131e-05, "loss": 0.0192, "step": 13370 }, { "epoch": 17.91164658634538, "grad_norm": 1.1186541318893433, "learning_rate": 9.809279904708224e-05, "loss": 0.0203, "step": 13380 }, { "epoch": 17.92503346720214, "grad_norm": 0.20992302894592285, "learning_rate": 9.808827324594699e-05, "loss": 0.0168, "step": 13390 }, { "epoch": 17.9384203480589, "grad_norm": 0.17676512897014618, "learning_rate": 9.808374218595046e-05, "loss": 0.0194, "step": 13400 }, { "epoch": 17.951807228915662, "grad_norm": 0.22193527221679688, "learning_rate": 9.80792058675882e-05, "loss": 0.0191, "step": 13410 }, { "epoch": 17.965194109772423, "grad_norm": 0.1754898875951767, "learning_rate": 9.807466429135627e-05, "loss": 0.0182, "step": 13420 }, { "epoch": 17.978580990629183, "grad_norm": 0.25557631254196167, "learning_rate": 9.807011745775132e-05, "loss": 0.019, "step": 13430 }, { "epoch": 17.991967871485944, "grad_norm": 0.19490201771259308, "learning_rate": 9.806556536727061e-05, "loss": 0.0155, "step": 13440 }, { "epoch": 18.005354752342704, "grad_norm": 0.2171422690153122, "learning_rate": 9.806100802041193e-05, "loss": 0.0186, "step": 13450 }, { "epoch": 18.018741633199465, "grad_norm": 0.26655519008636475, "learning_rate": 9.805644541767368e-05, "loss": 0.0167, "step": 13460 }, { "epoch": 18.032128514056225, "grad_norm": 0.2553789019584656, "learning_rate": 9.805187755955478e-05, "loss": 0.0169, "step": 13470 }, { "epoch": 18.045515394912986, "grad_norm": 0.1754475235939026, "learning_rate": 9.804730444655483e-05, "loss": 0.0158, "step": 13480 }, { "epoch": 18.058902275769746, "grad_norm": 0.17835749685764313, "learning_rate": 9.804272607917388e-05, "loss": 0.0196, "step": 13490 }, { "epoch": 18.072289156626507, "grad_norm": 0.19547522068023682, "learning_rate": 9.803814245791265e-05, "loss": 0.0147, "step": 13500 }, { "epoch": 18.085676037483267, "grad_norm": 0.2640862464904785, "learning_rate": 9.803355358327239e-05, "loss": 0.0161, "step": 13510 }, { "epoch": 18.099062918340028, "grad_norm": 0.25346800684928894, "learning_rate": 9.802895945575492e-05, "loss": 0.0173, "step": 13520 }, { "epoch": 18.11244979919679, "grad_norm": 0.20017075538635254, "learning_rate": 9.802436007586266e-05, "loss": 0.0143, "step": 13530 }, { "epoch": 18.12583668005355, "grad_norm": 0.2098115086555481, "learning_rate": 9.801975544409858e-05, "loss": 0.0164, "step": 13540 }, { "epoch": 18.13922356091031, "grad_norm": 0.23053407669067383, "learning_rate": 9.801514556096625e-05, "loss": 0.0149, "step": 13550 }, { "epoch": 18.152610441767067, "grad_norm": 0.33070388436317444, "learning_rate": 9.801053042696977e-05, "loss": 0.0187, "step": 13560 }, { "epoch": 18.165997322623827, "grad_norm": 0.2396857738494873, "learning_rate": 9.800591004261388e-05, "loss": 0.017, "step": 13570 }, { "epoch": 18.179384203480588, "grad_norm": 0.20403321087360382, "learning_rate": 9.800128440840385e-05, "loss": 0.0168, "step": 13580 }, { "epoch": 18.19277108433735, "grad_norm": 0.8992350101470947, "learning_rate": 9.799665352484552e-05, "loss": 0.0172, "step": 13590 }, { "epoch": 18.20615796519411, "grad_norm": 0.26354512572288513, "learning_rate": 9.799201739244532e-05, "loss": 0.017, "step": 13600 }, { "epoch": 18.21954484605087, "grad_norm": 0.2602281868457794, "learning_rate": 9.798737601171025e-05, "loss": 0.0154, "step": 13610 }, { "epoch": 18.23293172690763, "grad_norm": 0.2891719937324524, "learning_rate": 9.79827293831479e-05, "loss": 0.0148, "step": 13620 }, { "epoch": 18.24631860776439, "grad_norm": 1.077043890953064, "learning_rate": 9.797807750726638e-05, "loss": 0.0155, "step": 13630 }, { "epoch": 18.25970548862115, "grad_norm": 0.7737524509429932, "learning_rate": 9.797342038457446e-05, "loss": 0.016, "step": 13640 }, { "epoch": 18.27309236947791, "grad_norm": 0.25626516342163086, "learning_rate": 9.796875801558141e-05, "loss": 0.016, "step": 13650 }, { "epoch": 18.286479250334672, "grad_norm": 0.23137736320495605, "learning_rate": 9.79640904007971e-05, "loss": 0.0165, "step": 13660 }, { "epoch": 18.299866131191433, "grad_norm": 0.3154360353946686, "learning_rate": 9.795941754073199e-05, "loss": 0.0169, "step": 13670 }, { "epoch": 18.313253012048193, "grad_norm": 0.21683329343795776, "learning_rate": 9.795473943589705e-05, "loss": 0.0162, "step": 13680 }, { "epoch": 18.326639892904954, "grad_norm": 0.23721499741077423, "learning_rate": 9.795005608680394e-05, "loss": 0.0185, "step": 13690 }, { "epoch": 18.340026773761714, "grad_norm": 0.22671939432621002, "learning_rate": 9.794536749396477e-05, "loss": 0.0177, "step": 13700 }, { "epoch": 18.353413654618475, "grad_norm": 0.28157931566238403, "learning_rate": 9.79406736578923e-05, "loss": 0.0162, "step": 13710 }, { "epoch": 18.366800535475235, "grad_norm": 0.29191792011260986, "learning_rate": 9.793597457909984e-05, "loss": 0.0148, "step": 13720 }, { "epoch": 18.380187416331996, "grad_norm": 0.2575299143791199, "learning_rate": 9.793127025810127e-05, "loss": 0.0154, "step": 13730 }, { "epoch": 18.393574297188756, "grad_norm": 0.41560885310173035, "learning_rate": 9.792656069541104e-05, "loss": 0.015, "step": 13740 }, { "epoch": 18.406961178045517, "grad_norm": 1.235237717628479, "learning_rate": 9.79218458915442e-05, "loss": 0.0201, "step": 13750 }, { "epoch": 18.420348058902277, "grad_norm": 0.19464746117591858, "learning_rate": 9.791712584701634e-05, "loss": 0.0183, "step": 13760 }, { "epoch": 18.433734939759034, "grad_norm": 0.27096691727638245, "learning_rate": 9.791240056234364e-05, "loss": 0.0158, "step": 13770 }, { "epoch": 18.447121820615795, "grad_norm": 0.18719588220119476, "learning_rate": 9.790767003804283e-05, "loss": 0.0167, "step": 13780 }, { "epoch": 18.460508701472556, "grad_norm": 0.2622232735157013, "learning_rate": 9.790293427463126e-05, "loss": 0.014, "step": 13790 }, { "epoch": 18.473895582329316, "grad_norm": 0.24301709234714508, "learning_rate": 9.789819327262684e-05, "loss": 0.0153, "step": 13800 }, { "epoch": 18.487282463186077, "grad_norm": 0.247647225856781, "learning_rate": 9.7893447032548e-05, "loss": 0.0143, "step": 13810 }, { "epoch": 18.500669344042837, "grad_norm": 0.26646140217781067, "learning_rate": 9.78886955549138e-05, "loss": 0.0154, "step": 13820 }, { "epoch": 18.514056224899598, "grad_norm": 0.17193563282489777, "learning_rate": 9.788393884024387e-05, "loss": 0.0153, "step": 13830 }, { "epoch": 18.527443105756358, "grad_norm": 0.2174520045518875, "learning_rate": 9.787917688905836e-05, "loss": 0.0188, "step": 13840 }, { "epoch": 18.54082998661312, "grad_norm": 0.24167196452617645, "learning_rate": 9.787440970187807e-05, "loss": 0.0151, "step": 13850 }, { "epoch": 18.55421686746988, "grad_norm": 0.1966535449028015, "learning_rate": 9.786963727922429e-05, "loss": 0.0147, "step": 13860 }, { "epoch": 18.56760374832664, "grad_norm": 0.7078732848167419, "learning_rate": 9.786485962161897e-05, "loss": 0.0125, "step": 13870 }, { "epoch": 18.5809906291834, "grad_norm": 0.9016069769859314, "learning_rate": 9.786007672958455e-05, "loss": 0.0174, "step": 13880 }, { "epoch": 18.59437751004016, "grad_norm": 0.21701596677303314, "learning_rate": 9.78552886036441e-05, "loss": 0.0159, "step": 13890 }, { "epoch": 18.60776439089692, "grad_norm": 0.3274746537208557, "learning_rate": 9.785049524432124e-05, "loss": 0.0179, "step": 13900 }, { "epoch": 18.621151271753682, "grad_norm": 0.32368406653404236, "learning_rate": 9.784569665214016e-05, "loss": 0.0158, "step": 13910 }, { "epoch": 18.634538152610443, "grad_norm": 0.23865607380867004, "learning_rate": 9.784089282762563e-05, "loss": 0.0166, "step": 13920 }, { "epoch": 18.647925033467203, "grad_norm": 0.29314830899238586, "learning_rate": 9.7836083771303e-05, "loss": 0.017, "step": 13930 }, { "epoch": 18.661311914323964, "grad_norm": 0.24343854188919067, "learning_rate": 9.783126948369817e-05, "loss": 0.0159, "step": 13940 }, { "epoch": 18.674698795180724, "grad_norm": 0.24993287026882172, "learning_rate": 9.78264499653376e-05, "loss": 0.0135, "step": 13950 }, { "epoch": 18.688085676037485, "grad_norm": 0.31334081292152405, "learning_rate": 9.782162521674838e-05, "loss": 0.0151, "step": 13960 }, { "epoch": 18.701472556894245, "grad_norm": 0.20607131719589233, "learning_rate": 9.781679523845812e-05, "loss": 0.0148, "step": 13970 }, { "epoch": 18.714859437751002, "grad_norm": 0.3498830795288086, "learning_rate": 9.781196003099502e-05, "loss": 0.0177, "step": 13980 }, { "epoch": 18.728246318607763, "grad_norm": 0.203077033162117, "learning_rate": 9.780711959488786e-05, "loss": 0.0153, "step": 13990 }, { "epoch": 18.741633199464523, "grad_norm": 0.23466747999191284, "learning_rate": 9.780227393066599e-05, "loss": 0.0175, "step": 14000 }, { "epoch": 18.755020080321284, "grad_norm": 0.24423496425151825, "learning_rate": 9.77974230388593e-05, "loss": 0.0152, "step": 14010 }, { "epoch": 18.768406961178044, "grad_norm": 0.23295237123966217, "learning_rate": 9.779256691999829e-05, "loss": 0.0155, "step": 14020 }, { "epoch": 18.781793842034805, "grad_norm": 0.16653284430503845, "learning_rate": 9.778770557461403e-05, "loss": 0.017, "step": 14030 }, { "epoch": 18.795180722891565, "grad_norm": 0.19751045107841492, "learning_rate": 9.778283900323812e-05, "loss": 0.0159, "step": 14040 }, { "epoch": 18.808567603748326, "grad_norm": 0.1892784982919693, "learning_rate": 9.777796720640277e-05, "loss": 0.0148, "step": 14050 }, { "epoch": 18.821954484605087, "grad_norm": 0.26604434847831726, "learning_rate": 9.777309018464078e-05, "loss": 0.0158, "step": 14060 }, { "epoch": 18.835341365461847, "grad_norm": 0.3511672019958496, "learning_rate": 9.776820793848547e-05, "loss": 0.0167, "step": 14070 }, { "epoch": 18.848728246318608, "grad_norm": 0.23904167115688324, "learning_rate": 9.776332046847075e-05, "loss": 0.0129, "step": 14080 }, { "epoch": 18.862115127175368, "grad_norm": 0.20130571722984314, "learning_rate": 9.775842777513111e-05, "loss": 0.0145, "step": 14090 }, { "epoch": 18.87550200803213, "grad_norm": 0.23312722146511078, "learning_rate": 9.775352985900163e-05, "loss": 0.0135, "step": 14100 }, { "epoch": 18.88888888888889, "grad_norm": 0.17954453825950623, "learning_rate": 9.774862672061791e-05, "loss": 0.0155, "step": 14110 }, { "epoch": 18.90227576974565, "grad_norm": 0.18986691534519196, "learning_rate": 9.774371836051616e-05, "loss": 0.0144, "step": 14120 }, { "epoch": 18.91566265060241, "grad_norm": 0.31767138838768005, "learning_rate": 9.773880477923315e-05, "loss": 0.0175, "step": 14130 }, { "epoch": 18.92904953145917, "grad_norm": 2.3537158966064453, "learning_rate": 9.773388597730623e-05, "loss": 0.016, "step": 14140 }, { "epoch": 18.94243641231593, "grad_norm": 0.2391567826271057, "learning_rate": 9.77289619552733e-05, "loss": 0.0144, "step": 14150 }, { "epoch": 18.955823293172692, "grad_norm": 0.18216514587402344, "learning_rate": 9.772403271367285e-05, "loss": 0.0149, "step": 14160 }, { "epoch": 18.969210174029453, "grad_norm": 0.20633351802825928, "learning_rate": 9.771909825304396e-05, "loss": 0.0161, "step": 14170 }, { "epoch": 18.982597054886213, "grad_norm": 0.18373635411262512, "learning_rate": 9.771415857392619e-05, "loss": 0.0169, "step": 14180 }, { "epoch": 18.99598393574297, "grad_norm": 0.2260683923959732, "learning_rate": 9.770921367685978e-05, "loss": 0.0143, "step": 14190 }, { "epoch": 19.00937081659973, "grad_norm": 0.22258928418159485, "learning_rate": 9.770426356238551e-05, "loss": 0.0162, "step": 14200 }, { "epoch": 19.02275769745649, "grad_norm": 0.20100829005241394, "learning_rate": 9.769930823104469e-05, "loss": 0.0149, "step": 14210 }, { "epoch": 19.03614457831325, "grad_norm": 0.2877759337425232, "learning_rate": 9.769434768337926e-05, "loss": 0.0153, "step": 14220 }, { "epoch": 19.049531459170012, "grad_norm": 0.4011650085449219, "learning_rate": 9.768938191993164e-05, "loss": 0.0153, "step": 14230 }, { "epoch": 19.062918340026773, "grad_norm": 0.1781289279460907, "learning_rate": 9.768441094124494e-05, "loss": 0.0156, "step": 14240 }, { "epoch": 19.076305220883533, "grad_norm": 0.260172575712204, "learning_rate": 9.767943474786275e-05, "loss": 0.018, "step": 14250 }, { "epoch": 19.089692101740294, "grad_norm": 0.8464676737785339, "learning_rate": 9.767445334032923e-05, "loss": 0.0175, "step": 14260 }, { "epoch": 19.103078982597054, "grad_norm": 0.25109413266181946, "learning_rate": 9.766946671918919e-05, "loss": 0.0144, "step": 14270 }, { "epoch": 19.116465863453815, "grad_norm": 0.4102210998535156, "learning_rate": 9.766447488498796e-05, "loss": 0.0166, "step": 14280 }, { "epoch": 19.129852744310575, "grad_norm": 0.1587594449520111, "learning_rate": 9.765947783827139e-05, "loss": 0.0157, "step": 14290 }, { "epoch": 19.143239625167336, "grad_norm": 0.16690212488174438, "learning_rate": 9.765447557958599e-05, "loss": 0.0121, "step": 14300 }, { "epoch": 19.156626506024097, "grad_norm": 0.17810533940792084, "learning_rate": 9.764946810947879e-05, "loss": 0.015, "step": 14310 }, { "epoch": 19.170013386880857, "grad_norm": 0.24483267962932587, "learning_rate": 9.764445542849738e-05, "loss": 0.0146, "step": 14320 }, { "epoch": 19.183400267737618, "grad_norm": 0.19430634379386902, "learning_rate": 9.763943753718998e-05, "loss": 0.019, "step": 14330 }, { "epoch": 19.196787148594378, "grad_norm": 0.20377317070960999, "learning_rate": 9.76344144361053e-05, "loss": 0.0137, "step": 14340 }, { "epoch": 19.21017402945114, "grad_norm": 0.3417036235332489, "learning_rate": 9.762938612579269e-05, "loss": 0.0171, "step": 14350 }, { "epoch": 19.2235609103079, "grad_norm": 0.26568901538848877, "learning_rate": 9.762435260680202e-05, "loss": 0.016, "step": 14360 }, { "epoch": 19.23694779116466, "grad_norm": 0.3422519862651825, "learning_rate": 9.761931387968373e-05, "loss": 0.016, "step": 14370 }, { "epoch": 19.25033467202142, "grad_norm": 0.19345267117023468, "learning_rate": 9.76142699449889e-05, "loss": 0.0151, "step": 14380 }, { "epoch": 19.26372155287818, "grad_norm": 0.30347496271133423, "learning_rate": 9.760922080326908e-05, "loss": 0.0148, "step": 14390 }, { "epoch": 19.27710843373494, "grad_norm": 0.27105361223220825, "learning_rate": 9.760416645507644e-05, "loss": 0.0166, "step": 14400 }, { "epoch": 19.2904953145917, "grad_norm": 0.34867972135543823, "learning_rate": 9.759910690096375e-05, "loss": 0.0154, "step": 14410 }, { "epoch": 19.30388219544846, "grad_norm": 0.22604936361312866, "learning_rate": 9.759404214148429e-05, "loss": 0.0146, "step": 14420 }, { "epoch": 19.31726907630522, "grad_norm": 0.3076559007167816, "learning_rate": 9.758897217719191e-05, "loss": 0.0125, "step": 14430 }, { "epoch": 19.33065595716198, "grad_norm": 0.12751244008541107, "learning_rate": 9.758389700864113e-05, "loss": 0.0142, "step": 14440 }, { "epoch": 19.34404283801874, "grad_norm": 0.42824986577033997, "learning_rate": 9.757881663638688e-05, "loss": 0.0172, "step": 14450 }, { "epoch": 19.3574297188755, "grad_norm": 0.20798057317733765, "learning_rate": 9.757373106098478e-05, "loss": 0.0145, "step": 14460 }, { "epoch": 19.37081659973226, "grad_norm": 0.2513469457626343, "learning_rate": 9.756864028299097e-05, "loss": 0.0159, "step": 14470 }, { "epoch": 19.384203480589022, "grad_norm": 0.3442630469799042, "learning_rate": 9.75635443029622e-05, "loss": 0.0152, "step": 14480 }, { "epoch": 19.397590361445783, "grad_norm": 0.17573077976703644, "learning_rate": 9.755844312145572e-05, "loss": 0.0126, "step": 14490 }, { "epoch": 19.410977242302543, "grad_norm": 0.1627020239830017, "learning_rate": 9.755333673902941e-05, "loss": 0.0159, "step": 14500 }, { "epoch": 19.424364123159304, "grad_norm": 0.21898087859153748, "learning_rate": 9.75482251562417e-05, "loss": 0.0169, "step": 14510 }, { "epoch": 19.437751004016064, "grad_norm": 0.20813730359077454, "learning_rate": 9.754310837365155e-05, "loss": 0.0173, "step": 14520 }, { "epoch": 19.451137884872825, "grad_norm": 0.1891898661851883, "learning_rate": 9.753798639181856e-05, "loss": 0.0126, "step": 14530 }, { "epoch": 19.464524765729585, "grad_norm": 0.3783469796180725, "learning_rate": 9.753285921130286e-05, "loss": 0.0166, "step": 14540 }, { "epoch": 19.477911646586346, "grad_norm": 0.2096080332994461, "learning_rate": 9.752772683266512e-05, "loss": 0.014, "step": 14550 }, { "epoch": 19.491298527443107, "grad_norm": 0.2165209949016571, "learning_rate": 9.752258925646665e-05, "loss": 0.0187, "step": 14560 }, { "epoch": 19.504685408299867, "grad_norm": 0.22118178009986877, "learning_rate": 9.751744648326926e-05, "loss": 0.0185, "step": 14570 }, { "epoch": 19.518072289156628, "grad_norm": 0.2237023562192917, "learning_rate": 9.751229851363536e-05, "loss": 0.0144, "step": 14580 }, { "epoch": 19.531459170013388, "grad_norm": 0.15979450941085815, "learning_rate": 9.750714534812793e-05, "loss": 0.0141, "step": 14590 }, { "epoch": 19.54484605087015, "grad_norm": 0.20604851841926575, "learning_rate": 9.750198698731053e-05, "loss": 0.0158, "step": 14600 }, { "epoch": 19.55823293172691, "grad_norm": 0.26558858156204224, "learning_rate": 9.749682343174722e-05, "loss": 0.0153, "step": 14610 }, { "epoch": 19.57161981258367, "grad_norm": 0.19863861799240112, "learning_rate": 9.749165468200272e-05, "loss": 0.0148, "step": 14620 }, { "epoch": 19.585006693440427, "grad_norm": 0.23289157450199127, "learning_rate": 9.748648073864229e-05, "loss": 0.0147, "step": 14630 }, { "epoch": 19.598393574297187, "grad_norm": 0.2011745721101761, "learning_rate": 9.748130160223168e-05, "loss": 0.0126, "step": 14640 }, { "epoch": 19.611780455153948, "grad_norm": 0.25362148880958557, "learning_rate": 9.747611727333734e-05, "loss": 0.016, "step": 14650 }, { "epoch": 19.62516733601071, "grad_norm": 0.23475971817970276, "learning_rate": 9.74709277525262e-05, "loss": 0.0154, "step": 14660 }, { "epoch": 19.63855421686747, "grad_norm": 0.4407663643360138, "learning_rate": 9.746573304036576e-05, "loss": 0.0182, "step": 14670 }, { "epoch": 19.65194109772423, "grad_norm": 0.20900189876556396, "learning_rate": 9.746053313742412e-05, "loss": 0.018, "step": 14680 }, { "epoch": 19.66532797858099, "grad_norm": 0.1922289878129959, "learning_rate": 9.745532804426994e-05, "loss": 0.0186, "step": 14690 }, { "epoch": 19.67871485943775, "grad_norm": 0.15955528616905212, "learning_rate": 9.745011776147242e-05, "loss": 0.0146, "step": 14700 }, { "epoch": 19.69210174029451, "grad_norm": 0.1822960376739502, "learning_rate": 9.744490228960138e-05, "loss": 0.0168, "step": 14710 }, { "epoch": 19.70548862115127, "grad_norm": 0.14596374332904816, "learning_rate": 9.743968162922713e-05, "loss": 0.0157, "step": 14720 }, { "epoch": 19.718875502008032, "grad_norm": 0.24269990622997284, "learning_rate": 9.743445578092064e-05, "loss": 0.0152, "step": 14730 }, { "epoch": 19.732262382864793, "grad_norm": 0.21783249080181122, "learning_rate": 9.742922474525338e-05, "loss": 0.0164, "step": 14740 }, { "epoch": 19.745649263721553, "grad_norm": 0.2836959660053253, "learning_rate": 9.742398852279741e-05, "loss": 0.0155, "step": 14750 }, { "epoch": 19.759036144578314, "grad_norm": 0.23026372492313385, "learning_rate": 9.741874711412535e-05, "loss": 0.0165, "step": 14760 }, { "epoch": 19.772423025435074, "grad_norm": 0.41560855507850647, "learning_rate": 9.741350051981042e-05, "loss": 0.018, "step": 14770 }, { "epoch": 19.785809906291835, "grad_norm": 0.4646131694316864, "learning_rate": 9.740824874042633e-05, "loss": 0.0167, "step": 14780 }, { "epoch": 19.799196787148595, "grad_norm": 0.2861875593662262, "learning_rate": 9.740299177654746e-05, "loss": 0.0145, "step": 14790 }, { "epoch": 19.812583668005356, "grad_norm": 0.3136826455593109, "learning_rate": 9.739772962874867e-05, "loss": 0.0135, "step": 14800 }, { "epoch": 19.825970548862117, "grad_norm": 1.3986362218856812, "learning_rate": 9.739246229760541e-05, "loss": 0.0159, "step": 14810 }, { "epoch": 19.839357429718877, "grad_norm": 0.2976928949356079, "learning_rate": 9.738718978369376e-05, "loss": 0.0163, "step": 14820 }, { "epoch": 19.852744310575638, "grad_norm": 0.17326878011226654, "learning_rate": 9.738191208759025e-05, "loss": 0.0148, "step": 14830 }, { "epoch": 19.866131191432395, "grad_norm": 0.4246130585670471, "learning_rate": 9.73766292098721e-05, "loss": 0.0156, "step": 14840 }, { "epoch": 19.879518072289155, "grad_norm": 0.18989266455173492, "learning_rate": 9.737134115111699e-05, "loss": 0.016, "step": 14850 }, { "epoch": 19.892904953145916, "grad_norm": 0.3093932867050171, "learning_rate": 9.736604791190323e-05, "loss": 0.0155, "step": 14860 }, { "epoch": 19.906291834002676, "grad_norm": 0.18343961238861084, "learning_rate": 9.73607494928097e-05, "loss": 0.0133, "step": 14870 }, { "epoch": 19.919678714859437, "grad_norm": 0.20941205322742462, "learning_rate": 9.735544589441581e-05, "loss": 0.0148, "step": 14880 }, { "epoch": 19.933065595716197, "grad_norm": 1.0278455018997192, "learning_rate": 9.735013711730154e-05, "loss": 0.0154, "step": 14890 }, { "epoch": 19.946452476572958, "grad_norm": 1.0144453048706055, "learning_rate": 9.734482316204747e-05, "loss": 0.0149, "step": 14900 }, { "epoch": 19.95983935742972, "grad_norm": 0.5290202498435974, "learning_rate": 9.733950402923473e-05, "loss": 0.021, "step": 14910 }, { "epoch": 19.97322623828648, "grad_norm": 0.24754902720451355, "learning_rate": 9.7334179719445e-05, "loss": 0.0156, "step": 14920 }, { "epoch": 19.98661311914324, "grad_norm": 0.31367138028144836, "learning_rate": 9.732885023326053e-05, "loss": 0.0148, "step": 14930 }, { "epoch": 20.0, "grad_norm": 0.3898790180683136, "learning_rate": 9.732351557126418e-05, "loss": 0.0194, "step": 14940 }, { "epoch": 20.01338688085676, "grad_norm": 0.5135213136672974, "learning_rate": 9.731817573403929e-05, "loss": 0.0182, "step": 14950 }, { "epoch": 20.02677376171352, "grad_norm": 0.15241393446922302, "learning_rate": 9.731283072216985e-05, "loss": 0.0139, "step": 14960 }, { "epoch": 20.04016064257028, "grad_norm": 0.1815466582775116, "learning_rate": 9.730748053624039e-05, "loss": 0.0158, "step": 14970 }, { "epoch": 20.053547523427042, "grad_norm": 0.19766564667224884, "learning_rate": 9.730212517683598e-05, "loss": 0.0152, "step": 14980 }, { "epoch": 20.066934404283803, "grad_norm": 0.14083699882030487, "learning_rate": 9.729676464454228e-05, "loss": 0.0169, "step": 14990 }, { "epoch": 20.080321285140563, "grad_norm": 0.31897851824760437, "learning_rate": 9.72913989399455e-05, "loss": 0.0152, "step": 15000 }, { "epoch": 20.093708165997324, "grad_norm": 0.17268012464046478, "learning_rate": 9.728602806363242e-05, "loss": 0.0159, "step": 15010 }, { "epoch": 20.107095046854084, "grad_norm": 0.31045612692832947, "learning_rate": 9.728065201619043e-05, "loss": 0.0179, "step": 15020 }, { "epoch": 20.120481927710845, "grad_norm": 0.23404502868652344, "learning_rate": 9.727527079820742e-05, "loss": 0.016, "step": 15030 }, { "epoch": 20.133868808567605, "grad_norm": 0.2501435875892639, "learning_rate": 9.726988441027186e-05, "loss": 0.0154, "step": 15040 }, { "epoch": 20.147255689424362, "grad_norm": 0.19706369936466217, "learning_rate": 9.726449285297281e-05, "loss": 0.0161, "step": 15050 }, { "epoch": 20.160642570281123, "grad_norm": 0.18400613963603973, "learning_rate": 9.72590961268999e-05, "loss": 0.013, "step": 15060 }, { "epoch": 20.174029451137883, "grad_norm": 0.22700858116149902, "learning_rate": 9.725369423264328e-05, "loss": 0.017, "step": 15070 }, { "epoch": 20.187416331994644, "grad_norm": 0.1877514272928238, "learning_rate": 9.72482871707937e-05, "loss": 0.0151, "step": 15080 }, { "epoch": 20.200803212851405, "grad_norm": 0.21866397559642792, "learning_rate": 9.724287494194247e-05, "loss": 0.0187, "step": 15090 }, { "epoch": 20.214190093708165, "grad_norm": 0.3122369050979614, "learning_rate": 9.723745754668147e-05, "loss": 0.0163, "step": 15100 }, { "epoch": 20.227576974564926, "grad_norm": 0.2968292236328125, "learning_rate": 9.723203498560313e-05, "loss": 0.0178, "step": 15110 }, { "epoch": 20.240963855421686, "grad_norm": 0.20290009677410126, "learning_rate": 9.722660725930046e-05, "loss": 0.0162, "step": 15120 }, { "epoch": 20.254350736278447, "grad_norm": 0.24312542378902435, "learning_rate": 9.722117436836702e-05, "loss": 0.0193, "step": 15130 }, { "epoch": 20.267737617135207, "grad_norm": 0.2320886254310608, "learning_rate": 9.721573631339696e-05, "loss": 0.0152, "step": 15140 }, { "epoch": 20.281124497991968, "grad_norm": 0.21450889110565186, "learning_rate": 9.721029309498494e-05, "loss": 0.0145, "step": 15150 }, { "epoch": 20.29451137884873, "grad_norm": 0.17548036575317383, "learning_rate": 9.720484471372627e-05, "loss": 0.0141, "step": 15160 }, { "epoch": 20.30789825970549, "grad_norm": 0.4828341007232666, "learning_rate": 9.719939117021673e-05, "loss": 0.0156, "step": 15170 }, { "epoch": 20.32128514056225, "grad_norm": 0.3254314064979553, "learning_rate": 9.719393246505275e-05, "loss": 0.0155, "step": 15180 }, { "epoch": 20.33467202141901, "grad_norm": 0.22986704111099243, "learning_rate": 9.718846859883128e-05, "loss": 0.013, "step": 15190 }, { "epoch": 20.34805890227577, "grad_norm": 0.17972606420516968, "learning_rate": 9.718299957214982e-05, "loss": 0.0182, "step": 15200 }, { "epoch": 20.36144578313253, "grad_norm": 0.350785493850708, "learning_rate": 9.717752538560646e-05, "loss": 0.0172, "step": 15210 }, { "epoch": 20.37483266398929, "grad_norm": 0.4063788950443268, "learning_rate": 9.717204603979986e-05, "loss": 0.0174, "step": 15220 }, { "epoch": 20.388219544846052, "grad_norm": 0.25952616333961487, "learning_rate": 9.716656153532922e-05, "loss": 0.0179, "step": 15230 }, { "epoch": 20.401606425702813, "grad_norm": 0.2917180061340332, "learning_rate": 9.716107187279434e-05, "loss": 0.0151, "step": 15240 }, { "epoch": 20.414993306559573, "grad_norm": 0.24252913892269135, "learning_rate": 9.715557705279555e-05, "loss": 0.0173, "step": 15250 }, { "epoch": 20.42838018741633, "grad_norm": 0.179474875330925, "learning_rate": 9.715007707593372e-05, "loss": 0.0167, "step": 15260 }, { "epoch": 20.44176706827309, "grad_norm": 0.25563785433769226, "learning_rate": 9.714457194281036e-05, "loss": 0.0151, "step": 15270 }, { "epoch": 20.45515394912985, "grad_norm": 0.31420689821243286, "learning_rate": 9.713906165402751e-05, "loss": 0.0175, "step": 15280 }, { "epoch": 20.468540829986612, "grad_norm": 0.25899356603622437, "learning_rate": 9.713354621018774e-05, "loss": 0.0137, "step": 15290 }, { "epoch": 20.481927710843372, "grad_norm": 0.25040119886398315, "learning_rate": 9.712802561189422e-05, "loss": 0.0146, "step": 15300 }, { "epoch": 20.495314591700133, "grad_norm": 0.24471637606620789, "learning_rate": 9.712249985975069e-05, "loss": 0.0154, "step": 15310 }, { "epoch": 20.508701472556893, "grad_norm": 0.2711660861968994, "learning_rate": 9.71169689543614e-05, "loss": 0.0174, "step": 15320 }, { "epoch": 20.522088353413654, "grad_norm": 0.15080435574054718, "learning_rate": 9.711143289633123e-05, "loss": 0.0126, "step": 15330 }, { "epoch": 20.535475234270415, "grad_norm": 0.4722711741924286, "learning_rate": 9.710589168626561e-05, "loss": 0.0135, "step": 15340 }, { "epoch": 20.548862115127175, "grad_norm": 0.2637611925601959, "learning_rate": 9.710034532477048e-05, "loss": 0.0161, "step": 15350 }, { "epoch": 20.562248995983936, "grad_norm": 0.22790029644966125, "learning_rate": 9.709479381245239e-05, "loss": 0.0138, "step": 15360 }, { "epoch": 20.575635876840696, "grad_norm": 0.1657637357711792, "learning_rate": 9.708923714991847e-05, "loss": 0.0149, "step": 15370 }, { "epoch": 20.589022757697457, "grad_norm": 0.2723217308521271, "learning_rate": 9.708367533777638e-05, "loss": 0.0156, "step": 15380 }, { "epoch": 20.602409638554217, "grad_norm": 0.6069658994674683, "learning_rate": 9.707810837663431e-05, "loss": 0.0158, "step": 15390 }, { "epoch": 20.615796519410978, "grad_norm": 0.14312583208084106, "learning_rate": 9.707253626710113e-05, "loss": 0.0148, "step": 15400 }, { "epoch": 20.62918340026774, "grad_norm": 0.16444054245948792, "learning_rate": 9.706695900978613e-05, "loss": 0.014, "step": 15410 }, { "epoch": 20.6425702811245, "grad_norm": 0.568734347820282, "learning_rate": 9.706137660529926e-05, "loss": 0.0129, "step": 15420 }, { "epoch": 20.65595716198126, "grad_norm": 0.3039434254169464, "learning_rate": 9.705578905425101e-05, "loss": 0.0162, "step": 15430 }, { "epoch": 20.66934404283802, "grad_norm": 0.35343846678733826, "learning_rate": 9.705019635725241e-05, "loss": 0.0163, "step": 15440 }, { "epoch": 20.68273092369478, "grad_norm": 0.2397642880678177, "learning_rate": 9.704459851491508e-05, "loss": 0.0164, "step": 15450 }, { "epoch": 20.69611780455154, "grad_norm": 0.22343742847442627, "learning_rate": 9.703899552785118e-05, "loss": 0.0141, "step": 15460 }, { "epoch": 20.7095046854083, "grad_norm": 0.20151035487651825, "learning_rate": 9.703338739667346e-05, "loss": 0.0146, "step": 15470 }, { "epoch": 20.72289156626506, "grad_norm": 0.1928141862154007, "learning_rate": 9.70277741219952e-05, "loss": 0.0125, "step": 15480 }, { "epoch": 20.73627844712182, "grad_norm": 0.2943059206008911, "learning_rate": 9.702215570443027e-05, "loss": 0.0159, "step": 15490 }, { "epoch": 20.74966532797858, "grad_norm": 0.2740659713745117, "learning_rate": 9.701653214459309e-05, "loss": 0.0154, "step": 15500 }, { "epoch": 20.76305220883534, "grad_norm": 0.1920522302389145, "learning_rate": 9.701090344309865e-05, "loss": 0.0173, "step": 15510 }, { "epoch": 20.7764390896921, "grad_norm": 0.20266205072402954, "learning_rate": 9.700526960056247e-05, "loss": 0.016, "step": 15520 }, { "epoch": 20.78982597054886, "grad_norm": 0.3632095158100128, "learning_rate": 9.699963061760068e-05, "loss": 0.0156, "step": 15530 }, { "epoch": 20.803212851405622, "grad_norm": 0.5532779693603516, "learning_rate": 9.699398649482997e-05, "loss": 0.0154, "step": 15540 }, { "epoch": 20.816599732262382, "grad_norm": 0.32147160172462463, "learning_rate": 9.698833723286753e-05, "loss": 0.0168, "step": 15550 }, { "epoch": 20.829986613119143, "grad_norm": 0.18760152161121368, "learning_rate": 9.698268283233118e-05, "loss": 0.015, "step": 15560 }, { "epoch": 20.843373493975903, "grad_norm": 0.21354658901691437, "learning_rate": 9.697702329383929e-05, "loss": 0.0175, "step": 15570 }, { "epoch": 20.856760374832664, "grad_norm": 0.22818811237812042, "learning_rate": 9.697135861801074e-05, "loss": 0.0142, "step": 15580 }, { "epoch": 20.870147255689425, "grad_norm": 0.1673123687505722, "learning_rate": 9.696568880546505e-05, "loss": 0.0165, "step": 15590 }, { "epoch": 20.883534136546185, "grad_norm": 0.220223531126976, "learning_rate": 9.696001385682223e-05, "loss": 0.0107, "step": 15600 }, { "epoch": 20.896921017402946, "grad_norm": 0.19493599236011505, "learning_rate": 9.695433377270291e-05, "loss": 0.0117, "step": 15610 }, { "epoch": 20.910307898259706, "grad_norm": 0.7402459383010864, "learning_rate": 9.694864855372824e-05, "loss": 0.0138, "step": 15620 }, { "epoch": 20.923694779116467, "grad_norm": 0.1902385652065277, "learning_rate": 9.694295820051995e-05, "loss": 0.0132, "step": 15630 }, { "epoch": 20.937081659973227, "grad_norm": 0.21054646372795105, "learning_rate": 9.693726271370032e-05, "loss": 0.0122, "step": 15640 }, { "epoch": 20.950468540829988, "grad_norm": 0.5497515201568604, "learning_rate": 9.693156209389221e-05, "loss": 0.0129, "step": 15650 }, { "epoch": 20.96385542168675, "grad_norm": 0.14521829783916473, "learning_rate": 9.692585634171905e-05, "loss": 0.014, "step": 15660 }, { "epoch": 20.97724230254351, "grad_norm": 0.20105178654193878, "learning_rate": 9.692014545780476e-05, "loss": 0.0132, "step": 15670 }, { "epoch": 20.99062918340027, "grad_norm": 0.29216450452804565, "learning_rate": 9.691442944277393e-05, "loss": 0.0151, "step": 15680 }, { "epoch": 21.004016064257026, "grad_norm": 0.34027546644210815, "learning_rate": 9.690870829725162e-05, "loss": 0.0125, "step": 15690 }, { "epoch": 21.017402945113787, "grad_norm": 0.27340584993362427, "learning_rate": 9.69029820218635e-05, "loss": 0.0139, "step": 15700 }, { "epoch": 21.030789825970547, "grad_norm": 0.27981606125831604, "learning_rate": 9.689725061723579e-05, "loss": 0.0131, "step": 15710 }, { "epoch": 21.044176706827308, "grad_norm": 0.5095214247703552, "learning_rate": 9.689151408399527e-05, "loss": 0.0175, "step": 15720 }, { "epoch": 21.05756358768407, "grad_norm": 0.2671084403991699, "learning_rate": 9.688577242276924e-05, "loss": 0.0126, "step": 15730 }, { "epoch": 21.07095046854083, "grad_norm": 0.20026805996894836, "learning_rate": 9.688002563418566e-05, "loss": 0.014, "step": 15740 }, { "epoch": 21.08433734939759, "grad_norm": 0.21637102961540222, "learning_rate": 9.687427371887293e-05, "loss": 0.0148, "step": 15750 }, { "epoch": 21.09772423025435, "grad_norm": 0.2226255238056183, "learning_rate": 9.686851667746012e-05, "loss": 0.0148, "step": 15760 }, { "epoch": 21.11111111111111, "grad_norm": 0.4808180332183838, "learning_rate": 9.686275451057677e-05, "loss": 0.0162, "step": 15770 }, { "epoch": 21.12449799196787, "grad_norm": 0.31342506408691406, "learning_rate": 9.685698721885308e-05, "loss": 0.0159, "step": 15780 }, { "epoch": 21.137884872824632, "grad_norm": 0.5847427248954773, "learning_rate": 9.68512148029197e-05, "loss": 0.0136, "step": 15790 }, { "epoch": 21.151271753681392, "grad_norm": 0.21752893924713135, "learning_rate": 9.684543726340791e-05, "loss": 0.0198, "step": 15800 }, { "epoch": 21.164658634538153, "grad_norm": 0.27471524477005005, "learning_rate": 9.683965460094952e-05, "loss": 0.015, "step": 15810 }, { "epoch": 21.178045515394913, "grad_norm": 0.2472798228263855, "learning_rate": 9.683386681617694e-05, "loss": 0.0201, "step": 15820 }, { "epoch": 21.191432396251674, "grad_norm": 0.40932103991508484, "learning_rate": 9.68280739097231e-05, "loss": 0.0143, "step": 15830 }, { "epoch": 21.204819277108435, "grad_norm": 0.3706764280796051, "learning_rate": 9.682227588222148e-05, "loss": 0.0148, "step": 15840 }, { "epoch": 21.218206157965195, "grad_norm": 0.2538731098175049, "learning_rate": 9.681647273430618e-05, "loss": 0.0142, "step": 15850 }, { "epoch": 21.231593038821956, "grad_norm": 0.296343058347702, "learning_rate": 9.681066446661182e-05, "loss": 0.0153, "step": 15860 }, { "epoch": 21.244979919678716, "grad_norm": 0.5275658369064331, "learning_rate": 9.680485107977357e-05, "loss": 0.0158, "step": 15870 }, { "epoch": 21.258366800535477, "grad_norm": 0.29089075326919556, "learning_rate": 9.679903257442716e-05, "loss": 0.0157, "step": 15880 }, { "epoch": 21.271753681392237, "grad_norm": 0.17289511859416962, "learning_rate": 9.679320895120891e-05, "loss": 0.0131, "step": 15890 }, { "epoch": 21.285140562248998, "grad_norm": 0.15444596111774445, "learning_rate": 9.67873802107557e-05, "loss": 0.0145, "step": 15900 }, { "epoch": 21.298527443105755, "grad_norm": 0.17311279475688934, "learning_rate": 9.67815463537049e-05, "loss": 0.0154, "step": 15910 }, { "epoch": 21.311914323962515, "grad_norm": 0.2167661041021347, "learning_rate": 9.677570738069457e-05, "loss": 0.0134, "step": 15920 }, { "epoch": 21.325301204819276, "grad_norm": 0.19029387831687927, "learning_rate": 9.676986329236318e-05, "loss": 0.0156, "step": 15930 }, { "epoch": 21.338688085676036, "grad_norm": 0.46265509724617004, "learning_rate": 9.676401408934987e-05, "loss": 0.0145, "step": 15940 }, { "epoch": 21.352074966532797, "grad_norm": 0.2189975082874298, "learning_rate": 9.675815977229428e-05, "loss": 0.0154, "step": 15950 }, { "epoch": 21.365461847389557, "grad_norm": 0.2503263056278229, "learning_rate": 9.675230034183664e-05, "loss": 0.0129, "step": 15960 }, { "epoch": 21.378848728246318, "grad_norm": 0.17158295214176178, "learning_rate": 9.674643579861773e-05, "loss": 0.0153, "step": 15970 }, { "epoch": 21.39223560910308, "grad_norm": 0.23680031299591064, "learning_rate": 9.674056614327886e-05, "loss": 0.0147, "step": 15980 }, { "epoch": 21.40562248995984, "grad_norm": 0.2326812893152237, "learning_rate": 9.673469137646198e-05, "loss": 0.0128, "step": 15990 }, { "epoch": 21.4190093708166, "grad_norm": 0.2519954741001129, "learning_rate": 9.67288114988095e-05, "loss": 0.0146, "step": 16000 }, { "epoch": 21.43239625167336, "grad_norm": 0.18503664433956146, "learning_rate": 9.672292651096447e-05, "loss": 0.0117, "step": 16010 }, { "epoch": 21.44578313253012, "grad_norm": 0.2587525546550751, "learning_rate": 9.671703641357042e-05, "loss": 0.0134, "step": 16020 }, { "epoch": 21.45917001338688, "grad_norm": 0.2743535339832306, "learning_rate": 9.67111412072715e-05, "loss": 0.0136, "step": 16030 }, { "epoch": 21.472556894243642, "grad_norm": 0.21658974885940552, "learning_rate": 9.670524089271242e-05, "loss": 0.0134, "step": 16040 }, { "epoch": 21.485943775100402, "grad_norm": 0.2564528286457062, "learning_rate": 9.669933547053842e-05, "loss": 0.0131, "step": 16050 }, { "epoch": 21.499330655957163, "grad_norm": 0.21757158637046814, "learning_rate": 9.669342494139531e-05, "loss": 0.0148, "step": 16060 }, { "epoch": 21.512717536813923, "grad_norm": 0.7403753995895386, "learning_rate": 9.668750930592943e-05, "loss": 0.0135, "step": 16070 }, { "epoch": 21.526104417670684, "grad_norm": 0.2056664228439331, "learning_rate": 9.668158856478775e-05, "loss": 0.0135, "step": 16080 }, { "epoch": 21.539491298527444, "grad_norm": 0.2860671877861023, "learning_rate": 9.66756627186177e-05, "loss": 0.0129, "step": 16090 }, { "epoch": 21.552878179384205, "grad_norm": 0.3350357115268707, "learning_rate": 9.666973176806737e-05, "loss": 0.0174, "step": 16100 }, { "epoch": 21.566265060240966, "grad_norm": 0.24938619136810303, "learning_rate": 9.666379571378534e-05, "loss": 0.0149, "step": 16110 }, { "epoch": 21.579651941097723, "grad_norm": 0.1925618201494217, "learning_rate": 9.665785455642076e-05, "loss": 0.0126, "step": 16120 }, { "epoch": 21.593038821954483, "grad_norm": 0.2660065293312073, "learning_rate": 9.665190829662337e-05, "loss": 0.016, "step": 16130 }, { "epoch": 21.606425702811244, "grad_norm": 0.28098466992378235, "learning_rate": 9.664595693504342e-05, "loss": 0.016, "step": 16140 }, { "epoch": 21.619812583668004, "grad_norm": 0.17108632624149323, "learning_rate": 9.664000047233175e-05, "loss": 0.0122, "step": 16150 }, { "epoch": 21.633199464524765, "grad_norm": 0.31390219926834106, "learning_rate": 9.663403890913976e-05, "loss": 0.0161, "step": 16160 }, { "epoch": 21.646586345381525, "grad_norm": 0.26964253187179565, "learning_rate": 9.662807224611938e-05, "loss": 0.0138, "step": 16170 }, { "epoch": 21.659973226238286, "grad_norm": 0.2554338574409485, "learning_rate": 9.662210048392311e-05, "loss": 0.0132, "step": 16180 }, { "epoch": 21.673360107095046, "grad_norm": 0.22656579315662384, "learning_rate": 9.661612362320405e-05, "loss": 0.0142, "step": 16190 }, { "epoch": 21.686746987951807, "grad_norm": 0.3408540189266205, "learning_rate": 9.661014166461579e-05, "loss": 0.0157, "step": 16200 }, { "epoch": 21.700133868808567, "grad_norm": 0.21916107833385468, "learning_rate": 9.66041546088125e-05, "loss": 0.0162, "step": 16210 }, { "epoch": 21.713520749665328, "grad_norm": 0.23561973869800568, "learning_rate": 9.659816245644895e-05, "loss": 0.0139, "step": 16220 }, { "epoch": 21.72690763052209, "grad_norm": 0.17381344735622406, "learning_rate": 9.65921652081804e-05, "loss": 0.0176, "step": 16230 }, { "epoch": 21.74029451137885, "grad_norm": 0.18620294332504272, "learning_rate": 9.658616286466271e-05, "loss": 0.0159, "step": 16240 }, { "epoch": 21.75368139223561, "grad_norm": 0.30914106965065, "learning_rate": 9.65801554265523e-05, "loss": 0.0131, "step": 16250 }, { "epoch": 21.76706827309237, "grad_norm": 0.3264181911945343, "learning_rate": 9.657414289450612e-05, "loss": 0.0121, "step": 16260 }, { "epoch": 21.78045515394913, "grad_norm": 0.1976630836725235, "learning_rate": 9.656812526918171e-05, "loss": 0.0147, "step": 16270 }, { "epoch": 21.79384203480589, "grad_norm": 0.25678709149360657, "learning_rate": 9.656210255123712e-05, "loss": 0.0158, "step": 16280 }, { "epoch": 21.80722891566265, "grad_norm": 0.3108547329902649, "learning_rate": 9.6556074741331e-05, "loss": 0.0151, "step": 16290 }, { "epoch": 21.820615796519412, "grad_norm": 0.19983476400375366, "learning_rate": 9.655004184012256e-05, "loss": 0.0151, "step": 16300 }, { "epoch": 21.834002677376173, "grad_norm": 0.49805620312690735, "learning_rate": 9.654400384827152e-05, "loss": 0.0194, "step": 16310 }, { "epoch": 21.847389558232933, "grad_norm": 0.1774892956018448, "learning_rate": 9.653796076643818e-05, "loss": 0.0155, "step": 16320 }, { "epoch": 21.86077643908969, "grad_norm": 0.17219385504722595, "learning_rate": 9.653191259528344e-05, "loss": 0.0135, "step": 16330 }, { "epoch": 21.87416331994645, "grad_norm": 0.221721351146698, "learning_rate": 9.65258593354687e-05, "loss": 0.0154, "step": 16340 }, { "epoch": 21.88755020080321, "grad_norm": 0.21026217937469482, "learning_rate": 9.651980098765591e-05, "loss": 0.0132, "step": 16350 }, { "epoch": 21.900937081659972, "grad_norm": 0.4411478042602539, "learning_rate": 9.651373755250765e-05, "loss": 0.0138, "step": 16360 }, { "epoch": 21.914323962516733, "grad_norm": 0.17170125246047974, "learning_rate": 9.650766903068697e-05, "loss": 0.015, "step": 16370 }, { "epoch": 21.927710843373493, "grad_norm": 0.4350406527519226, "learning_rate": 9.650159542285753e-05, "loss": 0.0138, "step": 16380 }, { "epoch": 21.941097724230254, "grad_norm": 1.1537833213806152, "learning_rate": 9.649551672968353e-05, "loss": 0.0168, "step": 16390 }, { "epoch": 21.954484605087014, "grad_norm": 0.257390558719635, "learning_rate": 9.648943295182973e-05, "loss": 0.0164, "step": 16400 }, { "epoch": 21.967871485943775, "grad_norm": 0.26861050724983215, "learning_rate": 9.648334408996144e-05, "loss": 0.0148, "step": 16410 }, { "epoch": 21.981258366800535, "grad_norm": 0.27847903966903687, "learning_rate": 9.647725014474452e-05, "loss": 0.0164, "step": 16420 }, { "epoch": 21.994645247657296, "grad_norm": 0.21363039314746857, "learning_rate": 9.64711511168454e-05, "loss": 0.0121, "step": 16430 }, { "epoch": 22.008032128514056, "grad_norm": 0.1422131508588791, "learning_rate": 9.646504700693108e-05, "loss": 0.0148, "step": 16440 }, { "epoch": 22.021419009370817, "grad_norm": 0.7410272359848022, "learning_rate": 9.645893781566907e-05, "loss": 0.0131, "step": 16450 }, { "epoch": 22.034805890227577, "grad_norm": 0.16480934619903564, "learning_rate": 9.645282354372744e-05, "loss": 0.0147, "step": 16460 }, { "epoch": 22.048192771084338, "grad_norm": 0.5840550661087036, "learning_rate": 9.644670419177491e-05, "loss": 0.0157, "step": 16470 }, { "epoch": 22.0615796519411, "grad_norm": 0.18372732400894165, "learning_rate": 9.644057976048062e-05, "loss": 0.0122, "step": 16480 }, { "epoch": 22.07496653279786, "grad_norm": 0.23330320417881012, "learning_rate": 9.643445025051435e-05, "loss": 0.0143, "step": 16490 }, { "epoch": 22.08835341365462, "grad_norm": 0.2031058818101883, "learning_rate": 9.642831566254641e-05, "loss": 0.0146, "step": 16500 }, { "epoch": 22.10174029451138, "grad_norm": 0.26037657260894775, "learning_rate": 9.642217599724769e-05, "loss": 0.0143, "step": 16510 }, { "epoch": 22.11512717536814, "grad_norm": 0.2300577312707901, "learning_rate": 9.64160312552896e-05, "loss": 0.0134, "step": 16520 }, { "epoch": 22.1285140562249, "grad_norm": 0.2845425605773926, "learning_rate": 9.64098814373441e-05, "loss": 0.0145, "step": 16530 }, { "epoch": 22.141900937081658, "grad_norm": 1.1533713340759277, "learning_rate": 9.640372654408374e-05, "loss": 0.0137, "step": 16540 }, { "epoch": 22.15528781793842, "grad_norm": 0.17278920114040375, "learning_rate": 9.639756657618162e-05, "loss": 0.0136, "step": 16550 }, { "epoch": 22.16867469879518, "grad_norm": 0.2778032422065735, "learning_rate": 9.639140153431138e-05, "loss": 0.0126, "step": 16560 }, { "epoch": 22.18206157965194, "grad_norm": 0.18064141273498535, "learning_rate": 9.638523141914721e-05, "loss": 0.0138, "step": 16570 }, { "epoch": 22.1954484605087, "grad_norm": 0.4813990890979767, "learning_rate": 9.637905623136388e-05, "loss": 0.0134, "step": 16580 }, { "epoch": 22.20883534136546, "grad_norm": 0.31604263186454773, "learning_rate": 9.637287597163669e-05, "loss": 0.0147, "step": 16590 }, { "epoch": 22.22222222222222, "grad_norm": 0.6089951992034912, "learning_rate": 9.63666906406415e-05, "loss": 0.014, "step": 16600 }, { "epoch": 22.235609103078982, "grad_norm": 2.109109401702881, "learning_rate": 9.636050023905473e-05, "loss": 0.016, "step": 16610 }, { "epoch": 22.248995983935743, "grad_norm": 0.24901340901851654, "learning_rate": 9.635430476755336e-05, "loss": 0.0201, "step": 16620 }, { "epoch": 22.262382864792503, "grad_norm": 0.22867393493652344, "learning_rate": 9.63481042268149e-05, "loss": 0.0162, "step": 16630 }, { "epoch": 22.275769745649264, "grad_norm": 0.3003045916557312, "learning_rate": 9.634189861751745e-05, "loss": 0.0148, "step": 16640 }, { "epoch": 22.289156626506024, "grad_norm": 0.2619195580482483, "learning_rate": 9.633568794033967e-05, "loss": 0.0178, "step": 16650 }, { "epoch": 22.302543507362785, "grad_norm": 0.1828472763299942, "learning_rate": 9.63294721959607e-05, "loss": 0.0155, "step": 16660 }, { "epoch": 22.315930388219545, "grad_norm": 0.2696637511253357, "learning_rate": 9.63232513850603e-05, "loss": 0.0147, "step": 16670 }, { "epoch": 22.329317269076306, "grad_norm": 0.1754608154296875, "learning_rate": 9.631702550831878e-05, "loss": 0.0171, "step": 16680 }, { "epoch": 22.342704149933066, "grad_norm": 0.34113261103630066, "learning_rate": 9.631079456641698e-05, "loss": 0.0162, "step": 16690 }, { "epoch": 22.356091030789827, "grad_norm": 0.1777423471212387, "learning_rate": 9.630455856003632e-05, "loss": 0.0189, "step": 16700 }, { "epoch": 22.369477911646587, "grad_norm": 0.24163827300071716, "learning_rate": 9.629831748985876e-05, "loss": 0.0176, "step": 16710 }, { "epoch": 22.382864792503348, "grad_norm": 0.3114303648471832, "learning_rate": 9.629207135656679e-05, "loss": 0.0141, "step": 16720 }, { "epoch": 22.39625167336011, "grad_norm": 0.2143089324235916, "learning_rate": 9.628582016084353e-05, "loss": 0.017, "step": 16730 }, { "epoch": 22.40963855421687, "grad_norm": 0.20285090804100037, "learning_rate": 9.627956390337254e-05, "loss": 0.0133, "step": 16740 }, { "epoch": 22.42302543507363, "grad_norm": 0.3259180784225464, "learning_rate": 9.627330258483802e-05, "loss": 0.0146, "step": 16750 }, { "epoch": 22.436412315930387, "grad_norm": 0.24590711295604706, "learning_rate": 9.62670362059247e-05, "loss": 0.0137, "step": 16760 }, { "epoch": 22.449799196787147, "grad_norm": 0.2741425633430481, "learning_rate": 9.626076476731786e-05, "loss": 0.0162, "step": 16770 }, { "epoch": 22.463186077643908, "grad_norm": 0.22715029120445251, "learning_rate": 9.625448826970336e-05, "loss": 0.0128, "step": 16780 }, { "epoch": 22.476572958500668, "grad_norm": 0.23959729075431824, "learning_rate": 9.624820671376755e-05, "loss": 0.0156, "step": 16790 }, { "epoch": 22.48995983935743, "grad_norm": 0.24279280006885529, "learning_rate": 9.62419201001974e-05, "loss": 0.0139, "step": 16800 }, { "epoch": 22.50334672021419, "grad_norm": 0.23946216702461243, "learning_rate": 9.623562842968037e-05, "loss": 0.0152, "step": 16810 }, { "epoch": 22.51673360107095, "grad_norm": 0.2539496123790741, "learning_rate": 9.622933170290454e-05, "loss": 0.0158, "step": 16820 }, { "epoch": 22.53012048192771, "grad_norm": 0.1567636877298355, "learning_rate": 9.622302992055849e-05, "loss": 0.0152, "step": 16830 }, { "epoch": 22.54350736278447, "grad_norm": 0.21449697017669678, "learning_rate": 9.62167230833314e-05, "loss": 0.0171, "step": 16840 }, { "epoch": 22.55689424364123, "grad_norm": 0.2154327630996704, "learning_rate": 9.621041119191295e-05, "loss": 0.0144, "step": 16850 }, { "epoch": 22.570281124497992, "grad_norm": 0.199310764670372, "learning_rate": 9.620409424699342e-05, "loss": 0.0118, "step": 16860 }, { "epoch": 22.583668005354752, "grad_norm": 1.0635396242141724, "learning_rate": 9.619777224926359e-05, "loss": 0.0144, "step": 16870 }, { "epoch": 22.597054886211513, "grad_norm": 1.0757580995559692, "learning_rate": 9.619144519941485e-05, "loss": 0.0128, "step": 16880 }, { "epoch": 22.610441767068274, "grad_norm": 0.21571972966194153, "learning_rate": 9.618511309813912e-05, "loss": 0.0141, "step": 16890 }, { "epoch": 22.623828647925034, "grad_norm": 0.24158930778503418, "learning_rate": 9.617877594612886e-05, "loss": 0.0148, "step": 16900 }, { "epoch": 22.637215528781795, "grad_norm": 0.2731855511665344, "learning_rate": 9.617243374407707e-05, "loss": 0.0129, "step": 16910 }, { "epoch": 22.650602409638555, "grad_norm": 0.24510039389133453, "learning_rate": 9.616608649267736e-05, "loss": 0.0164, "step": 16920 }, { "epoch": 22.663989290495316, "grad_norm": 0.2405036836862564, "learning_rate": 9.615973419262385e-05, "loss": 0.0142, "step": 16930 }, { "epoch": 22.677376171352076, "grad_norm": 0.2815602123737335, "learning_rate": 9.615337684461119e-05, "loss": 0.0167, "step": 16940 }, { "epoch": 22.690763052208837, "grad_norm": 0.3478851020336151, "learning_rate": 9.614701444933465e-05, "loss": 0.0132, "step": 16950 }, { "epoch": 22.704149933065597, "grad_norm": 0.2672278881072998, "learning_rate": 9.614064700748997e-05, "loss": 0.0114, "step": 16960 }, { "epoch": 22.717536813922358, "grad_norm": 0.3427831530570984, "learning_rate": 9.613427451977352e-05, "loss": 0.0154, "step": 16970 }, { "epoch": 22.730923694779115, "grad_norm": 0.1502324640750885, "learning_rate": 9.612789698688216e-05, "loss": 0.0128, "step": 16980 }, { "epoch": 22.744310575635875, "grad_norm": 0.22979077696800232, "learning_rate": 9.612151440951334e-05, "loss": 0.0171, "step": 16990 }, { "epoch": 22.757697456492636, "grad_norm": 0.2996981739997864, "learning_rate": 9.611512678836506e-05, "loss": 0.0183, "step": 17000 }, { "epoch": 22.771084337349397, "grad_norm": 0.1709384173154831, "learning_rate": 9.610873412413584e-05, "loss": 0.0148, "step": 17010 }, { "epoch": 22.784471218206157, "grad_norm": 0.2817484140396118, "learning_rate": 9.610233641752476e-05, "loss": 0.0194, "step": 17020 }, { "epoch": 22.797858099062918, "grad_norm": 0.17641732096672058, "learning_rate": 9.609593366923151e-05, "loss": 0.0169, "step": 17030 }, { "epoch": 22.811244979919678, "grad_norm": 0.1876816302537918, "learning_rate": 9.608952587995625e-05, "loss": 0.0143, "step": 17040 }, { "epoch": 22.82463186077644, "grad_norm": 0.6000911593437195, "learning_rate": 9.608311305039972e-05, "loss": 0.0159, "step": 17050 }, { "epoch": 22.8380187416332, "grad_norm": 0.21480679512023926, "learning_rate": 9.607669518126326e-05, "loss": 0.0134, "step": 17060 }, { "epoch": 22.85140562248996, "grad_norm": 0.21277955174446106, "learning_rate": 9.607027227324866e-05, "loss": 0.0148, "step": 17070 }, { "epoch": 22.86479250334672, "grad_norm": 0.8917891979217529, "learning_rate": 9.606384432705837e-05, "loss": 0.0138, "step": 17080 }, { "epoch": 22.87817938420348, "grad_norm": 0.34257209300994873, "learning_rate": 9.60574113433953e-05, "loss": 0.0142, "step": 17090 }, { "epoch": 22.89156626506024, "grad_norm": 0.23927611112594604, "learning_rate": 9.6050973322963e-05, "loss": 0.013, "step": 17100 }, { "epoch": 22.904953145917002, "grad_norm": 0.17770624160766602, "learning_rate": 9.604453026646547e-05, "loss": 0.0153, "step": 17110 }, { "epoch": 22.918340026773762, "grad_norm": 0.35068029165267944, "learning_rate": 9.603808217460735e-05, "loss": 0.0149, "step": 17120 }, { "epoch": 22.931726907630523, "grad_norm": 0.342313677072525, "learning_rate": 9.603162904809377e-05, "loss": 0.0166, "step": 17130 }, { "epoch": 22.945113788487284, "grad_norm": 0.27038338780403137, "learning_rate": 9.602517088763045e-05, "loss": 0.0154, "step": 17140 }, { "epoch": 22.958500669344044, "grad_norm": 0.29004114866256714, "learning_rate": 9.601870769392365e-05, "loss": 0.0158, "step": 17150 }, { "epoch": 22.971887550200805, "grad_norm": 1.062611699104309, "learning_rate": 9.601223946768017e-05, "loss": 0.0165, "step": 17160 }, { "epoch": 22.985274431057565, "grad_norm": 0.3085384964942932, "learning_rate": 9.600576620960734e-05, "loss": 0.0153, "step": 17170 }, { "epoch": 22.998661311914326, "grad_norm": 0.23608599603176117, "learning_rate": 9.599928792041308e-05, "loss": 0.0145, "step": 17180 }, { "epoch": 23.012048192771083, "grad_norm": 0.24020791053771973, "learning_rate": 9.599280460080587e-05, "loss": 0.0138, "step": 17190 }, { "epoch": 23.025435073627843, "grad_norm": 0.20824916660785675, "learning_rate": 9.59863162514947e-05, "loss": 0.0184, "step": 17200 }, { "epoch": 23.038821954484604, "grad_norm": 0.3793768286705017, "learning_rate": 9.597982287318911e-05, "loss": 0.015, "step": 17210 }, { "epoch": 23.052208835341364, "grad_norm": 0.2560615539550781, "learning_rate": 9.597332446659923e-05, "loss": 0.0155, "step": 17220 }, { "epoch": 23.065595716198125, "grad_norm": 0.27434518933296204, "learning_rate": 9.59668210324357e-05, "loss": 0.0161, "step": 17230 }, { "epoch": 23.078982597054885, "grad_norm": 0.21020762622356415, "learning_rate": 9.596031257140974e-05, "loss": 0.0161, "step": 17240 }, { "epoch": 23.092369477911646, "grad_norm": 0.3151760399341583, "learning_rate": 9.59537990842331e-05, "loss": 0.0152, "step": 17250 }, { "epoch": 23.105756358768406, "grad_norm": 0.3228706121444702, "learning_rate": 9.594728057161806e-05, "loss": 0.0148, "step": 17260 }, { "epoch": 23.119143239625167, "grad_norm": 0.34416383504867554, "learning_rate": 9.594075703427752e-05, "loss": 0.0149, "step": 17270 }, { "epoch": 23.132530120481928, "grad_norm": 0.2849937081336975, "learning_rate": 9.593422847292486e-05, "loss": 0.014, "step": 17280 }, { "epoch": 23.145917001338688, "grad_norm": 0.1171717718243599, "learning_rate": 9.592769488827402e-05, "loss": 0.0129, "step": 17290 }, { "epoch": 23.15930388219545, "grad_norm": 0.20034246146678925, "learning_rate": 9.592115628103952e-05, "loss": 0.0134, "step": 17300 }, { "epoch": 23.17269076305221, "grad_norm": 1.1569647789001465, "learning_rate": 9.591461265193643e-05, "loss": 0.0123, "step": 17310 }, { "epoch": 23.18607764390897, "grad_norm": 0.22031359374523163, "learning_rate": 9.590806400168032e-05, "loss": 0.0163, "step": 17320 }, { "epoch": 23.19946452476573, "grad_norm": 0.1422661542892456, "learning_rate": 9.590151033098735e-05, "loss": 0.0156, "step": 17330 }, { "epoch": 23.21285140562249, "grad_norm": 0.40443915128707886, "learning_rate": 9.589495164057423e-05, "loss": 0.0131, "step": 17340 }, { "epoch": 23.22623828647925, "grad_norm": 0.16969244182109833, "learning_rate": 9.58883879311582e-05, "loss": 0.0127, "step": 17350 }, { "epoch": 23.239625167336012, "grad_norm": 0.23848678171634674, "learning_rate": 9.588181920345705e-05, "loss": 0.0136, "step": 17360 }, { "epoch": 23.253012048192772, "grad_norm": 0.23095576465129852, "learning_rate": 9.587524545818913e-05, "loss": 0.0122, "step": 17370 }, { "epoch": 23.266398929049533, "grad_norm": 0.21459035575389862, "learning_rate": 9.586866669607335e-05, "loss": 0.0136, "step": 17380 }, { "epoch": 23.279785809906294, "grad_norm": 0.5767573118209839, "learning_rate": 9.586208291782915e-05, "loss": 0.0149, "step": 17390 }, { "epoch": 23.29317269076305, "grad_norm": 0.19981925189495087, "learning_rate": 9.58554941241765e-05, "loss": 0.0129, "step": 17400 }, { "epoch": 23.30655957161981, "grad_norm": 0.4077117443084717, "learning_rate": 9.584890031583596e-05, "loss": 0.016, "step": 17410 }, { "epoch": 23.31994645247657, "grad_norm": 0.16744044423103333, "learning_rate": 9.584230149352861e-05, "loss": 0.0173, "step": 17420 }, { "epoch": 23.333333333333332, "grad_norm": 0.20850779116153717, "learning_rate": 9.58356976579761e-05, "loss": 0.0146, "step": 17430 }, { "epoch": 23.346720214190093, "grad_norm": 0.3458678126335144, "learning_rate": 9.58290888099006e-05, "loss": 0.0136, "step": 17440 }, { "epoch": 23.360107095046853, "grad_norm": 0.23527397215366364, "learning_rate": 9.582247495002486e-05, "loss": 0.0114, "step": 17450 }, { "epoch": 23.373493975903614, "grad_norm": 0.30000942945480347, "learning_rate": 9.581585607907214e-05, "loss": 0.011, "step": 17460 }, { "epoch": 23.386880856760374, "grad_norm": 0.3730912506580353, "learning_rate": 9.580923219776628e-05, "loss": 0.0132, "step": 17470 }, { "epoch": 23.400267737617135, "grad_norm": 0.20611348748207092, "learning_rate": 9.580260330683167e-05, "loss": 0.0145, "step": 17480 }, { "epoch": 23.413654618473895, "grad_norm": 0.23975639045238495, "learning_rate": 9.579596940699322e-05, "loss": 0.0136, "step": 17490 }, { "epoch": 23.427041499330656, "grad_norm": 0.2696129083633423, "learning_rate": 9.578933049897643e-05, "loss": 0.0134, "step": 17500 }, { "epoch": 23.440428380187416, "grad_norm": 0.16664202511310577, "learning_rate": 9.578268658350728e-05, "loss": 0.0134, "step": 17510 }, { "epoch": 23.453815261044177, "grad_norm": 0.354469895362854, "learning_rate": 9.577603766131235e-05, "loss": 0.012, "step": 17520 }, { "epoch": 23.467202141900938, "grad_norm": 0.33506011962890625, "learning_rate": 9.576938373311878e-05, "loss": 0.0146, "step": 17530 }, { "epoch": 23.480589022757698, "grad_norm": 0.18983043730258942, "learning_rate": 9.576272479965421e-05, "loss": 0.0157, "step": 17540 }, { "epoch": 23.49397590361446, "grad_norm": 0.3165211081504822, "learning_rate": 9.575606086164687e-05, "loss": 0.0169, "step": 17550 }, { "epoch": 23.50736278447122, "grad_norm": 0.3032093644142151, "learning_rate": 9.57493919198255e-05, "loss": 0.0159, "step": 17560 }, { "epoch": 23.52074966532798, "grad_norm": 0.24634705483913422, "learning_rate": 9.57427179749194e-05, "loss": 0.0155, "step": 17570 }, { "epoch": 23.53413654618474, "grad_norm": 0.2477940022945404, "learning_rate": 9.573603902765846e-05, "loss": 0.0158, "step": 17580 }, { "epoch": 23.5475234270415, "grad_norm": 0.33122777938842773, "learning_rate": 9.572935507877304e-05, "loss": 0.0184, "step": 17590 }, { "epoch": 23.56091030789826, "grad_norm": 0.2268621027469635, "learning_rate": 9.57226661289941e-05, "loss": 0.0195, "step": 17600 }, { "epoch": 23.57429718875502, "grad_norm": 0.40722212195396423, "learning_rate": 9.571597217905315e-05, "loss": 0.014, "step": 17610 }, { "epoch": 23.58768406961178, "grad_norm": 1.3481661081314087, "learning_rate": 9.57092732296822e-05, "loss": 0.016, "step": 17620 }, { "epoch": 23.60107095046854, "grad_norm": 0.4441845417022705, "learning_rate": 9.570256928161385e-05, "loss": 0.014, "step": 17630 }, { "epoch": 23.6144578313253, "grad_norm": 0.21217487752437592, "learning_rate": 9.569586033558126e-05, "loss": 0.0146, "step": 17640 }, { "epoch": 23.62784471218206, "grad_norm": 0.14397023618221283, "learning_rate": 9.568914639231807e-05, "loss": 0.0148, "step": 17650 }, { "epoch": 23.64123159303882, "grad_norm": 0.23334141075611115, "learning_rate": 9.568242745255852e-05, "loss": 0.0161, "step": 17660 }, { "epoch": 23.65461847389558, "grad_norm": 0.193083256483078, "learning_rate": 9.567570351703739e-05, "loss": 0.0158, "step": 17670 }, { "epoch": 23.668005354752342, "grad_norm": 0.3226354718208313, "learning_rate": 9.566897458649001e-05, "loss": 0.0136, "step": 17680 }, { "epoch": 23.681392235609103, "grad_norm": 0.21805325150489807, "learning_rate": 9.566224066165221e-05, "loss": 0.0132, "step": 17690 }, { "epoch": 23.694779116465863, "grad_norm": 0.22702175378799438, "learning_rate": 9.565550174326043e-05, "loss": 0.0164, "step": 17700 }, { "epoch": 23.708165997322624, "grad_norm": 0.20626693964004517, "learning_rate": 9.564875783205162e-05, "loss": 0.0162, "step": 17710 }, { "epoch": 23.721552878179384, "grad_norm": 0.17608924210071564, "learning_rate": 9.564200892876328e-05, "loss": 0.0147, "step": 17720 }, { "epoch": 23.734939759036145, "grad_norm": 0.19174987077713013, "learning_rate": 9.563525503413348e-05, "loss": 0.0127, "step": 17730 }, { "epoch": 23.748326639892905, "grad_norm": 0.2480880320072174, "learning_rate": 9.562849614890079e-05, "loss": 0.0131, "step": 17740 }, { "epoch": 23.761713520749666, "grad_norm": 1.6270478963851929, "learning_rate": 9.562173227380436e-05, "loss": 0.0126, "step": 17750 }, { "epoch": 23.775100401606426, "grad_norm": 0.24483895301818848, "learning_rate": 9.561496340958389e-05, "loss": 0.0152, "step": 17760 }, { "epoch": 23.788487282463187, "grad_norm": 0.21944212913513184, "learning_rate": 9.560818955697959e-05, "loss": 0.0151, "step": 17770 }, { "epoch": 23.801874163319948, "grad_norm": 0.19660930335521698, "learning_rate": 9.560141071673228e-05, "loss": 0.0157, "step": 17780 }, { "epoch": 23.815261044176708, "grad_norm": 0.2617850601673126, "learning_rate": 9.559462688958323e-05, "loss": 0.0144, "step": 17790 }, { "epoch": 23.82864792503347, "grad_norm": 0.31295275688171387, "learning_rate": 9.558783807627434e-05, "loss": 0.0137, "step": 17800 }, { "epoch": 23.84203480589023, "grad_norm": 0.18101944029331207, "learning_rate": 9.558104427754801e-05, "loss": 0.0139, "step": 17810 }, { "epoch": 23.855421686746986, "grad_norm": 0.2671315371990204, "learning_rate": 9.557424549414722e-05, "loss": 0.0131, "step": 17820 }, { "epoch": 23.868808567603747, "grad_norm": 0.23666304349899292, "learning_rate": 9.556744172681546e-05, "loss": 0.0151, "step": 17830 }, { "epoch": 23.882195448460507, "grad_norm": 0.22136414051055908, "learning_rate": 9.556063297629677e-05, "loss": 0.0152, "step": 17840 }, { "epoch": 23.895582329317268, "grad_norm": 0.5041967630386353, "learning_rate": 9.555381924333578e-05, "loss": 0.013, "step": 17850 }, { "epoch": 23.90896921017403, "grad_norm": 0.16861717402935028, "learning_rate": 9.554700052867758e-05, "loss": 0.0125, "step": 17860 }, { "epoch": 23.92235609103079, "grad_norm": 0.14803797006607056, "learning_rate": 9.554017683306789e-05, "loss": 0.0129, "step": 17870 }, { "epoch": 23.93574297188755, "grad_norm": 0.19012053310871124, "learning_rate": 9.553334815725294e-05, "loss": 0.0133, "step": 17880 }, { "epoch": 23.94912985274431, "grad_norm": 0.22258147597312927, "learning_rate": 9.552651450197949e-05, "loss": 0.0127, "step": 17890 }, { "epoch": 23.96251673360107, "grad_norm": 0.322729229927063, "learning_rate": 9.551967586799486e-05, "loss": 0.0119, "step": 17900 }, { "epoch": 23.97590361445783, "grad_norm": 0.18911145627498627, "learning_rate": 9.551283225604692e-05, "loss": 0.0128, "step": 17910 }, { "epoch": 23.98929049531459, "grad_norm": 0.160243421792984, "learning_rate": 9.550598366688406e-05, "loss": 0.0138, "step": 17920 }, { "epoch": 24.002677376171352, "grad_norm": 0.3955256938934326, "learning_rate": 9.549913010125526e-05, "loss": 0.0116, "step": 17930 }, { "epoch": 24.016064257028113, "grad_norm": 0.14795835316181183, "learning_rate": 9.549227155990999e-05, "loss": 0.0154, "step": 17940 }, { "epoch": 24.029451137884873, "grad_norm": 0.19619429111480713, "learning_rate": 9.548540804359828e-05, "loss": 0.0125, "step": 17950 }, { "epoch": 24.042838018741634, "grad_norm": 0.22062750160694122, "learning_rate": 9.547853955307077e-05, "loss": 0.0134, "step": 17960 }, { "epoch": 24.056224899598394, "grad_norm": 0.273221492767334, "learning_rate": 9.547166608907853e-05, "loss": 0.0125, "step": 17970 }, { "epoch": 24.069611780455155, "grad_norm": 0.28442585468292236, "learning_rate": 9.546478765237326e-05, "loss": 0.0124, "step": 17980 }, { "epoch": 24.082998661311915, "grad_norm": 0.31078478693962097, "learning_rate": 9.545790424370715e-05, "loss": 0.0138, "step": 17990 }, { "epoch": 24.096385542168676, "grad_norm": 0.1688644289970398, "learning_rate": 9.5451015863833e-05, "loss": 0.0115, "step": 18000 }, { "epoch": 24.109772423025436, "grad_norm": 0.3204922676086426, "learning_rate": 9.544412251350408e-05, "loss": 0.0149, "step": 18010 }, { "epoch": 24.123159303882197, "grad_norm": 0.17132332921028137, "learning_rate": 9.543722419347422e-05, "loss": 0.0114, "step": 18020 }, { "epoch": 24.136546184738958, "grad_norm": 0.1555328071117401, "learning_rate": 9.543032090449788e-05, "loss": 0.0144, "step": 18030 }, { "epoch": 24.149933065595715, "grad_norm": 0.3128316402435303, "learning_rate": 9.542341264732992e-05, "loss": 0.0146, "step": 18040 }, { "epoch": 24.163319946452475, "grad_norm": 0.3573691248893738, "learning_rate": 9.541649942272585e-05, "loss": 0.0143, "step": 18050 }, { "epoch": 24.176706827309236, "grad_norm": 0.22573506832122803, "learning_rate": 9.54095812314417e-05, "loss": 0.0139, "step": 18060 }, { "epoch": 24.190093708165996, "grad_norm": 0.2850129008293152, "learning_rate": 9.540265807423401e-05, "loss": 0.0167, "step": 18070 }, { "epoch": 24.203480589022757, "grad_norm": 0.34591227769851685, "learning_rate": 9.53957299518599e-05, "loss": 0.0123, "step": 18080 }, { "epoch": 24.216867469879517, "grad_norm": 0.23476256430149078, "learning_rate": 9.5388796865077e-05, "loss": 0.0134, "step": 18090 }, { "epoch": 24.230254350736278, "grad_norm": 0.24436797201633453, "learning_rate": 9.538185881464353e-05, "loss": 0.0128, "step": 18100 }, { "epoch": 24.24364123159304, "grad_norm": 0.21236802637577057, "learning_rate": 9.537491580131821e-05, "loss": 0.0143, "step": 18110 }, { "epoch": 24.2570281124498, "grad_norm": 0.19120845198631287, "learning_rate": 9.53679678258603e-05, "loss": 0.0118, "step": 18120 }, { "epoch": 24.27041499330656, "grad_norm": 0.23971930146217346, "learning_rate": 9.536101488902966e-05, "loss": 0.0127, "step": 18130 }, { "epoch": 24.28380187416332, "grad_norm": 0.40261489152908325, "learning_rate": 9.535405699158663e-05, "loss": 0.0134, "step": 18140 }, { "epoch": 24.29718875502008, "grad_norm": 0.15271373093128204, "learning_rate": 9.53470941342921e-05, "loss": 0.0122, "step": 18150 }, { "epoch": 24.31057563587684, "grad_norm": 0.17275100946426392, "learning_rate": 9.534012631790756e-05, "loss": 0.0149, "step": 18160 }, { "epoch": 24.3239625167336, "grad_norm": 0.1492404043674469, "learning_rate": 9.533315354319494e-05, "loss": 0.0105, "step": 18170 }, { "epoch": 24.337349397590362, "grad_norm": 0.20286156237125397, "learning_rate": 9.532617581091682e-05, "loss": 0.0146, "step": 18180 }, { "epoch": 24.350736278447123, "grad_norm": 1.0799394845962524, "learning_rate": 9.531919312183629e-05, "loss": 0.0187, "step": 18190 }, { "epoch": 24.364123159303883, "grad_norm": 0.17610853910446167, "learning_rate": 9.531220547671688e-05, "loss": 0.014, "step": 18200 }, { "epoch": 24.377510040160644, "grad_norm": 0.2466963529586792, "learning_rate": 9.530521287632285e-05, "loss": 0.0127, "step": 18210 }, { "epoch": 24.390896921017404, "grad_norm": 0.2757948637008667, "learning_rate": 9.529821532141884e-05, "loss": 0.0144, "step": 18220 }, { "epoch": 24.404283801874165, "grad_norm": 0.3208604156970978, "learning_rate": 9.52912128127701e-05, "loss": 0.0157, "step": 18230 }, { "epoch": 24.417670682730925, "grad_norm": 0.20242370665073395, "learning_rate": 9.528420535114244e-05, "loss": 0.0125, "step": 18240 }, { "epoch": 24.431057563587682, "grad_norm": 0.18862415850162506, "learning_rate": 9.527719293730215e-05, "loss": 0.0146, "step": 18250 }, { "epoch": 24.444444444444443, "grad_norm": 0.2388192117214203, "learning_rate": 9.527017557201611e-05, "loss": 0.0125, "step": 18260 }, { "epoch": 24.457831325301203, "grad_norm": 0.25856176018714905, "learning_rate": 9.526315325605176e-05, "loss": 0.0141, "step": 18270 }, { "epoch": 24.471218206157964, "grad_norm": 0.36268025636672974, "learning_rate": 9.525612599017699e-05, "loss": 0.0128, "step": 18280 }, { "epoch": 24.484605087014724, "grad_norm": 0.2541908919811249, "learning_rate": 9.524909377516033e-05, "loss": 0.0126, "step": 18290 }, { "epoch": 24.497991967871485, "grad_norm": 0.39441972970962524, "learning_rate": 9.524205661177081e-05, "loss": 0.0143, "step": 18300 }, { "epoch": 24.511378848728246, "grad_norm": 0.19185799360275269, "learning_rate": 9.523501450077801e-05, "loss": 0.0124, "step": 18310 }, { "epoch": 24.524765729585006, "grad_norm": 0.16049882769584656, "learning_rate": 9.522796744295202e-05, "loss": 0.0131, "step": 18320 }, { "epoch": 24.538152610441767, "grad_norm": 0.171552836894989, "learning_rate": 9.522091543906352e-05, "loss": 0.0152, "step": 18330 }, { "epoch": 24.551539491298527, "grad_norm": 0.5013549327850342, "learning_rate": 9.521385848988369e-05, "loss": 0.0157, "step": 18340 }, { "epoch": 24.564926372155288, "grad_norm": 0.24302814900875092, "learning_rate": 9.520679659618428e-05, "loss": 0.0184, "step": 18350 }, { "epoch": 24.57831325301205, "grad_norm": 0.15452629327774048, "learning_rate": 9.519972975873754e-05, "loss": 0.0154, "step": 18360 }, { "epoch": 24.59170013386881, "grad_norm": 0.24657483398914337, "learning_rate": 9.519265797831633e-05, "loss": 0.0131, "step": 18370 }, { "epoch": 24.60508701472557, "grad_norm": 0.3791729211807251, "learning_rate": 9.518558125569399e-05, "loss": 0.0153, "step": 18380 }, { "epoch": 24.61847389558233, "grad_norm": 0.21499894559383392, "learning_rate": 9.517849959164442e-05, "loss": 0.0163, "step": 18390 }, { "epoch": 24.63186077643909, "grad_norm": 0.21451561152935028, "learning_rate": 9.517141298694205e-05, "loss": 0.0174, "step": 18400 }, { "epoch": 24.64524765729585, "grad_norm": 0.35514500737190247, "learning_rate": 9.516432144236188e-05, "loss": 0.0145, "step": 18410 }, { "epoch": 24.65863453815261, "grad_norm": 0.8317099809646606, "learning_rate": 9.515722495867941e-05, "loss": 0.0157, "step": 18420 }, { "epoch": 24.672021419009372, "grad_norm": 0.13623835146427155, "learning_rate": 9.515012353667072e-05, "loss": 0.0146, "step": 18430 }, { "epoch": 24.685408299866133, "grad_norm": 0.2652827799320221, "learning_rate": 9.51430171771124e-05, "loss": 0.0153, "step": 18440 }, { "epoch": 24.698795180722893, "grad_norm": 0.2964954376220703, "learning_rate": 9.513590588078159e-05, "loss": 0.0137, "step": 18450 }, { "epoch": 24.712182061579654, "grad_norm": 1.0864592790603638, "learning_rate": 9.512878964845597e-05, "loss": 0.0117, "step": 18460 }, { "epoch": 24.72556894243641, "grad_norm": 0.44504207372665405, "learning_rate": 9.512166848091377e-05, "loss": 0.0136, "step": 18470 }, { "epoch": 24.73895582329317, "grad_norm": 0.253915399312973, "learning_rate": 9.511454237893376e-05, "loss": 0.0143, "step": 18480 }, { "epoch": 24.75234270414993, "grad_norm": 0.19741204380989075, "learning_rate": 9.51074113432952e-05, "loss": 0.0132, "step": 18490 }, { "epoch": 24.765729585006692, "grad_norm": 0.20496051013469696, "learning_rate": 9.510027537477797e-05, "loss": 0.0119, "step": 18500 }, { "epoch": 24.779116465863453, "grad_norm": 0.20292358100414276, "learning_rate": 9.509313447416242e-05, "loss": 0.0127, "step": 18510 }, { "epoch": 24.792503346720213, "grad_norm": 0.3963799774646759, "learning_rate": 9.508598864222949e-05, "loss": 0.0116, "step": 18520 }, { "epoch": 24.805890227576974, "grad_norm": 0.3742136061191559, "learning_rate": 9.507883787976062e-05, "loss": 0.0142, "step": 18530 }, { "epoch": 24.819277108433734, "grad_norm": 0.42557668685913086, "learning_rate": 9.507168218753781e-05, "loss": 0.0152, "step": 18540 }, { "epoch": 24.832663989290495, "grad_norm": 0.22349633276462555, "learning_rate": 9.506452156634362e-05, "loss": 0.013, "step": 18550 }, { "epoch": 24.846050870147256, "grad_norm": 0.1854640543460846, "learning_rate": 9.505735601696109e-05, "loss": 0.0147, "step": 18560 }, { "epoch": 24.859437751004016, "grad_norm": 0.26988574862480164, "learning_rate": 9.505018554017385e-05, "loss": 0.0136, "step": 18570 }, { "epoch": 24.872824631860777, "grad_norm": 0.24351011216640472, "learning_rate": 9.504301013676604e-05, "loss": 0.013, "step": 18580 }, { "epoch": 24.886211512717537, "grad_norm": 0.21016603708267212, "learning_rate": 9.503582980752238e-05, "loss": 0.0175, "step": 18590 }, { "epoch": 24.899598393574298, "grad_norm": 0.18045777082443237, "learning_rate": 9.502864455322809e-05, "loss": 0.0131, "step": 18600 }, { "epoch": 24.91298527443106, "grad_norm": 0.25639796257019043, "learning_rate": 9.502145437466891e-05, "loss": 0.0158, "step": 18610 }, { "epoch": 24.92637215528782, "grad_norm": 0.25433918833732605, "learning_rate": 9.501425927263116e-05, "loss": 0.016, "step": 18620 }, { "epoch": 24.93975903614458, "grad_norm": 0.15842218697071075, "learning_rate": 9.500705924790172e-05, "loss": 0.0116, "step": 18630 }, { "epoch": 24.95314591700134, "grad_norm": 0.20652161538600922, "learning_rate": 9.499985430126794e-05, "loss": 0.0141, "step": 18640 }, { "epoch": 24.9665327978581, "grad_norm": 0.3045995235443115, "learning_rate": 9.499264443351775e-05, "loss": 0.0145, "step": 18650 }, { "epoch": 24.97991967871486, "grad_norm": 0.1678410768508911, "learning_rate": 9.498542964543961e-05, "loss": 0.0154, "step": 18660 }, { "epoch": 24.99330655957162, "grad_norm": 0.4240533411502838, "learning_rate": 9.497820993782252e-05, "loss": 0.0129, "step": 18670 }, { "epoch": 25.00669344042838, "grad_norm": 0.25699836015701294, "learning_rate": 9.497098531145601e-05, "loss": 0.0146, "step": 18680 }, { "epoch": 25.02008032128514, "grad_norm": 0.9591240286827087, "learning_rate": 9.496375576713017e-05, "loss": 0.0122, "step": 18690 }, { "epoch": 25.0334672021419, "grad_norm": 0.45946288108825684, "learning_rate": 9.49565213056356e-05, "loss": 0.0159, "step": 18700 }, { "epoch": 25.04685408299866, "grad_norm": 0.16911786794662476, "learning_rate": 9.494928192776342e-05, "loss": 0.0154, "step": 18710 }, { "epoch": 25.06024096385542, "grad_norm": 0.17678304016590118, "learning_rate": 9.494203763430538e-05, "loss": 0.0149, "step": 18720 }, { "epoch": 25.07362784471218, "grad_norm": 0.40296825766563416, "learning_rate": 9.493478842605366e-05, "loss": 0.0156, "step": 18730 }, { "epoch": 25.08701472556894, "grad_norm": 0.1965089589357376, "learning_rate": 9.492753430380105e-05, "loss": 0.014, "step": 18740 }, { "epoch": 25.100401606425702, "grad_norm": 2.012805461883545, "learning_rate": 9.492027526834083e-05, "loss": 0.0153, "step": 18750 }, { "epoch": 25.113788487282463, "grad_norm": 0.3671269416809082, "learning_rate": 9.491301132046684e-05, "loss": 0.0188, "step": 18760 }, { "epoch": 25.127175368139223, "grad_norm": 0.1648692786693573, "learning_rate": 9.490574246097345e-05, "loss": 0.0151, "step": 18770 }, { "epoch": 25.140562248995984, "grad_norm": 0.24244752526283264, "learning_rate": 9.48984686906556e-05, "loss": 0.0128, "step": 18780 }, { "epoch": 25.153949129852744, "grad_norm": 0.30105018615722656, "learning_rate": 9.489119001030871e-05, "loss": 0.0152, "step": 18790 }, { "epoch": 25.167336010709505, "grad_norm": 0.1943451464176178, "learning_rate": 9.488390642072878e-05, "loss": 0.0139, "step": 18800 }, { "epoch": 25.180722891566266, "grad_norm": 0.1858823448419571, "learning_rate": 9.48766179227123e-05, "loss": 0.0143, "step": 18810 }, { "epoch": 25.194109772423026, "grad_norm": 0.3337344825267792, "learning_rate": 9.486932451705636e-05, "loss": 0.0152, "step": 18820 }, { "epoch": 25.207496653279787, "grad_norm": 0.22863055765628815, "learning_rate": 9.486202620455857e-05, "loss": 0.0148, "step": 18830 }, { "epoch": 25.220883534136547, "grad_norm": 0.2958337962627411, "learning_rate": 9.485472298601704e-05, "loss": 0.0142, "step": 18840 }, { "epoch": 25.234270414993308, "grad_norm": 0.2719837427139282, "learning_rate": 9.484741486223043e-05, "loss": 0.0146, "step": 18850 }, { "epoch": 25.24765729585007, "grad_norm": 0.2520800530910492, "learning_rate": 9.484010183399797e-05, "loss": 0.016, "step": 18860 }, { "epoch": 25.26104417670683, "grad_norm": 0.43496355414390564, "learning_rate": 9.483278390211938e-05, "loss": 0.0143, "step": 18870 }, { "epoch": 25.27443105756359, "grad_norm": 0.323858380317688, "learning_rate": 9.482546106739496e-05, "loss": 0.0155, "step": 18880 }, { "epoch": 25.287817938420346, "grad_norm": 0.38251641392707825, "learning_rate": 9.48181333306255e-05, "loss": 0.0145, "step": 18890 }, { "epoch": 25.301204819277107, "grad_norm": 0.36987221240997314, "learning_rate": 9.481080069261237e-05, "loss": 0.0162, "step": 18900 }, { "epoch": 25.314591700133867, "grad_norm": 0.16993984580039978, "learning_rate": 9.480346315415745e-05, "loss": 0.0113, "step": 18910 }, { "epoch": 25.327978580990628, "grad_norm": 0.4755409061908722, "learning_rate": 9.479612071606314e-05, "loss": 0.012, "step": 18920 }, { "epoch": 25.34136546184739, "grad_norm": 0.31486186385154724, "learning_rate": 9.478877337913244e-05, "loss": 0.0154, "step": 18930 }, { "epoch": 25.35475234270415, "grad_norm": 0.6534698605537415, "learning_rate": 9.478142114416881e-05, "loss": 0.0142, "step": 18940 }, { "epoch": 25.36813922356091, "grad_norm": 0.2696448564529419, "learning_rate": 9.47740640119763e-05, "loss": 0.0158, "step": 18950 }, { "epoch": 25.38152610441767, "grad_norm": 0.7369524240493774, "learning_rate": 9.476670198335947e-05, "loss": 0.0141, "step": 18960 }, { "epoch": 25.39491298527443, "grad_norm": 0.18445941805839539, "learning_rate": 9.47593350591234e-05, "loss": 0.0152, "step": 18970 }, { "epoch": 25.40829986613119, "grad_norm": 0.3931742012500763, "learning_rate": 9.475196324007376e-05, "loss": 0.0157, "step": 18980 }, { "epoch": 25.42168674698795, "grad_norm": 0.5220552682876587, "learning_rate": 9.474458652701669e-05, "loss": 0.017, "step": 18990 }, { "epoch": 25.435073627844712, "grad_norm": 0.329398512840271, "learning_rate": 9.473720492075892e-05, "loss": 0.0158, "step": 19000 }, { "epoch": 25.448460508701473, "grad_norm": 0.2328071892261505, "learning_rate": 9.472981842210768e-05, "loss": 0.0182, "step": 19010 }, { "epoch": 25.461847389558233, "grad_norm": 0.15077288448810577, "learning_rate": 9.472242703187074e-05, "loss": 0.0135, "step": 19020 }, { "epoch": 25.475234270414994, "grad_norm": 0.227525532245636, "learning_rate": 9.471503075085643e-05, "loss": 0.0135, "step": 19030 }, { "epoch": 25.488621151271754, "grad_norm": 0.44924265146255493, "learning_rate": 9.470762957987359e-05, "loss": 0.0134, "step": 19040 }, { "epoch": 25.502008032128515, "grad_norm": 0.13739025592803955, "learning_rate": 9.470022351973158e-05, "loss": 0.0137, "step": 19050 }, { "epoch": 25.515394912985276, "grad_norm": 0.27636513113975525, "learning_rate": 9.469281257124034e-05, "loss": 0.0172, "step": 19060 }, { "epoch": 25.528781793842036, "grad_norm": 0.7358084917068481, "learning_rate": 9.46853967352103e-05, "loss": 0.0153, "step": 19070 }, { "epoch": 25.542168674698797, "grad_norm": 0.15813791751861572, "learning_rate": 9.467797601245246e-05, "loss": 0.0145, "step": 19080 }, { "epoch": 25.555555555555557, "grad_norm": 0.2000926285982132, "learning_rate": 9.467055040377834e-05, "loss": 0.0165, "step": 19090 }, { "epoch": 25.568942436412314, "grad_norm": 0.37330639362335205, "learning_rate": 9.466311990999999e-05, "loss": 0.0138, "step": 19100 }, { "epoch": 25.582329317269075, "grad_norm": 0.25599217414855957, "learning_rate": 9.465568453193e-05, "loss": 0.0138, "step": 19110 }, { "epoch": 25.595716198125835, "grad_norm": 0.2478964924812317, "learning_rate": 9.464824427038148e-05, "loss": 0.0179, "step": 19120 }, { "epoch": 25.609103078982596, "grad_norm": 0.5284813642501831, "learning_rate": 9.46407991261681e-05, "loss": 0.0148, "step": 19130 }, { "epoch": 25.622489959839356, "grad_norm": 0.188141867518425, "learning_rate": 9.463334910010404e-05, "loss": 0.0131, "step": 19140 }, { "epoch": 25.635876840696117, "grad_norm": 0.2347090095281601, "learning_rate": 9.462589419300403e-05, "loss": 0.0154, "step": 19150 }, { "epoch": 25.649263721552877, "grad_norm": 0.17555779218673706, "learning_rate": 9.461843440568333e-05, "loss": 0.0159, "step": 19160 }, { "epoch": 25.662650602409638, "grad_norm": 0.2236834317445755, "learning_rate": 9.461096973895773e-05, "loss": 0.0151, "step": 19170 }, { "epoch": 25.6760374832664, "grad_norm": 0.3766448199748993, "learning_rate": 9.460350019364355e-05, "loss": 0.0126, "step": 19180 }, { "epoch": 25.68942436412316, "grad_norm": 0.15073996782302856, "learning_rate": 9.459602577055764e-05, "loss": 0.0128, "step": 19190 }, { "epoch": 25.70281124497992, "grad_norm": 0.1835249662399292, "learning_rate": 9.45885464705174e-05, "loss": 0.0141, "step": 19200 }, { "epoch": 25.71619812583668, "grad_norm": 0.8235316872596741, "learning_rate": 9.458106229434076e-05, "loss": 0.0174, "step": 19210 }, { "epoch": 25.72958500669344, "grad_norm": 0.30413535237312317, "learning_rate": 9.457357324284617e-05, "loss": 0.0136, "step": 19220 }, { "epoch": 25.7429718875502, "grad_norm": 0.2618063688278198, "learning_rate": 9.456607931685262e-05, "loss": 0.0144, "step": 19230 }, { "epoch": 25.75635876840696, "grad_norm": 0.2913195788860321, "learning_rate": 9.455858051717965e-05, "loss": 0.0142, "step": 19240 }, { "epoch": 25.769745649263722, "grad_norm": 0.16231021285057068, "learning_rate": 9.45510768446473e-05, "loss": 0.014, "step": 19250 }, { "epoch": 25.783132530120483, "grad_norm": 0.45641621947288513, "learning_rate": 9.454356830007618e-05, "loss": 0.0151, "step": 19260 }, { "epoch": 25.796519410977243, "grad_norm": 0.26161545515060425, "learning_rate": 9.45360548842874e-05, "loss": 0.0159, "step": 19270 }, { "epoch": 25.809906291834004, "grad_norm": 0.4883521497249603, "learning_rate": 9.452853659810261e-05, "loss": 0.0141, "step": 19280 }, { "epoch": 25.823293172690764, "grad_norm": 1.1386888027191162, "learning_rate": 9.452101344234401e-05, "loss": 0.0141, "step": 19290 }, { "epoch": 25.836680053547525, "grad_norm": 0.6615814566612244, "learning_rate": 9.451348541783431e-05, "loss": 0.019, "step": 19300 }, { "epoch": 25.850066934404285, "grad_norm": 0.4600994884967804, "learning_rate": 9.450595252539678e-05, "loss": 0.0132, "step": 19310 }, { "epoch": 25.863453815261042, "grad_norm": 1.0129680633544922, "learning_rate": 9.449841476585518e-05, "loss": 0.0167, "step": 19320 }, { "epoch": 25.876840696117803, "grad_norm": 0.14376066625118256, "learning_rate": 9.449087214003384e-05, "loss": 0.0149, "step": 19330 }, { "epoch": 25.890227576974564, "grad_norm": 0.6334617137908936, "learning_rate": 9.448332464875765e-05, "loss": 0.016, "step": 19340 }, { "epoch": 25.903614457831324, "grad_norm": 0.19114546477794647, "learning_rate": 9.447577229285192e-05, "loss": 0.0123, "step": 19350 }, { "epoch": 25.917001338688085, "grad_norm": 0.19602955877780914, "learning_rate": 9.446821507314261e-05, "loss": 0.0154, "step": 19360 }, { "epoch": 25.930388219544845, "grad_norm": 0.18942351639270782, "learning_rate": 9.446065299045617e-05, "loss": 0.0124, "step": 19370 }, { "epoch": 25.943775100401606, "grad_norm": 0.20911145210266113, "learning_rate": 9.445308604561955e-05, "loss": 0.0129, "step": 19380 }, { "epoch": 25.957161981258366, "grad_norm": 0.3277495503425598, "learning_rate": 9.444551423946028e-05, "loss": 0.0151, "step": 19390 }, { "epoch": 25.970548862115127, "grad_norm": 0.15802162885665894, "learning_rate": 9.443793757280638e-05, "loss": 0.0138, "step": 19400 }, { "epoch": 25.983935742971887, "grad_norm": 0.2502221465110779, "learning_rate": 9.443035604648646e-05, "loss": 0.0175, "step": 19410 }, { "epoch": 25.997322623828648, "grad_norm": 0.4484037458896637, "learning_rate": 9.44227696613296e-05, "loss": 0.0154, "step": 19420 }, { "epoch": 26.01070950468541, "grad_norm": 0.21435829997062683, "learning_rate": 9.441517841816542e-05, "loss": 0.0137, "step": 19430 }, { "epoch": 26.02409638554217, "grad_norm": 0.14937140047550201, "learning_rate": 9.440758231782413e-05, "loss": 0.0126, "step": 19440 }, { "epoch": 26.03748326639893, "grad_norm": 0.16429050266742706, "learning_rate": 9.439998136113639e-05, "loss": 0.0126, "step": 19450 }, { "epoch": 26.05087014725569, "grad_norm": 0.5495272874832153, "learning_rate": 9.439237554893344e-05, "loss": 0.0166, "step": 19460 }, { "epoch": 26.06425702811245, "grad_norm": 0.25954461097717285, "learning_rate": 9.438476488204705e-05, "loss": 0.0115, "step": 19470 }, { "epoch": 26.07764390896921, "grad_norm": 0.21519868075847626, "learning_rate": 9.43771493613095e-05, "loss": 0.0126, "step": 19480 }, { "epoch": 26.09103078982597, "grad_norm": 0.22997337579727173, "learning_rate": 9.436952898755362e-05, "loss": 0.011, "step": 19490 }, { "epoch": 26.104417670682732, "grad_norm": 0.15256783366203308, "learning_rate": 9.436190376161276e-05, "loss": 0.0112, "step": 19500 }, { "epoch": 26.117804551539493, "grad_norm": 0.21945153176784515, "learning_rate": 9.43542736843208e-05, "loss": 0.0108, "step": 19510 }, { "epoch": 26.131191432396253, "grad_norm": 0.23396694660186768, "learning_rate": 9.434663875651216e-05, "loss": 0.0121, "step": 19520 }, { "epoch": 26.14457831325301, "grad_norm": 0.2089610993862152, "learning_rate": 9.433899897902177e-05, "loss": 0.0163, "step": 19530 }, { "epoch": 26.15796519410977, "grad_norm": 0.24311354756355286, "learning_rate": 9.433135435268511e-05, "loss": 0.0123, "step": 19540 }, { "epoch": 26.17135207496653, "grad_norm": 0.1949852555990219, "learning_rate": 9.432370487833819e-05, "loss": 0.0123, "step": 19550 }, { "epoch": 26.184738955823292, "grad_norm": 0.14286182820796967, "learning_rate": 9.431605055681756e-05, "loss": 0.0106, "step": 19560 }, { "epoch": 26.198125836680052, "grad_norm": 0.21382836997509003, "learning_rate": 9.430839138896026e-05, "loss": 0.0141, "step": 19570 }, { "epoch": 26.211512717536813, "grad_norm": 0.2554962635040283, "learning_rate": 9.43007273756039e-05, "loss": 0.0136, "step": 19580 }, { "epoch": 26.224899598393574, "grad_norm": 0.25734743475914, "learning_rate": 9.429305851758658e-05, "loss": 0.0131, "step": 19590 }, { "epoch": 26.238286479250334, "grad_norm": 0.2171589583158493, "learning_rate": 9.428538481574699e-05, "loss": 0.0124, "step": 19600 }, { "epoch": 26.251673360107095, "grad_norm": 0.20751678943634033, "learning_rate": 9.42777062709243e-05, "loss": 0.0116, "step": 19610 }, { "epoch": 26.265060240963855, "grad_norm": 0.33698222041130066, "learning_rate": 9.427002288395821e-05, "loss": 0.0129, "step": 19620 }, { "epoch": 26.278447121820616, "grad_norm": 0.19381384551525116, "learning_rate": 9.426233465568898e-05, "loss": 0.0126, "step": 19630 }, { "epoch": 26.291834002677376, "grad_norm": 0.20733971893787384, "learning_rate": 9.42546415869574e-05, "loss": 0.014, "step": 19640 }, { "epoch": 26.305220883534137, "grad_norm": 0.16026079654693604, "learning_rate": 9.424694367860473e-05, "loss": 0.0147, "step": 19650 }, { "epoch": 26.318607764390897, "grad_norm": 0.24706406891345978, "learning_rate": 9.423924093147284e-05, "loss": 0.0129, "step": 19660 }, { "epoch": 26.331994645247658, "grad_norm": 0.29911741614341736, "learning_rate": 9.423153334640407e-05, "loss": 0.0132, "step": 19670 }, { "epoch": 26.34538152610442, "grad_norm": 0.27014589309692383, "learning_rate": 9.42238209242413e-05, "loss": 0.0173, "step": 19680 }, { "epoch": 26.35876840696118, "grad_norm": 0.2310331016778946, "learning_rate": 9.421610366582798e-05, "loss": 0.0142, "step": 19690 }, { "epoch": 26.37215528781794, "grad_norm": 0.24131697416305542, "learning_rate": 9.420838157200803e-05, "loss": 0.0125, "step": 19700 }, { "epoch": 26.3855421686747, "grad_norm": 0.34252646565437317, "learning_rate": 9.420065464362594e-05, "loss": 0.0173, "step": 19710 }, { "epoch": 26.39892904953146, "grad_norm": 0.19581104815006256, "learning_rate": 9.419292288152673e-05, "loss": 0.013, "step": 19720 }, { "epoch": 26.41231593038822, "grad_norm": 0.2803940176963806, "learning_rate": 9.418518628655588e-05, "loss": 0.0119, "step": 19730 }, { "epoch": 26.42570281124498, "grad_norm": 0.2511027157306671, "learning_rate": 9.417744485955951e-05, "loss": 0.0141, "step": 19740 }, { "epoch": 26.43908969210174, "grad_norm": 0.393145352602005, "learning_rate": 9.41696986013842e-05, "loss": 0.0131, "step": 19750 }, { "epoch": 26.4524765729585, "grad_norm": 0.13161689043045044, "learning_rate": 9.416194751287705e-05, "loss": 0.0116, "step": 19760 }, { "epoch": 26.46586345381526, "grad_norm": 0.18141883611679077, "learning_rate": 9.415419159488572e-05, "loss": 0.0134, "step": 19770 }, { "epoch": 26.47925033467202, "grad_norm": 0.7245981693267822, "learning_rate": 9.414643084825837e-05, "loss": 0.0172, "step": 19780 }, { "epoch": 26.49263721552878, "grad_norm": 0.1689685434103012, "learning_rate": 9.413866527384372e-05, "loss": 0.0121, "step": 19790 }, { "epoch": 26.50602409638554, "grad_norm": 0.1905074119567871, "learning_rate": 9.4130894872491e-05, "loss": 0.0126, "step": 19800 }, { "epoch": 26.519410977242302, "grad_norm": 0.21189472079277039, "learning_rate": 9.412311964504998e-05, "loss": 0.0143, "step": 19810 }, { "epoch": 26.532797858099062, "grad_norm": 0.1901445984840393, "learning_rate": 9.411533959237091e-05, "loss": 0.0147, "step": 19820 }, { "epoch": 26.546184738955823, "grad_norm": 0.2485426366329193, "learning_rate": 9.410755471530464e-05, "loss": 0.0125, "step": 19830 }, { "epoch": 26.559571619812584, "grad_norm": 0.20844009518623352, "learning_rate": 9.40997650147025e-05, "loss": 0.0129, "step": 19840 }, { "epoch": 26.572958500669344, "grad_norm": 0.21129503846168518, "learning_rate": 9.409197049141637e-05, "loss": 0.0124, "step": 19850 }, { "epoch": 26.586345381526105, "grad_norm": 0.22192205488681793, "learning_rate": 9.408417114629863e-05, "loss": 0.0141, "step": 19860 }, { "epoch": 26.599732262382865, "grad_norm": 0.3818089962005615, "learning_rate": 9.40763669802022e-05, "loss": 0.014, "step": 19870 }, { "epoch": 26.613119143239626, "grad_norm": 0.3144623041152954, "learning_rate": 9.406855799398056e-05, "loss": 0.0138, "step": 19880 }, { "epoch": 26.626506024096386, "grad_norm": 0.9246140122413635, "learning_rate": 9.406074418848767e-05, "loss": 0.0121, "step": 19890 }, { "epoch": 26.639892904953147, "grad_norm": 0.16679014265537262, "learning_rate": 9.405292556457805e-05, "loss": 0.0127, "step": 19900 }, { "epoch": 26.653279785809907, "grad_norm": 0.2521178424358368, "learning_rate": 9.404510212310671e-05, "loss": 0.0127, "step": 19910 }, { "epoch": 26.666666666666668, "grad_norm": 0.22288180887699127, "learning_rate": 9.403727386492924e-05, "loss": 0.0115, "step": 19920 }, { "epoch": 26.68005354752343, "grad_norm": 0.22267693281173706, "learning_rate": 9.40294407909017e-05, "loss": 0.0146, "step": 19930 }, { "epoch": 26.69344042838019, "grad_norm": 0.18848778307437897, "learning_rate": 9.40216029018807e-05, "loss": 0.0139, "step": 19940 }, { "epoch": 26.70682730923695, "grad_norm": 0.4689028263092041, "learning_rate": 9.401376019872338e-05, "loss": 0.0157, "step": 19950 }, { "epoch": 26.720214190093706, "grad_norm": 0.46454599499702454, "learning_rate": 9.400591268228746e-05, "loss": 0.0155, "step": 19960 }, { "epoch": 26.733601070950467, "grad_norm": 0.8487290143966675, "learning_rate": 9.399806035343106e-05, "loss": 0.013, "step": 19970 }, { "epoch": 26.746987951807228, "grad_norm": 0.21881774067878723, "learning_rate": 9.399020321301294e-05, "loss": 0.0162, "step": 19980 }, { "epoch": 26.760374832663988, "grad_norm": 0.27677828073501587, "learning_rate": 9.398234126189234e-05, "loss": 0.012, "step": 19990 }, { "epoch": 26.77376171352075, "grad_norm": 0.43681052327156067, "learning_rate": 9.397447450092902e-05, "loss": 0.0123, "step": 20000 }, { "epoch": 26.78714859437751, "grad_norm": 0.13114216923713684, "learning_rate": 9.39666029309833e-05, "loss": 0.0138, "step": 20010 }, { "epoch": 26.80053547523427, "grad_norm": 0.24161078035831451, "learning_rate": 9.395872655291596e-05, "loss": 0.012, "step": 20020 }, { "epoch": 26.81392235609103, "grad_norm": 0.19049812853336334, "learning_rate": 9.395084536758838e-05, "loss": 0.0115, "step": 20030 }, { "epoch": 26.82730923694779, "grad_norm": 0.6544800400733948, "learning_rate": 9.394295937586243e-05, "loss": 0.0138, "step": 20040 }, { "epoch": 26.84069611780455, "grad_norm": 0.27083736658096313, "learning_rate": 9.393506857860052e-05, "loss": 0.0132, "step": 20050 }, { "epoch": 26.854082998661312, "grad_norm": 0.17760449647903442, "learning_rate": 9.392717297666555e-05, "loss": 0.0131, "step": 20060 }, { "epoch": 26.867469879518072, "grad_norm": 0.15054582059383392, "learning_rate": 9.391927257092101e-05, "loss": 0.0111, "step": 20070 }, { "epoch": 26.880856760374833, "grad_norm": 0.1656271070241928, "learning_rate": 9.391136736223085e-05, "loss": 0.0122, "step": 20080 }, { "epoch": 26.894243641231594, "grad_norm": 0.22136959433555603, "learning_rate": 9.390345735145956e-05, "loss": 0.0137, "step": 20090 }, { "epoch": 26.907630522088354, "grad_norm": 0.19400739669799805, "learning_rate": 9.389554253947219e-05, "loss": 0.0152, "step": 20100 }, { "epoch": 26.921017402945115, "grad_norm": 0.2608811855316162, "learning_rate": 9.388762292713428e-05, "loss": 0.0137, "step": 20110 }, { "epoch": 26.934404283801875, "grad_norm": 0.2557448446750641, "learning_rate": 9.38796985153119e-05, "loss": 0.0135, "step": 20120 }, { "epoch": 26.947791164658636, "grad_norm": 0.45128288865089417, "learning_rate": 9.387176930487169e-05, "loss": 0.0132, "step": 20130 }, { "epoch": 26.961178045515396, "grad_norm": 0.29165810346603394, "learning_rate": 9.386383529668072e-05, "loss": 0.0124, "step": 20140 }, { "epoch": 26.974564926372157, "grad_norm": 0.18580380082130432, "learning_rate": 9.385589649160669e-05, "loss": 0.0126, "step": 20150 }, { "epoch": 26.987951807228917, "grad_norm": 0.21663540601730347, "learning_rate": 9.384795289051775e-05, "loss": 0.0119, "step": 20160 }, { "epoch": 27.001338688085674, "grad_norm": 0.22266227006912231, "learning_rate": 9.384000449428261e-05, "loss": 0.0121, "step": 20170 }, { "epoch": 27.014725568942435, "grad_norm": 0.7041821479797363, "learning_rate": 9.383205130377048e-05, "loss": 0.0146, "step": 20180 }, { "epoch": 27.028112449799195, "grad_norm": 0.40898969769477844, "learning_rate": 9.382409331985114e-05, "loss": 0.013, "step": 20190 }, { "epoch": 27.041499330655956, "grad_norm": 0.58643639087677, "learning_rate": 9.381613054339482e-05, "loss": 0.0124, "step": 20200 }, { "epoch": 27.054886211512716, "grad_norm": 0.24314995110034943, "learning_rate": 9.380816297527235e-05, "loss": 0.0149, "step": 20210 }, { "epoch": 27.068273092369477, "grad_norm": 0.26217034459114075, "learning_rate": 9.380019061635506e-05, "loss": 0.0127, "step": 20220 }, { "epoch": 27.081659973226238, "grad_norm": 0.16939295828342438, "learning_rate": 9.379221346751474e-05, "loss": 0.0154, "step": 20230 }, { "epoch": 27.095046854082998, "grad_norm": 0.17878814041614532, "learning_rate": 9.378423152962382e-05, "loss": 0.0144, "step": 20240 }, { "epoch": 27.10843373493976, "grad_norm": 0.20172685384750366, "learning_rate": 9.377624480355517e-05, "loss": 0.014, "step": 20250 }, { "epoch": 27.12182061579652, "grad_norm": 0.20874078571796417, "learning_rate": 9.376825329018219e-05, "loss": 0.0145, "step": 20260 }, { "epoch": 27.13520749665328, "grad_norm": 0.37191691994667053, "learning_rate": 9.376025699037884e-05, "loss": 0.0117, "step": 20270 }, { "epoch": 27.14859437751004, "grad_norm": 0.23125042021274567, "learning_rate": 9.37522559050196e-05, "loss": 0.0145, "step": 20280 }, { "epoch": 27.1619812583668, "grad_norm": 0.3419362008571625, "learning_rate": 9.37442500349794e-05, "loss": 0.0147, "step": 20290 }, { "epoch": 27.17536813922356, "grad_norm": 0.2005831003189087, "learning_rate": 9.373623938113381e-05, "loss": 0.0133, "step": 20300 }, { "epoch": 27.188755020080322, "grad_norm": 0.2685353755950928, "learning_rate": 9.372822394435883e-05, "loss": 0.0132, "step": 20310 }, { "epoch": 27.202141900937082, "grad_norm": 0.26587530970573425, "learning_rate": 9.372020372553102e-05, "loss": 0.0139, "step": 20320 }, { "epoch": 27.215528781793843, "grad_norm": 0.18670079112052917, "learning_rate": 9.371217872552746e-05, "loss": 0.0124, "step": 20330 }, { "epoch": 27.228915662650603, "grad_norm": 0.352087140083313, "learning_rate": 9.370414894522576e-05, "loss": 0.0103, "step": 20340 }, { "epoch": 27.242302543507364, "grad_norm": 0.21277858316898346, "learning_rate": 9.369611438550406e-05, "loss": 0.0112, "step": 20350 }, { "epoch": 27.255689424364125, "grad_norm": 0.2090688794851303, "learning_rate": 9.368807504724095e-05, "loss": 0.0114, "step": 20360 }, { "epoch": 27.269076305220885, "grad_norm": 0.1336497813463211, "learning_rate": 9.368003093131565e-05, "loss": 0.0125, "step": 20370 }, { "epoch": 27.282463186077646, "grad_norm": 0.18847987055778503, "learning_rate": 9.367198203860785e-05, "loss": 0.0124, "step": 20380 }, { "epoch": 27.295850066934403, "grad_norm": 0.22106793522834778, "learning_rate": 9.366392836999774e-05, "loss": 0.013, "step": 20390 }, { "epoch": 27.309236947791163, "grad_norm": 0.1747901290655136, "learning_rate": 9.365586992636607e-05, "loss": 0.0149, "step": 20400 }, { "epoch": 27.322623828647924, "grad_norm": 0.16386935114860535, "learning_rate": 9.364780670859412e-05, "loss": 0.0111, "step": 20410 }, { "epoch": 27.336010709504684, "grad_norm": 0.33671051263809204, "learning_rate": 9.363973871756364e-05, "loss": 0.0126, "step": 20420 }, { "epoch": 27.349397590361445, "grad_norm": 0.2872264087200165, "learning_rate": 9.363166595415696e-05, "loss": 0.0112, "step": 20430 }, { "epoch": 27.362784471218205, "grad_norm": 0.2576237916946411, "learning_rate": 9.362358841925686e-05, "loss": 0.0134, "step": 20440 }, { "epoch": 27.376171352074966, "grad_norm": 0.2179328203201294, "learning_rate": 9.361550611374674e-05, "loss": 0.014, "step": 20450 }, { "epoch": 27.389558232931726, "grad_norm": 0.215264692902565, "learning_rate": 9.360741903851043e-05, "loss": 0.0132, "step": 20460 }, { "epoch": 27.402945113788487, "grad_norm": 0.22820666432380676, "learning_rate": 9.359932719443236e-05, "loss": 0.0131, "step": 20470 }, { "epoch": 27.416331994645248, "grad_norm": 0.1160714402794838, "learning_rate": 9.35912305823974e-05, "loss": 0.0126, "step": 20480 }, { "epoch": 27.429718875502008, "grad_norm": 0.19988010823726654, "learning_rate": 9.358312920329101e-05, "loss": 0.0122, "step": 20490 }, { "epoch": 27.44310575635877, "grad_norm": 0.22222208976745605, "learning_rate": 9.357502305799914e-05, "loss": 0.015, "step": 20500 }, { "epoch": 27.45649263721553, "grad_norm": 0.2469080239534378, "learning_rate": 9.356691214740824e-05, "loss": 0.0139, "step": 20510 }, { "epoch": 27.46987951807229, "grad_norm": 2.040977954864502, "learning_rate": 9.355879647240535e-05, "loss": 0.0139, "step": 20520 }, { "epoch": 27.48326639892905, "grad_norm": 0.19220513105392456, "learning_rate": 9.355067603387798e-05, "loss": 0.0118, "step": 20530 }, { "epoch": 27.49665327978581, "grad_norm": 0.14545971155166626, "learning_rate": 9.354255083271412e-05, "loss": 0.0123, "step": 20540 }, { "epoch": 27.51004016064257, "grad_norm": 0.2056397646665573, "learning_rate": 9.353442086980239e-05, "loss": 0.0124, "step": 20550 }, { "epoch": 27.523427041499332, "grad_norm": 0.19598570466041565, "learning_rate": 9.352628614603185e-05, "loss": 0.0135, "step": 20560 }, { "epoch": 27.536813922356092, "grad_norm": 0.19755254685878754, "learning_rate": 9.351814666229209e-05, "loss": 0.0111, "step": 20570 }, { "epoch": 27.550200803212853, "grad_norm": 0.18974944949150085, "learning_rate": 9.351000241947324e-05, "loss": 0.0124, "step": 20580 }, { "epoch": 27.563587684069613, "grad_norm": 0.22975869476795197, "learning_rate": 9.350185341846594e-05, "loss": 0.0132, "step": 20590 }, { "epoch": 27.57697456492637, "grad_norm": 0.2804816663265228, "learning_rate": 9.349369966016134e-05, "loss": 0.0137, "step": 20600 }, { "epoch": 27.59036144578313, "grad_norm": 0.26007670164108276, "learning_rate": 9.348554114545117e-05, "loss": 0.0125, "step": 20610 }, { "epoch": 27.60374832663989, "grad_norm": 1.3916549682617188, "learning_rate": 9.347737787522758e-05, "loss": 0.0152, "step": 20620 }, { "epoch": 27.617135207496652, "grad_norm": 0.19645005464553833, "learning_rate": 9.346920985038332e-05, "loss": 0.0149, "step": 20630 }, { "epoch": 27.630522088353413, "grad_norm": 0.19748175144195557, "learning_rate": 9.346103707181162e-05, "loss": 0.012, "step": 20640 }, { "epoch": 27.643908969210173, "grad_norm": 0.15404397249221802, "learning_rate": 9.345285954040626e-05, "loss": 0.0119, "step": 20650 }, { "epoch": 27.657295850066934, "grad_norm": 0.24155141413211823, "learning_rate": 9.34446772570615e-05, "loss": 0.0117, "step": 20660 }, { "epoch": 27.670682730923694, "grad_norm": 0.17754749953746796, "learning_rate": 9.343649022267214e-05, "loss": 0.0115, "step": 20670 }, { "epoch": 27.684069611780455, "grad_norm": 0.36349785327911377, "learning_rate": 9.342829843813353e-05, "loss": 0.0142, "step": 20680 }, { "epoch": 27.697456492637215, "grad_norm": 0.22548004984855652, "learning_rate": 9.342010190434149e-05, "loss": 0.0114, "step": 20690 }, { "epoch": 27.710843373493976, "grad_norm": 1.314886450767517, "learning_rate": 9.34119006221924e-05, "loss": 0.0123, "step": 20700 }, { "epoch": 27.724230254350736, "grad_norm": 1.0705885887145996, "learning_rate": 9.340369459258313e-05, "loss": 0.0146, "step": 20710 }, { "epoch": 27.737617135207497, "grad_norm": 0.1983121782541275, "learning_rate": 9.339548381641106e-05, "loss": 0.0142, "step": 20720 }, { "epoch": 27.751004016064257, "grad_norm": 0.15117059648036957, "learning_rate": 9.338726829457413e-05, "loss": 0.0121, "step": 20730 }, { "epoch": 27.764390896921018, "grad_norm": 0.26223328709602356, "learning_rate": 9.337904802797078e-05, "loss": 0.0152, "step": 20740 }, { "epoch": 27.77777777777778, "grad_norm": 0.30036282539367676, "learning_rate": 9.337082301749993e-05, "loss": 0.0133, "step": 20750 }, { "epoch": 27.79116465863454, "grad_norm": 0.24880075454711914, "learning_rate": 9.336259326406109e-05, "loss": 0.0111, "step": 20760 }, { "epoch": 27.8045515394913, "grad_norm": 0.2221275418996811, "learning_rate": 9.335435876855427e-05, "loss": 0.0137, "step": 20770 }, { "epoch": 27.81793842034806, "grad_norm": 0.19868385791778564, "learning_rate": 9.334611953187994e-05, "loss": 0.0116, "step": 20780 }, { "epoch": 27.83132530120482, "grad_norm": 0.1782885640859604, "learning_rate": 9.333787555493914e-05, "loss": 0.0119, "step": 20790 }, { "epoch": 27.84471218206158, "grad_norm": 2.3401100635528564, "learning_rate": 9.332962683863345e-05, "loss": 0.0118, "step": 20800 }, { "epoch": 27.858099062918342, "grad_norm": 0.19369138777256012, "learning_rate": 9.332137338386489e-05, "loss": 0.0106, "step": 20810 }, { "epoch": 27.8714859437751, "grad_norm": 0.47218024730682373, "learning_rate": 9.33131151915361e-05, "loss": 0.0117, "step": 20820 }, { "epoch": 27.88487282463186, "grad_norm": 0.24448858201503754, "learning_rate": 9.330485226255012e-05, "loss": 0.0143, "step": 20830 }, { "epoch": 27.89825970548862, "grad_norm": 0.28097978234291077, "learning_rate": 9.329658459781061e-05, "loss": 0.0125, "step": 20840 }, { "epoch": 27.91164658634538, "grad_norm": 0.5933621525764465, "learning_rate": 9.328831219822172e-05, "loss": 0.0136, "step": 20850 }, { "epoch": 27.92503346720214, "grad_norm": 0.2537844181060791, "learning_rate": 9.328003506468808e-05, "loss": 0.0131, "step": 20860 }, { "epoch": 27.9384203480589, "grad_norm": 0.2638619840145111, "learning_rate": 9.327175319811488e-05, "loss": 0.0138, "step": 20870 }, { "epoch": 27.951807228915662, "grad_norm": 0.7818955779075623, "learning_rate": 9.326346659940781e-05, "loss": 0.0157, "step": 20880 }, { "epoch": 27.965194109772423, "grad_norm": 0.20106425881385803, "learning_rate": 9.325517526947308e-05, "loss": 0.013, "step": 20890 }, { "epoch": 27.978580990629183, "grad_norm": 0.17448720335960388, "learning_rate": 9.32468792092174e-05, "loss": 0.0116, "step": 20900 }, { "epoch": 27.991967871485944, "grad_norm": 0.41671210527420044, "learning_rate": 9.323857841954803e-05, "loss": 0.0122, "step": 20910 }, { "epoch": 28.005354752342704, "grad_norm": 0.21001321077346802, "learning_rate": 9.323027290137276e-05, "loss": 0.0133, "step": 20920 }, { "epoch": 28.018741633199465, "grad_norm": 0.23424392938613892, "learning_rate": 9.322196265559981e-05, "loss": 0.0137, "step": 20930 }, { "epoch": 28.032128514056225, "grad_norm": 1.0922045707702637, "learning_rate": 9.321364768313803e-05, "loss": 0.0149, "step": 20940 }, { "epoch": 28.045515394912986, "grad_norm": 0.5780507326126099, "learning_rate": 9.32053279848967e-05, "loss": 0.0119, "step": 20950 }, { "epoch": 28.058902275769746, "grad_norm": 0.15011069178581238, "learning_rate": 9.319700356178567e-05, "loss": 0.0118, "step": 20960 }, { "epoch": 28.072289156626507, "grad_norm": 0.189581960439682, "learning_rate": 9.318867441471527e-05, "loss": 0.0137, "step": 20970 }, { "epoch": 28.085676037483267, "grad_norm": 1.1995896100997925, "learning_rate": 9.318034054459637e-05, "loss": 0.0116, "step": 20980 }, { "epoch": 28.099062918340028, "grad_norm": 0.8004634380340576, "learning_rate": 9.317200195234034e-05, "loss": 0.0109, "step": 20990 }, { "epoch": 28.11244979919679, "grad_norm": 0.3541252315044403, "learning_rate": 9.316365863885909e-05, "loss": 0.0129, "step": 21000 }, { "epoch": 28.12583668005355, "grad_norm": 0.19520677626132965, "learning_rate": 9.315531060506502e-05, "loss": 0.0118, "step": 21010 }, { "epoch": 28.13922356091031, "grad_norm": 0.26291415095329285, "learning_rate": 9.314695785187108e-05, "loss": 0.0124, "step": 21020 }, { "epoch": 28.152610441767067, "grad_norm": 0.20765015482902527, "learning_rate": 9.313860038019069e-05, "loss": 0.0128, "step": 21030 }, { "epoch": 28.165997322623827, "grad_norm": 0.20332367718219757, "learning_rate": 9.313023819093782e-05, "loss": 0.0141, "step": 21040 }, { "epoch": 28.179384203480588, "grad_norm": 0.3661212921142578, "learning_rate": 9.312187128502695e-05, "loss": 0.0131, "step": 21050 }, { "epoch": 28.19277108433735, "grad_norm": 0.18905724585056305, "learning_rate": 9.311349966337307e-05, "loss": 0.0147, "step": 21060 }, { "epoch": 28.20615796519411, "grad_norm": 0.19106456637382507, "learning_rate": 9.310512332689169e-05, "loss": 0.0114, "step": 21070 }, { "epoch": 28.21954484605087, "grad_norm": 0.17276348173618317, "learning_rate": 9.309674227649883e-05, "loss": 0.0125, "step": 21080 }, { "epoch": 28.23293172690763, "grad_norm": 0.9283546209335327, "learning_rate": 9.308835651311103e-05, "loss": 0.0124, "step": 21090 }, { "epoch": 28.24631860776439, "grad_norm": 0.2203538566827774, "learning_rate": 9.307996603764533e-05, "loss": 0.0133, "step": 21100 }, { "epoch": 28.25970548862115, "grad_norm": 0.27452224493026733, "learning_rate": 9.307157085101932e-05, "loss": 0.013, "step": 21110 }, { "epoch": 28.27309236947791, "grad_norm": 0.272666335105896, "learning_rate": 9.306317095415109e-05, "loss": 0.0124, "step": 21120 }, { "epoch": 28.286479250334672, "grad_norm": 0.2464371919631958, "learning_rate": 9.305476634795922e-05, "loss": 0.0134, "step": 21130 }, { "epoch": 28.299866131191433, "grad_norm": 0.6284334063529968, "learning_rate": 9.304635703336284e-05, "loss": 0.0119, "step": 21140 }, { "epoch": 28.313253012048193, "grad_norm": 0.21901988983154297, "learning_rate": 9.303794301128157e-05, "loss": 0.0131, "step": 21150 }, { "epoch": 28.326639892904954, "grad_norm": 0.1791936308145523, "learning_rate": 9.302952428263555e-05, "loss": 0.011, "step": 21160 }, { "epoch": 28.340026773761714, "grad_norm": 0.18318268656730652, "learning_rate": 9.302110084834545e-05, "loss": 0.0125, "step": 21170 }, { "epoch": 28.353413654618475, "grad_norm": 0.24350732564926147, "learning_rate": 9.301267270933245e-05, "loss": 0.0126, "step": 21180 }, { "epoch": 28.366800535475235, "grad_norm": 0.22902041673660278, "learning_rate": 9.300423986651823e-05, "loss": 0.0138, "step": 21190 }, { "epoch": 28.380187416331996, "grad_norm": 0.4144304096698761, "learning_rate": 9.299580232082501e-05, "loss": 0.0132, "step": 21200 }, { "epoch": 28.393574297188756, "grad_norm": 0.23882897198200226, "learning_rate": 9.298736007317547e-05, "loss": 0.0133, "step": 21210 }, { "epoch": 28.406961178045517, "grad_norm": 0.5443134307861328, "learning_rate": 9.297891312449288e-05, "loss": 0.0118, "step": 21220 }, { "epoch": 28.420348058902277, "grad_norm": 0.14886154234409332, "learning_rate": 9.297046147570094e-05, "loss": 0.0134, "step": 21230 }, { "epoch": 28.433734939759034, "grad_norm": 0.252177894115448, "learning_rate": 9.296200512772396e-05, "loss": 0.0114, "step": 21240 }, { "epoch": 28.447121820615795, "grad_norm": 0.24444109201431274, "learning_rate": 9.295354408148668e-05, "loss": 0.0122, "step": 21250 }, { "epoch": 28.460508701472556, "grad_norm": 0.42567387223243713, "learning_rate": 9.294507833791441e-05, "loss": 0.0129, "step": 21260 }, { "epoch": 28.473895582329316, "grad_norm": 0.1681240350008011, "learning_rate": 9.293660789793295e-05, "loss": 0.011, "step": 21270 }, { "epoch": 28.487282463186077, "grad_norm": 0.20920240879058838, "learning_rate": 9.292813276246858e-05, "loss": 0.0096, "step": 21280 }, { "epoch": 28.500669344042837, "grad_norm": 0.2605859935283661, "learning_rate": 9.291965293244816e-05, "loss": 0.0109, "step": 21290 }, { "epoch": 28.514056224899598, "grad_norm": 0.2598365247249603, "learning_rate": 9.291116840879904e-05, "loss": 0.0102, "step": 21300 }, { "epoch": 28.527443105756358, "grad_norm": 0.161123588681221, "learning_rate": 9.290267919244904e-05, "loss": 0.0126, "step": 21310 }, { "epoch": 28.54082998661312, "grad_norm": 0.1918502002954483, "learning_rate": 9.289418528432655e-05, "loss": 0.012, "step": 21320 }, { "epoch": 28.55421686746988, "grad_norm": 0.21173939108848572, "learning_rate": 9.288568668536045e-05, "loss": 0.0146, "step": 21330 }, { "epoch": 28.56760374832664, "grad_norm": 0.17572355270385742, "learning_rate": 9.287718339648013e-05, "loss": 0.0119, "step": 21340 }, { "epoch": 28.5809906291834, "grad_norm": 0.16209839284420013, "learning_rate": 9.28686754186155e-05, "loss": 0.0147, "step": 21350 }, { "epoch": 28.59437751004016, "grad_norm": 0.19260604679584503, "learning_rate": 9.286016275269698e-05, "loss": 0.014, "step": 21360 }, { "epoch": 28.60776439089692, "grad_norm": 1.0364004373550415, "learning_rate": 9.285164539965551e-05, "loss": 0.0135, "step": 21370 }, { "epoch": 28.621151271753682, "grad_norm": 0.26272377371788025, "learning_rate": 9.284312336042251e-05, "loss": 0.0153, "step": 21380 }, { "epoch": 28.634538152610443, "grad_norm": 0.14384295046329498, "learning_rate": 9.283459663592996e-05, "loss": 0.012, "step": 21390 }, { "epoch": 28.647925033467203, "grad_norm": 0.201413094997406, "learning_rate": 9.282606522711033e-05, "loss": 0.0115, "step": 21400 }, { "epoch": 28.661311914323964, "grad_norm": 0.16899079084396362, "learning_rate": 9.281752913489657e-05, "loss": 0.0116, "step": 21410 }, { "epoch": 28.674698795180724, "grad_norm": 0.15751568973064423, "learning_rate": 9.280898836022222e-05, "loss": 0.0122, "step": 21420 }, { "epoch": 28.688085676037485, "grad_norm": 0.16425518691539764, "learning_rate": 9.280044290402126e-05, "loss": 0.0139, "step": 21430 }, { "epoch": 28.701472556894245, "grad_norm": 0.19023317098617554, "learning_rate": 9.279189276722821e-05, "loss": 0.0123, "step": 21440 }, { "epoch": 28.714859437751002, "grad_norm": 0.3347594439983368, "learning_rate": 9.278333795077812e-05, "loss": 0.0123, "step": 21450 }, { "epoch": 28.728246318607763, "grad_norm": 0.1461198478937149, "learning_rate": 9.27747784556065e-05, "loss": 0.013, "step": 21460 }, { "epoch": 28.741633199464523, "grad_norm": 0.42167606949806213, "learning_rate": 9.276621428264942e-05, "loss": 0.0115, "step": 21470 }, { "epoch": 28.755020080321284, "grad_norm": 0.37610265612602234, "learning_rate": 9.275764543284345e-05, "loss": 0.0131, "step": 21480 }, { "epoch": 28.768406961178044, "grad_norm": 0.6995325088500977, "learning_rate": 9.274907190712566e-05, "loss": 0.0126, "step": 21490 }, { "epoch": 28.781793842034805, "grad_norm": 0.21641376614570618, "learning_rate": 9.274049370643363e-05, "loss": 0.011, "step": 21500 }, { "epoch": 28.795180722891565, "grad_norm": 0.3136879801750183, "learning_rate": 9.273191083170547e-05, "loss": 0.0133, "step": 21510 }, { "epoch": 28.808567603748326, "grad_norm": 0.3305152654647827, "learning_rate": 9.27233232838798e-05, "loss": 0.011, "step": 21520 }, { "epoch": 28.821954484605087, "grad_norm": 0.29537853598594666, "learning_rate": 9.27147310638957e-05, "loss": 0.0133, "step": 21530 }, { "epoch": 28.835341365461847, "grad_norm": 0.32328280806541443, "learning_rate": 9.270613417269286e-05, "loss": 0.0123, "step": 21540 }, { "epoch": 28.848728246318608, "grad_norm": 0.26585811376571655, "learning_rate": 9.269753261121138e-05, "loss": 0.0134, "step": 21550 }, { "epoch": 28.862115127175368, "grad_norm": 0.24236133694648743, "learning_rate": 9.268892638039194e-05, "loss": 0.0107, "step": 21560 }, { "epoch": 28.87550200803213, "grad_norm": 0.3880181610584259, "learning_rate": 9.268031548117569e-05, "loss": 0.0132, "step": 21570 }, { "epoch": 28.88888888888889, "grad_norm": 0.2920311987400055, "learning_rate": 9.26716999145043e-05, "loss": 0.0115, "step": 21580 }, { "epoch": 28.90227576974565, "grad_norm": 0.1481393426656723, "learning_rate": 9.266307968131998e-05, "loss": 0.0122, "step": 21590 }, { "epoch": 28.91566265060241, "grad_norm": 0.35966774821281433, "learning_rate": 9.26544547825654e-05, "loss": 0.0123, "step": 21600 }, { "epoch": 28.92904953145917, "grad_norm": 0.1978951394557953, "learning_rate": 9.264582521918376e-05, "loss": 0.0125, "step": 21610 }, { "epoch": 28.94243641231593, "grad_norm": 0.10676086694002151, "learning_rate": 9.263719099211881e-05, "loss": 0.0112, "step": 21620 }, { "epoch": 28.955823293172692, "grad_norm": 0.2377779632806778, "learning_rate": 9.262855210231476e-05, "loss": 0.0121, "step": 21630 }, { "epoch": 28.969210174029453, "grad_norm": 0.23108632862567902, "learning_rate": 9.261990855071633e-05, "loss": 0.0127, "step": 21640 }, { "epoch": 28.982597054886213, "grad_norm": 0.7274494171142578, "learning_rate": 9.261126033826878e-05, "loss": 0.0127, "step": 21650 }, { "epoch": 28.99598393574297, "grad_norm": 0.240697979927063, "learning_rate": 9.260260746591786e-05, "loss": 0.0107, "step": 21660 }, { "epoch": 29.00937081659973, "grad_norm": 0.38497745990753174, "learning_rate": 9.259394993460985e-05, "loss": 0.0112, "step": 21670 }, { "epoch": 29.02275769745649, "grad_norm": 0.12018400430679321, "learning_rate": 9.258528774529151e-05, "loss": 0.0119, "step": 21680 }, { "epoch": 29.03614457831325, "grad_norm": 0.3014098107814789, "learning_rate": 9.257662089891013e-05, "loss": 0.0129, "step": 21690 }, { "epoch": 29.049531459170012, "grad_norm": 0.175050288438797, "learning_rate": 9.25679493964135e-05, "loss": 0.0126, "step": 21700 }, { "epoch": 29.062918340026773, "grad_norm": 0.2143370509147644, "learning_rate": 9.255927323874994e-05, "loss": 0.0122, "step": 21710 }, { "epoch": 29.076305220883533, "grad_norm": 0.13151027262210846, "learning_rate": 9.255059242686822e-05, "loss": 0.0143, "step": 21720 }, { "epoch": 29.089692101740294, "grad_norm": 0.18942338228225708, "learning_rate": 9.254190696171769e-05, "loss": 0.0118, "step": 21730 }, { "epoch": 29.103078982597054, "grad_norm": 0.2644917368888855, "learning_rate": 9.25332168442482e-05, "loss": 0.015, "step": 21740 }, { "epoch": 29.116465863453815, "grad_norm": 0.25980013608932495, "learning_rate": 9.252452207541004e-05, "loss": 0.0144, "step": 21750 }, { "epoch": 29.129852744310575, "grad_norm": 0.22295747697353363, "learning_rate": 9.251582265615409e-05, "loss": 0.0138, "step": 21760 }, { "epoch": 29.143239625167336, "grad_norm": 0.26612037420272827, "learning_rate": 9.250711858743169e-05, "loss": 0.0143, "step": 21770 }, { "epoch": 29.156626506024097, "grad_norm": 0.690872311592102, "learning_rate": 9.24984098701947e-05, "loss": 0.0135, "step": 21780 }, { "epoch": 29.170013386880857, "grad_norm": 0.22901147603988647, "learning_rate": 9.248969650539552e-05, "loss": 0.0115, "step": 21790 }, { "epoch": 29.183400267737618, "grad_norm": 0.15433523058891296, "learning_rate": 9.2480978493987e-05, "loss": 0.0127, "step": 21800 }, { "epoch": 29.196787148594378, "grad_norm": 0.32197514176368713, "learning_rate": 9.247225583692256e-05, "loss": 0.011, "step": 21810 }, { "epoch": 29.21017402945114, "grad_norm": 0.31760039925575256, "learning_rate": 9.246352853515607e-05, "loss": 0.0161, "step": 21820 }, { "epoch": 29.2235609103079, "grad_norm": 0.27762848138809204, "learning_rate": 9.245479658964194e-05, "loss": 0.0145, "step": 21830 }, { "epoch": 29.23694779116466, "grad_norm": 0.37053030729293823, "learning_rate": 9.244606000133507e-05, "loss": 0.0153, "step": 21840 }, { "epoch": 29.25033467202142, "grad_norm": 0.34943726658821106, "learning_rate": 9.24373187711909e-05, "loss": 0.0136, "step": 21850 }, { "epoch": 29.26372155287818, "grad_norm": 0.16717539727687836, "learning_rate": 9.242857290016537e-05, "loss": 0.0107, "step": 21860 }, { "epoch": 29.27710843373494, "grad_norm": 0.16529420018196106, "learning_rate": 9.241982238921488e-05, "loss": 0.0155, "step": 21870 }, { "epoch": 29.2904953145917, "grad_norm": 0.24007880687713623, "learning_rate": 9.24110672392964e-05, "loss": 0.014, "step": 21880 }, { "epoch": 29.30388219544846, "grad_norm": 0.23965118825435638, "learning_rate": 9.240230745136737e-05, "loss": 0.0134, "step": 21890 }, { "epoch": 29.31726907630522, "grad_norm": 0.2060554176568985, "learning_rate": 9.239354302638575e-05, "loss": 0.0128, "step": 21900 }, { "epoch": 29.33065595716198, "grad_norm": 0.17250199615955353, "learning_rate": 9.238477396531e-05, "loss": 0.0113, "step": 21910 }, { "epoch": 29.34404283801874, "grad_norm": 0.18925276398658752, "learning_rate": 9.23760002690991e-05, "loss": 0.014, "step": 21920 }, { "epoch": 29.3574297188755, "grad_norm": 0.2916983664035797, "learning_rate": 9.236722193871252e-05, "loss": 0.012, "step": 21930 }, { "epoch": 29.37081659973226, "grad_norm": 0.17697735130786896, "learning_rate": 9.235843897511023e-05, "loss": 0.0126, "step": 21940 }, { "epoch": 29.384203480589022, "grad_norm": 0.19937114417552948, "learning_rate": 9.234965137925276e-05, "loss": 0.0132, "step": 21950 }, { "epoch": 29.397590361445783, "grad_norm": 0.18338842689990997, "learning_rate": 9.234085915210108e-05, "loss": 0.0119, "step": 21960 }, { "epoch": 29.410977242302543, "grad_norm": 0.2236151099205017, "learning_rate": 9.23320622946167e-05, "loss": 0.0132, "step": 21970 }, { "epoch": 29.424364123159304, "grad_norm": 0.2794235646724701, "learning_rate": 9.232326080776163e-05, "loss": 0.0094, "step": 21980 }, { "epoch": 29.437751004016064, "grad_norm": 0.4239465296268463, "learning_rate": 9.23144546924984e-05, "loss": 0.0136, "step": 21990 }, { "epoch": 29.451137884872825, "grad_norm": 0.20273981988430023, "learning_rate": 9.230564394979e-05, "loss": 0.0131, "step": 22000 }, { "epoch": 29.464524765729585, "grad_norm": 0.27187711000442505, "learning_rate": 9.22968285806e-05, "loss": 0.0125, "step": 22010 }, { "epoch": 29.477911646586346, "grad_norm": 0.39072659611701965, "learning_rate": 9.228800858589242e-05, "loss": 0.0133, "step": 22020 }, { "epoch": 29.491298527443107, "grad_norm": 0.15686485171318054, "learning_rate": 9.227918396663179e-05, "loss": 0.0125, "step": 22030 }, { "epoch": 29.504685408299867, "grad_norm": 0.6896948218345642, "learning_rate": 9.227035472378319e-05, "loss": 0.0126, "step": 22040 }, { "epoch": 29.518072289156628, "grad_norm": 0.26803165674209595, "learning_rate": 9.226152085831213e-05, "loss": 0.0164, "step": 22050 }, { "epoch": 29.531459170013388, "grad_norm": 0.4878256022930145, "learning_rate": 9.22526823711847e-05, "loss": 0.0145, "step": 22060 }, { "epoch": 29.54484605087015, "grad_norm": 0.3553222417831421, "learning_rate": 9.224383926336745e-05, "loss": 0.015, "step": 22070 }, { "epoch": 29.55823293172691, "grad_norm": 0.2475862056016922, "learning_rate": 9.223499153582744e-05, "loss": 0.0124, "step": 22080 }, { "epoch": 29.57161981258367, "grad_norm": 0.20454807579517365, "learning_rate": 9.222613918953226e-05, "loss": 0.0118, "step": 22090 }, { "epoch": 29.585006693440427, "grad_norm": 0.19153545796871185, "learning_rate": 9.221728222544999e-05, "loss": 0.0132, "step": 22100 }, { "epoch": 29.598393574297187, "grad_norm": 0.1646256148815155, "learning_rate": 9.22084206445492e-05, "loss": 0.0106, "step": 22110 }, { "epoch": 29.611780455153948, "grad_norm": 0.5240477323532104, "learning_rate": 9.2199554447799e-05, "loss": 0.0131, "step": 22120 }, { "epoch": 29.62516733601071, "grad_norm": 0.23524633049964905, "learning_rate": 9.219068363616897e-05, "loss": 0.0125, "step": 22130 }, { "epoch": 29.63855421686747, "grad_norm": 0.12487881630659103, "learning_rate": 9.218180821062919e-05, "loss": 0.012, "step": 22140 }, { "epoch": 29.65194109772423, "grad_norm": 0.4032960832118988, "learning_rate": 9.21729281721503e-05, "loss": 0.0165, "step": 22150 }, { "epoch": 29.66532797858099, "grad_norm": 0.1710500866174698, "learning_rate": 9.216404352170339e-05, "loss": 0.0122, "step": 22160 }, { "epoch": 29.67871485943775, "grad_norm": 0.2562510669231415, "learning_rate": 9.215515426026007e-05, "loss": 0.0145, "step": 22170 }, { "epoch": 29.69210174029451, "grad_norm": 0.19803999364376068, "learning_rate": 9.214626038879246e-05, "loss": 0.0145, "step": 22180 }, { "epoch": 29.70548862115127, "grad_norm": 0.26342469453811646, "learning_rate": 9.21373619082732e-05, "loss": 0.0116, "step": 22190 }, { "epoch": 29.718875502008032, "grad_norm": 0.4418940842151642, "learning_rate": 9.212845881967535e-05, "loss": 0.0129, "step": 22200 }, { "epoch": 29.732262382864793, "grad_norm": 0.4962005019187927, "learning_rate": 9.211955112397262e-05, "loss": 0.0129, "step": 22210 }, { "epoch": 29.745649263721553, "grad_norm": 0.2150939702987671, "learning_rate": 9.211063882213909e-05, "loss": 0.0109, "step": 22220 }, { "epoch": 29.759036144578314, "grad_norm": 0.16666434705257416, "learning_rate": 9.210172191514942e-05, "loss": 0.0137, "step": 22230 }, { "epoch": 29.772423025435074, "grad_norm": 0.2547589838504791, "learning_rate": 9.209280040397874e-05, "loss": 0.0118, "step": 22240 }, { "epoch": 29.785809906291835, "grad_norm": 1.069633960723877, "learning_rate": 9.208387428960268e-05, "loss": 0.0135, "step": 22250 }, { "epoch": 29.799196787148595, "grad_norm": 0.2396315485239029, "learning_rate": 9.20749435729974e-05, "loss": 0.0115, "step": 22260 }, { "epoch": 29.812583668005356, "grad_norm": 0.1554299294948578, "learning_rate": 9.206600825513957e-05, "loss": 0.0126, "step": 22270 }, { "epoch": 29.825970548862117, "grad_norm": 0.22870582342147827, "learning_rate": 9.20570683370063e-05, "loss": 0.0125, "step": 22280 }, { "epoch": 29.839357429718877, "grad_norm": 0.14337776601314545, "learning_rate": 9.204812381957528e-05, "loss": 0.0124, "step": 22290 }, { "epoch": 29.852744310575638, "grad_norm": 0.27327585220336914, "learning_rate": 9.203917470382465e-05, "loss": 0.0147, "step": 22300 }, { "epoch": 29.866131191432395, "grad_norm": 0.2793022096157074, "learning_rate": 9.203022099073309e-05, "loss": 0.0122, "step": 22310 }, { "epoch": 29.879518072289155, "grad_norm": 0.3968491554260254, "learning_rate": 9.202126268127976e-05, "loss": 0.0172, "step": 22320 }, { "epoch": 29.892904953145916, "grad_norm": 0.5382661819458008, "learning_rate": 9.20122997764443e-05, "loss": 0.0184, "step": 22330 }, { "epoch": 29.906291834002676, "grad_norm": 0.1990213841199875, "learning_rate": 9.200333227720692e-05, "loss": 0.0137, "step": 22340 }, { "epoch": 29.919678714859437, "grad_norm": 0.34795328974723816, "learning_rate": 9.199436018454826e-05, "loss": 0.0179, "step": 22350 }, { "epoch": 29.933065595716197, "grad_norm": 0.4847314655780792, "learning_rate": 9.198538349944952e-05, "loss": 0.0153, "step": 22360 }, { "epoch": 29.946452476572958, "grad_norm": 0.23421669006347656, "learning_rate": 9.197640222289234e-05, "loss": 0.0162, "step": 22370 }, { "epoch": 29.95983935742972, "grad_norm": 0.3498959541320801, "learning_rate": 9.196741635585895e-05, "loss": 0.0153, "step": 22380 }, { "epoch": 29.97322623828648, "grad_norm": 0.2574198544025421, "learning_rate": 9.195842589933199e-05, "loss": 0.0146, "step": 22390 }, { "epoch": 29.98661311914324, "grad_norm": 0.18281257152557373, "learning_rate": 9.194943085429466e-05, "loss": 0.0131, "step": 22400 }, { "epoch": 30.0, "grad_norm": 0.31799957156181335, "learning_rate": 9.194043122173065e-05, "loss": 0.0142, "step": 22410 }, { "epoch": 30.01338688085676, "grad_norm": 0.24007520079612732, "learning_rate": 9.193142700262413e-05, "loss": 0.0141, "step": 22420 }, { "epoch": 30.02677376171352, "grad_norm": 0.266321063041687, "learning_rate": 9.192241819795979e-05, "loss": 0.0162, "step": 22430 }, { "epoch": 30.04016064257028, "grad_norm": 0.17171025276184082, "learning_rate": 9.191340480872284e-05, "loss": 0.0148, "step": 22440 }, { "epoch": 30.053547523427042, "grad_norm": 0.21963053941726685, "learning_rate": 9.190438683589895e-05, "loss": 0.0137, "step": 22450 }, { "epoch": 30.066934404283803, "grad_norm": 0.4289073348045349, "learning_rate": 9.189536428047432e-05, "loss": 0.013, "step": 22460 }, { "epoch": 30.080321285140563, "grad_norm": 0.13240565359592438, "learning_rate": 9.188633714343564e-05, "loss": 0.0139, "step": 22470 }, { "epoch": 30.093708165997324, "grad_norm": 0.23287439346313477, "learning_rate": 9.18773054257701e-05, "loss": 0.0138, "step": 22480 }, { "epoch": 30.107095046854084, "grad_norm": 0.18640385568141937, "learning_rate": 9.18682691284654e-05, "loss": 0.0124, "step": 22490 }, { "epoch": 30.120481927710845, "grad_norm": 0.15596362948417664, "learning_rate": 9.185922825250974e-05, "loss": 0.0097, "step": 22500 }, { "epoch": 30.133868808567605, "grad_norm": 0.12925942242145538, "learning_rate": 9.185018279889181e-05, "loss": 0.0103, "step": 22510 }, { "epoch": 30.147255689424362, "grad_norm": 0.43735477328300476, "learning_rate": 9.184113276860082e-05, "loss": 0.012, "step": 22520 }, { "epoch": 30.160642570281123, "grad_norm": 0.4354429841041565, "learning_rate": 9.183207816262645e-05, "loss": 0.0109, "step": 22530 }, { "epoch": 30.174029451137883, "grad_norm": 0.3208870589733124, "learning_rate": 9.182301898195891e-05, "loss": 0.0163, "step": 22540 }, { "epoch": 30.187416331994644, "grad_norm": 0.20310039818286896, "learning_rate": 9.181395522758889e-05, "loss": 0.0113, "step": 22550 }, { "epoch": 30.200803212851405, "grad_norm": 0.26982614398002625, "learning_rate": 9.180488690050759e-05, "loss": 0.0123, "step": 22560 }, { "epoch": 30.214190093708165, "grad_norm": 0.21957263350486755, "learning_rate": 9.179581400170671e-05, "loss": 0.014, "step": 22570 }, { "epoch": 30.227576974564926, "grad_norm": 0.17164553701877594, "learning_rate": 9.178673653217845e-05, "loss": 0.0108, "step": 22580 }, { "epoch": 30.240963855421686, "grad_norm": 0.26731714606285095, "learning_rate": 9.177765449291551e-05, "loss": 0.0125, "step": 22590 }, { "epoch": 30.254350736278447, "grad_norm": 0.2126782238483429, "learning_rate": 9.176856788491109e-05, "loss": 0.0121, "step": 22600 }, { "epoch": 30.267737617135207, "grad_norm": 0.14980091154575348, "learning_rate": 9.175947670915887e-05, "loss": 0.0139, "step": 22610 }, { "epoch": 30.281124497991968, "grad_norm": 0.2562045454978943, "learning_rate": 9.175038096665309e-05, "loss": 0.0119, "step": 22620 }, { "epoch": 30.29451137884873, "grad_norm": 0.21015331149101257, "learning_rate": 9.17412806583884e-05, "loss": 0.0156, "step": 22630 }, { "epoch": 30.30789825970549, "grad_norm": 1.1795164346694946, "learning_rate": 9.173217578536002e-05, "loss": 0.0154, "step": 22640 }, { "epoch": 30.32128514056225, "grad_norm": 0.29279619455337524, "learning_rate": 9.172306634856362e-05, "loss": 0.0149, "step": 22650 }, { "epoch": 30.33467202141901, "grad_norm": 0.2028917819261551, "learning_rate": 9.171395234899545e-05, "loss": 0.0113, "step": 22660 }, { "epoch": 30.34805890227577, "grad_norm": 0.38527345657348633, "learning_rate": 9.170483378765214e-05, "loss": 0.0159, "step": 22670 }, { "epoch": 30.36144578313253, "grad_norm": 0.2352435141801834, "learning_rate": 9.169571066553091e-05, "loss": 0.0128, "step": 22680 }, { "epoch": 30.37483266398929, "grad_norm": 0.20547598600387573, "learning_rate": 9.168658298362946e-05, "loss": 0.016, "step": 22690 }, { "epoch": 30.388219544846052, "grad_norm": 0.2711181342601776, "learning_rate": 9.167745074294598e-05, "loss": 0.0133, "step": 22700 }, { "epoch": 30.401606425702813, "grad_norm": 0.24533072113990784, "learning_rate": 9.166831394447913e-05, "loss": 0.0148, "step": 22710 }, { "epoch": 30.414993306559573, "grad_norm": 0.3732714056968689, "learning_rate": 9.165917258922812e-05, "loss": 0.0145, "step": 22720 }, { "epoch": 30.42838018741633, "grad_norm": 0.24769359827041626, "learning_rate": 9.165002667819262e-05, "loss": 0.0142, "step": 22730 }, { "epoch": 30.44176706827309, "grad_norm": 0.19436362385749817, "learning_rate": 9.164087621237282e-05, "loss": 0.0127, "step": 22740 }, { "epoch": 30.45515394912985, "grad_norm": 0.3694249093532562, "learning_rate": 9.163172119276942e-05, "loss": 0.0119, "step": 22750 }, { "epoch": 30.468540829986612, "grad_norm": 0.37499016523361206, "learning_rate": 9.162256162038358e-05, "loss": 0.0137, "step": 22760 }, { "epoch": 30.481927710843372, "grad_norm": 0.16872191429138184, "learning_rate": 9.161339749621698e-05, "loss": 0.0126, "step": 22770 }, { "epoch": 30.495314591700133, "grad_norm": 0.1506834328174591, "learning_rate": 9.160422882127177e-05, "loss": 0.0131, "step": 22780 }, { "epoch": 30.508701472556893, "grad_norm": 0.27465325593948364, "learning_rate": 9.159505559655069e-05, "loss": 0.0128, "step": 22790 }, { "epoch": 30.522088353413654, "grad_norm": 0.15983882546424866, "learning_rate": 9.158587782305684e-05, "loss": 0.0131, "step": 22800 }, { "epoch": 30.535475234270415, "grad_norm": 0.13208431005477905, "learning_rate": 9.157669550179391e-05, "loss": 0.0114, "step": 22810 }, { "epoch": 30.548862115127175, "grad_norm": 0.17907267808914185, "learning_rate": 9.156750863376609e-05, "loss": 0.0095, "step": 22820 }, { "epoch": 30.562248995983936, "grad_norm": 0.4148152470588684, "learning_rate": 9.155831721997801e-05, "loss": 0.0112, "step": 22830 }, { "epoch": 30.575635876840696, "grad_norm": 0.3587210476398468, "learning_rate": 9.154912126143484e-05, "loss": 0.0115, "step": 22840 }, { "epoch": 30.589022757697457, "grad_norm": 0.1352262645959854, "learning_rate": 9.153992075914224e-05, "loss": 0.0098, "step": 22850 }, { "epoch": 30.602409638554217, "grad_norm": 0.29143255949020386, "learning_rate": 9.153071571410635e-05, "loss": 0.0112, "step": 22860 }, { "epoch": 30.615796519410978, "grad_norm": 0.21093040704727173, "learning_rate": 9.152150612733384e-05, "loss": 0.0117, "step": 22870 }, { "epoch": 30.62918340026774, "grad_norm": 0.2187035232782364, "learning_rate": 9.151229199983184e-05, "loss": 0.0138, "step": 22880 }, { "epoch": 30.6425702811245, "grad_norm": 0.1888575553894043, "learning_rate": 9.150307333260802e-05, "loss": 0.0137, "step": 22890 }, { "epoch": 30.65595716198126, "grad_norm": 0.19159725308418274, "learning_rate": 9.149385012667048e-05, "loss": 0.0132, "step": 22900 }, { "epoch": 30.66934404283802, "grad_norm": 0.17314355075359344, "learning_rate": 9.148462238302788e-05, "loss": 0.0109, "step": 22910 }, { "epoch": 30.68273092369478, "grad_norm": 0.26336178183555603, "learning_rate": 9.147539010268936e-05, "loss": 0.0135, "step": 22920 }, { "epoch": 30.69611780455154, "grad_norm": 0.1942424327135086, "learning_rate": 9.14661532866645e-05, "loss": 0.0124, "step": 22930 }, { "epoch": 30.7095046854083, "grad_norm": 0.2236313670873642, "learning_rate": 9.145691193596348e-05, "loss": 0.0131, "step": 22940 }, { "epoch": 30.72289156626506, "grad_norm": 0.3134929835796356, "learning_rate": 9.144766605159691e-05, "loss": 0.0144, "step": 22950 }, { "epoch": 30.73627844712182, "grad_norm": 0.17652925848960876, "learning_rate": 9.14384156345759e-05, "loss": 0.0125, "step": 22960 }, { "epoch": 30.74966532797858, "grad_norm": 0.18811914324760437, "learning_rate": 9.142916068591204e-05, "loss": 0.0093, "step": 22970 }, { "epoch": 30.76305220883534, "grad_norm": 0.42763084173202515, "learning_rate": 9.141990120661746e-05, "loss": 0.0139, "step": 22980 }, { "epoch": 30.7764390896921, "grad_norm": 0.12529496848583221, "learning_rate": 9.141063719770475e-05, "loss": 0.0101, "step": 22990 }, { "epoch": 30.78982597054886, "grad_norm": 0.12899816036224365, "learning_rate": 9.140136866018704e-05, "loss": 0.0134, "step": 23000 }, { "epoch": 30.803212851405622, "grad_norm": 0.18077035248279572, "learning_rate": 9.139209559507788e-05, "loss": 0.0129, "step": 23010 }, { "epoch": 30.816599732262382, "grad_norm": 0.7673326730728149, "learning_rate": 9.13828180033914e-05, "loss": 0.0108, "step": 23020 }, { "epoch": 30.829986613119143, "grad_norm": 0.2501109838485718, "learning_rate": 9.137353588614212e-05, "loss": 0.0127, "step": 23030 }, { "epoch": 30.843373493975903, "grad_norm": 0.17980565130710602, "learning_rate": 9.136424924434519e-05, "loss": 0.0123, "step": 23040 }, { "epoch": 30.856760374832664, "grad_norm": 0.19513365626335144, "learning_rate": 9.135495807901615e-05, "loss": 0.0119, "step": 23050 }, { "epoch": 30.870147255689425, "grad_norm": 0.2010471671819687, "learning_rate": 9.134566239117108e-05, "loss": 0.0132, "step": 23060 }, { "epoch": 30.883534136546185, "grad_norm": 0.1881069839000702, "learning_rate": 9.13363621818265e-05, "loss": 0.0135, "step": 23070 }, { "epoch": 30.896921017402946, "grad_norm": 0.6633850336074829, "learning_rate": 9.132705745199953e-05, "loss": 0.0143, "step": 23080 }, { "epoch": 30.910307898259706, "grad_norm": 0.26158276200294495, "learning_rate": 9.131774820270768e-05, "loss": 0.0107, "step": 23090 }, { "epoch": 30.923694779116467, "grad_norm": 0.19016818702220917, "learning_rate": 9.130843443496901e-05, "loss": 0.0119, "step": 23100 }, { "epoch": 30.937081659973227, "grad_norm": 0.20617720484733582, "learning_rate": 9.129911614980206e-05, "loss": 0.0129, "step": 23110 }, { "epoch": 30.950468540829988, "grad_norm": 0.1866970807313919, "learning_rate": 9.128979334822584e-05, "loss": 0.0119, "step": 23120 }, { "epoch": 30.96385542168675, "grad_norm": 0.22950142621994019, "learning_rate": 9.128046603125992e-05, "loss": 0.0142, "step": 23130 }, { "epoch": 30.97724230254351, "grad_norm": 0.1447577178478241, "learning_rate": 9.12711341999243e-05, "loss": 0.0123, "step": 23140 }, { "epoch": 30.99062918340027, "grad_norm": 0.1814243346452713, "learning_rate": 9.12617978552395e-05, "loss": 0.0123, "step": 23150 }, { "epoch": 31.004016064257026, "grad_norm": 1.0605344772338867, "learning_rate": 9.12524569982265e-05, "loss": 0.0126, "step": 23160 }, { "epoch": 31.017402945113787, "grad_norm": 0.18132682144641876, "learning_rate": 9.124311162990684e-05, "loss": 0.0126, "step": 23170 }, { "epoch": 31.030789825970547, "grad_norm": 0.3270533084869385, "learning_rate": 9.12337617513025e-05, "loss": 0.0152, "step": 23180 }, { "epoch": 31.044176706827308, "grad_norm": 0.16277778148651123, "learning_rate": 9.122440736343596e-05, "loss": 0.0132, "step": 23190 }, { "epoch": 31.05756358768407, "grad_norm": 0.2529515326023102, "learning_rate": 9.12150484673302e-05, "loss": 0.0125, "step": 23200 }, { "epoch": 31.07095046854083, "grad_norm": 0.2744923233985901, "learning_rate": 9.120568506400873e-05, "loss": 0.0127, "step": 23210 }, { "epoch": 31.08433734939759, "grad_norm": 1.311956524848938, "learning_rate": 9.119631715449548e-05, "loss": 0.0121, "step": 23220 }, { "epoch": 31.09772423025435, "grad_norm": 0.1455685943365097, "learning_rate": 9.118694473981493e-05, "loss": 0.0121, "step": 23230 }, { "epoch": 31.11111111111111, "grad_norm": 0.19599926471710205, "learning_rate": 9.117756782099203e-05, "loss": 0.0111, "step": 23240 }, { "epoch": 31.12449799196787, "grad_norm": 0.1880214959383011, "learning_rate": 9.11681863990522e-05, "loss": 0.0123, "step": 23250 }, { "epoch": 31.137884872824632, "grad_norm": 0.205248162150383, "learning_rate": 9.115880047502142e-05, "loss": 0.0107, "step": 23260 }, { "epoch": 31.151271753681392, "grad_norm": 0.22831980884075165, "learning_rate": 9.114941004992609e-05, "loss": 0.0132, "step": 23270 }, { "epoch": 31.164658634538153, "grad_norm": 0.12402219325304031, "learning_rate": 9.114001512479317e-05, "loss": 0.0108, "step": 23280 }, { "epoch": 31.178045515394913, "grad_norm": 0.4879927635192871, "learning_rate": 9.113061570065003e-05, "loss": 0.0127, "step": 23290 }, { "epoch": 31.191432396251674, "grad_norm": 0.5215287804603577, "learning_rate": 9.112121177852459e-05, "loss": 0.013, "step": 23300 }, { "epoch": 31.204819277108435, "grad_norm": 0.21079260110855103, "learning_rate": 9.111180335944527e-05, "loss": 0.0125, "step": 23310 }, { "epoch": 31.218206157965195, "grad_norm": 0.15001477301120758, "learning_rate": 9.110239044444093e-05, "loss": 0.01, "step": 23320 }, { "epoch": 31.231593038821956, "grad_norm": 0.2726813852787018, "learning_rate": 9.109297303454099e-05, "loss": 0.0123, "step": 23330 }, { "epoch": 31.244979919678716, "grad_norm": 0.15857535600662231, "learning_rate": 9.108355113077526e-05, "loss": 0.0101, "step": 23340 }, { "epoch": 31.258366800535477, "grad_norm": 0.2974711060523987, "learning_rate": 9.107412473417419e-05, "loss": 0.013, "step": 23350 }, { "epoch": 31.271753681392237, "grad_norm": 0.15317264199256897, "learning_rate": 9.106469384576858e-05, "loss": 0.0143, "step": 23360 }, { "epoch": 31.285140562248998, "grad_norm": 0.2184690535068512, "learning_rate": 9.105525846658978e-05, "loss": 0.0113, "step": 23370 }, { "epoch": 31.298527443105755, "grad_norm": 0.23737148940563202, "learning_rate": 9.104581859766965e-05, "loss": 0.0126, "step": 23380 }, { "epoch": 31.311914323962515, "grad_norm": 0.1964758187532425, "learning_rate": 9.10363742400405e-05, "loss": 0.0134, "step": 23390 }, { "epoch": 31.325301204819276, "grad_norm": 0.24911922216415405, "learning_rate": 9.102692539473518e-05, "loss": 0.0125, "step": 23400 }, { "epoch": 31.338688085676036, "grad_norm": 0.1482793390750885, "learning_rate": 9.101747206278697e-05, "loss": 0.012, "step": 23410 }, { "epoch": 31.352074966532797, "grad_norm": 0.17397095263004303, "learning_rate": 9.100801424522968e-05, "loss": 0.012, "step": 23420 }, { "epoch": 31.365461847389557, "grad_norm": 0.17205265164375305, "learning_rate": 9.099855194309762e-05, "loss": 0.0118, "step": 23430 }, { "epoch": 31.378848728246318, "grad_norm": 0.26745113730430603, "learning_rate": 9.098908515742554e-05, "loss": 0.0144, "step": 23440 }, { "epoch": 31.39223560910308, "grad_norm": 0.25026971101760864, "learning_rate": 9.097961388924873e-05, "loss": 0.0128, "step": 23450 }, { "epoch": 31.40562248995984, "grad_norm": 0.20131173729896545, "learning_rate": 9.097013813960298e-05, "loss": 0.0119, "step": 23460 }, { "epoch": 31.4190093708166, "grad_norm": 0.11866898089647293, "learning_rate": 9.09606579095245e-05, "loss": 0.0123, "step": 23470 }, { "epoch": 31.43239625167336, "grad_norm": 0.2159038633108139, "learning_rate": 9.095117320005008e-05, "loss": 0.0137, "step": 23480 }, { "epoch": 31.44578313253012, "grad_norm": 0.2711471915245056, "learning_rate": 9.094168401221691e-05, "loss": 0.0153, "step": 23490 }, { "epoch": 31.45917001338688, "grad_norm": 0.19125716388225555, "learning_rate": 9.093219034706273e-05, "loss": 0.0132, "step": 23500 }, { "epoch": 31.472556894243642, "grad_norm": 0.24008774757385254, "learning_rate": 9.092269220562577e-05, "loss": 0.0105, "step": 23510 }, { "epoch": 31.485943775100402, "grad_norm": 0.25380128622055054, "learning_rate": 9.09131895889447e-05, "loss": 0.0128, "step": 23520 }, { "epoch": 31.499330655957163, "grad_norm": 0.3831942677497864, "learning_rate": 9.090368249805873e-05, "loss": 0.0131, "step": 23530 }, { "epoch": 31.512717536813923, "grad_norm": 0.24931393563747406, "learning_rate": 9.089417093400754e-05, "loss": 0.0117, "step": 23540 }, { "epoch": 31.526104417670684, "grad_norm": 0.41654035449028015, "learning_rate": 9.088465489783131e-05, "loss": 0.0134, "step": 23550 }, { "epoch": 31.539491298527444, "grad_norm": 2.0749194622039795, "learning_rate": 9.087513439057068e-05, "loss": 0.0115, "step": 23560 }, { "epoch": 31.552878179384205, "grad_norm": 0.23463307321071625, "learning_rate": 9.08656094132668e-05, "loss": 0.0155, "step": 23570 }, { "epoch": 31.566265060240966, "grad_norm": 0.2159605473279953, "learning_rate": 9.085607996696134e-05, "loss": 0.0117, "step": 23580 }, { "epoch": 31.579651941097723, "grad_norm": 0.3032042980194092, "learning_rate": 9.084654605269639e-05, "loss": 0.0129, "step": 23590 }, { "epoch": 31.593038821954483, "grad_norm": 0.2270377278327942, "learning_rate": 9.083700767151457e-05, "loss": 0.0114, "step": 23600 }, { "epoch": 31.606425702811244, "grad_norm": 0.2538822591304779, "learning_rate": 9.082746482445898e-05, "loss": 0.0124, "step": 23610 }, { "epoch": 31.619812583668004, "grad_norm": 0.24019549787044525, "learning_rate": 9.081791751257325e-05, "loss": 0.0124, "step": 23620 }, { "epoch": 31.633199464524765, "grad_norm": 0.17413872480392456, "learning_rate": 9.080836573690142e-05, "loss": 0.0118, "step": 23630 }, { "epoch": 31.646586345381525, "grad_norm": 0.1839628517627716, "learning_rate": 9.079880949848805e-05, "loss": 0.0129, "step": 23640 }, { "epoch": 31.659973226238286, "grad_norm": 0.1984264999628067, "learning_rate": 9.078924879837822e-05, "loss": 0.0122, "step": 23650 }, { "epoch": 31.673360107095046, "grad_norm": 0.6030467748641968, "learning_rate": 9.077968363761747e-05, "loss": 0.0112, "step": 23660 }, { "epoch": 31.686746987951807, "grad_norm": 0.9815787076950073, "learning_rate": 9.077011401725182e-05, "loss": 0.0108, "step": 23670 }, { "epoch": 31.700133868808567, "grad_norm": 0.2735455334186554, "learning_rate": 9.07605399383278e-05, "loss": 0.0137, "step": 23680 }, { "epoch": 31.713520749665328, "grad_norm": 0.15877984464168549, "learning_rate": 9.075096140189243e-05, "loss": 0.0122, "step": 23690 }, { "epoch": 31.72690763052209, "grad_norm": 0.27030956745147705, "learning_rate": 9.074137840899318e-05, "loss": 0.0112, "step": 23700 }, { "epoch": 31.74029451137885, "grad_norm": 0.4335390627384186, "learning_rate": 9.073179096067804e-05, "loss": 0.0121, "step": 23710 }, { "epoch": 31.75368139223561, "grad_norm": 0.34896910190582275, "learning_rate": 9.072219905799549e-05, "loss": 0.0118, "step": 23720 }, { "epoch": 31.76706827309237, "grad_norm": 0.31291988492012024, "learning_rate": 9.071260270199447e-05, "loss": 0.0128, "step": 23730 }, { "epoch": 31.78045515394913, "grad_norm": 0.3026314377784729, "learning_rate": 9.070300189372441e-05, "loss": 0.0138, "step": 23740 }, { "epoch": 31.79384203480589, "grad_norm": 0.33866560459136963, "learning_rate": 9.069339663423528e-05, "loss": 0.011, "step": 23750 }, { "epoch": 31.80722891566265, "grad_norm": 0.4334571063518524, "learning_rate": 9.068378692457747e-05, "loss": 0.0137, "step": 23760 }, { "epoch": 31.820615796519412, "grad_norm": 0.31370094418525696, "learning_rate": 9.067417276580189e-05, "loss": 0.0144, "step": 23770 }, { "epoch": 31.834002677376173, "grad_norm": 0.15256674587726593, "learning_rate": 9.066455415895993e-05, "loss": 0.0133, "step": 23780 }, { "epoch": 31.847389558232933, "grad_norm": 0.18431097269058228, "learning_rate": 9.065493110510346e-05, "loss": 0.0133, "step": 23790 }, { "epoch": 31.86077643908969, "grad_norm": 0.25223037600517273, "learning_rate": 9.064530360528484e-05, "loss": 0.0122, "step": 23800 }, { "epoch": 31.87416331994645, "grad_norm": 0.1548650711774826, "learning_rate": 9.063567166055695e-05, "loss": 0.0123, "step": 23810 }, { "epoch": 31.88755020080321, "grad_norm": 0.31195157766342163, "learning_rate": 9.062603527197308e-05, "loss": 0.0111, "step": 23820 }, { "epoch": 31.900937081659972, "grad_norm": 0.3119724690914154, "learning_rate": 9.06163944405871e-05, "loss": 0.0121, "step": 23830 }, { "epoch": 31.914323962516733, "grad_norm": 0.3135789632797241, "learning_rate": 9.060674916745327e-05, "loss": 0.0132, "step": 23840 }, { "epoch": 31.927710843373493, "grad_norm": 0.21707375347614288, "learning_rate": 9.05970994536264e-05, "loss": 0.0142, "step": 23850 }, { "epoch": 31.941097724230254, "grad_norm": 0.2622365951538086, "learning_rate": 9.05874453001618e-05, "loss": 0.0125, "step": 23860 }, { "epoch": 31.954484605087014, "grad_norm": 0.3565906882286072, "learning_rate": 9.057778670811517e-05, "loss": 0.0129, "step": 23870 }, { "epoch": 31.967871485943775, "grad_norm": 0.8743594288825989, "learning_rate": 9.056812367854281e-05, "loss": 0.0136, "step": 23880 }, { "epoch": 31.981258366800535, "grad_norm": 0.16221649944782257, "learning_rate": 9.055845621250143e-05, "loss": 0.0107, "step": 23890 }, { "epoch": 31.994645247657296, "grad_norm": 0.3427935242652893, "learning_rate": 9.054878431104825e-05, "loss": 0.0132, "step": 23900 }, { "epoch": 32.00803212851405, "grad_norm": 0.19652865827083588, "learning_rate": 9.0539107975241e-05, "loss": 0.0121, "step": 23910 }, { "epoch": 32.02141900937082, "grad_norm": 0.16208279132843018, "learning_rate": 9.052942720613784e-05, "loss": 0.0111, "step": 23920 }, { "epoch": 32.034805890227574, "grad_norm": 0.18484413623809814, "learning_rate": 9.051974200479745e-05, "loss": 0.0109, "step": 23930 }, { "epoch": 32.04819277108434, "grad_norm": 0.2691413462162018, "learning_rate": 9.051005237227901e-05, "loss": 0.01, "step": 23940 }, { "epoch": 32.061579651941095, "grad_norm": 0.18905562162399292, "learning_rate": 9.050035830964215e-05, "loss": 0.0096, "step": 23950 }, { "epoch": 32.07496653279786, "grad_norm": 0.37168097496032715, "learning_rate": 9.049065981794698e-05, "loss": 0.0114, "step": 23960 }, { "epoch": 32.088353413654616, "grad_norm": 0.16525672376155853, "learning_rate": 9.048095689825414e-05, "loss": 0.0123, "step": 23970 }, { "epoch": 32.10174029451138, "grad_norm": 0.18955634534358978, "learning_rate": 9.047124955162472e-05, "loss": 0.0126, "step": 23980 }, { "epoch": 32.11512717536814, "grad_norm": 0.1997326910495758, "learning_rate": 9.046153777912028e-05, "loss": 0.0151, "step": 23990 }, { "epoch": 32.1285140562249, "grad_norm": 0.1379009485244751, "learning_rate": 9.045182158180292e-05, "loss": 0.0122, "step": 24000 }, { "epoch": 32.14190093708166, "grad_norm": 0.19372670352458954, "learning_rate": 9.044210096073516e-05, "loss": 0.0114, "step": 24010 }, { "epoch": 32.15528781793842, "grad_norm": 0.3639601767063141, "learning_rate": 9.043237591698004e-05, "loss": 0.0114, "step": 24020 }, { "epoch": 32.16867469879518, "grad_norm": 0.18655064702033997, "learning_rate": 9.04226464516011e-05, "loss": 0.0144, "step": 24030 }, { "epoch": 32.18206157965194, "grad_norm": 0.22212064266204834, "learning_rate": 9.041291256566229e-05, "loss": 0.0124, "step": 24040 }, { "epoch": 32.1954484605087, "grad_norm": 0.6517068147659302, "learning_rate": 9.040317426022814e-05, "loss": 0.0106, "step": 24050 }, { "epoch": 32.208835341365464, "grad_norm": 0.21096652746200562, "learning_rate": 9.03934315363636e-05, "loss": 0.013, "step": 24060 }, { "epoch": 32.22222222222222, "grad_norm": 0.22008445858955383, "learning_rate": 9.038368439513409e-05, "loss": 0.0128, "step": 24070 }, { "epoch": 32.235609103078986, "grad_norm": 0.43181002140045166, "learning_rate": 9.03739328376056e-05, "loss": 0.0112, "step": 24080 }, { "epoch": 32.24899598393574, "grad_norm": 0.19463767111301422, "learning_rate": 9.036417686484451e-05, "loss": 0.0129, "step": 24090 }, { "epoch": 32.26238286479251, "grad_norm": 0.21326236426830292, "learning_rate": 9.035441647791773e-05, "loss": 0.0112, "step": 24100 }, { "epoch": 32.275769745649264, "grad_norm": 0.2168308049440384, "learning_rate": 9.034465167789263e-05, "loss": 0.0116, "step": 24110 }, { "epoch": 32.28915662650602, "grad_norm": 0.3577476739883423, "learning_rate": 9.033488246583706e-05, "loss": 0.0121, "step": 24120 }, { "epoch": 32.302543507362785, "grad_norm": 0.2053362876176834, "learning_rate": 9.032510884281941e-05, "loss": 0.012, "step": 24130 }, { "epoch": 32.31593038821954, "grad_norm": 0.2291492074728012, "learning_rate": 9.031533080990848e-05, "loss": 0.0118, "step": 24140 }, { "epoch": 32.329317269076306, "grad_norm": 0.375667005777359, "learning_rate": 9.030554836817358e-05, "loss": 0.0112, "step": 24150 }, { "epoch": 32.34270414993306, "grad_norm": 0.21773439645767212, "learning_rate": 9.029576151868451e-05, "loss": 0.0122, "step": 24160 }, { "epoch": 32.35609103078983, "grad_norm": 0.15753722190856934, "learning_rate": 9.028597026251155e-05, "loss": 0.0102, "step": 24170 }, { "epoch": 32.369477911646584, "grad_norm": 0.40317317843437195, "learning_rate": 9.027617460072547e-05, "loss": 0.0137, "step": 24180 }, { "epoch": 32.38286479250335, "grad_norm": 0.212739035487175, "learning_rate": 9.026637453439745e-05, "loss": 0.0112, "step": 24190 }, { "epoch": 32.396251673360105, "grad_norm": 0.29585903882980347, "learning_rate": 9.025657006459927e-05, "loss": 0.0135, "step": 24200 }, { "epoch": 32.40963855421687, "grad_norm": 0.19202667474746704, "learning_rate": 9.024676119240311e-05, "loss": 0.0112, "step": 24210 }, { "epoch": 32.423025435073626, "grad_norm": 0.14061114192008972, "learning_rate": 9.023694791888166e-05, "loss": 0.0148, "step": 24220 }, { "epoch": 32.43641231593039, "grad_norm": 0.44228583574295044, "learning_rate": 9.022713024510808e-05, "loss": 0.012, "step": 24230 }, { "epoch": 32.44979919678715, "grad_norm": 0.17363011837005615, "learning_rate": 9.021730817215601e-05, "loss": 0.0127, "step": 24240 }, { "epoch": 32.46318607764391, "grad_norm": 0.1613643914461136, "learning_rate": 9.02074817010996e-05, "loss": 0.014, "step": 24250 }, { "epoch": 32.47657295850067, "grad_norm": 0.18240362405776978, "learning_rate": 9.019765083301342e-05, "loss": 0.0097, "step": 24260 }, { "epoch": 32.48995983935743, "grad_norm": 0.30965885519981384, "learning_rate": 9.01878155689726e-05, "loss": 0.0132, "step": 24270 }, { "epoch": 32.50334672021419, "grad_norm": 0.22406116127967834, "learning_rate": 9.017797591005268e-05, "loss": 0.0131, "step": 24280 }, { "epoch": 32.51673360107095, "grad_norm": 0.2946297526359558, "learning_rate": 9.016813185732972e-05, "loss": 0.0111, "step": 24290 }, { "epoch": 32.53012048192771, "grad_norm": 0.21967479586601257, "learning_rate": 9.015828341188027e-05, "loss": 0.0127, "step": 24300 }, { "epoch": 32.543507362784474, "grad_norm": 0.18086126446723938, "learning_rate": 9.01484305747813e-05, "loss": 0.011, "step": 24310 }, { "epoch": 32.55689424364123, "grad_norm": 0.1479504406452179, "learning_rate": 9.013857334711033e-05, "loss": 0.0124, "step": 24320 }, { "epoch": 32.570281124497996, "grad_norm": 0.1608758270740509, "learning_rate": 9.012871172994534e-05, "loss": 0.0139, "step": 24330 }, { "epoch": 32.58366800535475, "grad_norm": 0.21221530437469482, "learning_rate": 9.011884572436476e-05, "loss": 0.0126, "step": 24340 }, { "epoch": 32.59705488621151, "grad_norm": 0.1697196662425995, "learning_rate": 9.010897533144754e-05, "loss": 0.0114, "step": 24350 }, { "epoch": 32.610441767068274, "grad_norm": 0.3655845820903778, "learning_rate": 9.009910055227306e-05, "loss": 0.0127, "step": 24360 }, { "epoch": 32.62382864792503, "grad_norm": 0.1997029334306717, "learning_rate": 9.008922138792124e-05, "loss": 0.0122, "step": 24370 }, { "epoch": 32.637215528781795, "grad_norm": 0.20723378658294678, "learning_rate": 9.007933783947244e-05, "loss": 0.0123, "step": 24380 }, { "epoch": 32.65060240963855, "grad_norm": 0.20297326147556305, "learning_rate": 9.006944990800752e-05, "loss": 0.0115, "step": 24390 }, { "epoch": 32.663989290495316, "grad_norm": 0.24137786030769348, "learning_rate": 9.005955759460779e-05, "loss": 0.0103, "step": 24400 }, { "epoch": 32.67737617135207, "grad_norm": 0.18827463686466217, "learning_rate": 9.004966090035508e-05, "loss": 0.0112, "step": 24410 }, { "epoch": 32.69076305220884, "grad_norm": 0.21545439958572388, "learning_rate": 9.003975982633166e-05, "loss": 0.0135, "step": 24420 }, { "epoch": 32.704149933065594, "grad_norm": 0.263240784406662, "learning_rate": 9.00298543736203e-05, "loss": 0.0156, "step": 24430 }, { "epoch": 32.71753681392236, "grad_norm": 0.3085300922393799, "learning_rate": 9.001994454330427e-05, "loss": 0.0135, "step": 24440 }, { "epoch": 32.730923694779115, "grad_norm": 0.26857393980026245, "learning_rate": 9.001003033646727e-05, "loss": 0.0115, "step": 24450 }, { "epoch": 32.74431057563588, "grad_norm": 0.24905277788639069, "learning_rate": 9.00001117541935e-05, "loss": 0.0173, "step": 24460 }, { "epoch": 32.757697456492636, "grad_norm": 0.16270670294761658, "learning_rate": 8.999018879756764e-05, "loss": 0.0141, "step": 24470 }, { "epoch": 32.7710843373494, "grad_norm": 0.19902227818965912, "learning_rate": 8.998026146767487e-05, "loss": 0.0116, "step": 24480 }, { "epoch": 32.78447121820616, "grad_norm": 0.1767280399799347, "learning_rate": 8.99703297656008e-05, "loss": 0.0109, "step": 24490 }, { "epoch": 32.79785809906292, "grad_norm": 0.16332431137561798, "learning_rate": 8.996039369243156e-05, "loss": 0.0129, "step": 24500 }, { "epoch": 32.81124497991968, "grad_norm": 0.23984265327453613, "learning_rate": 8.995045324925378e-05, "loss": 0.0101, "step": 24510 }, { "epoch": 32.82463186077644, "grad_norm": 0.15917232632637024, "learning_rate": 8.994050843715448e-05, "loss": 0.0135, "step": 24520 }, { "epoch": 32.8380187416332, "grad_norm": 0.8498572111129761, "learning_rate": 8.993055925722121e-05, "loss": 0.0129, "step": 24530 }, { "epoch": 32.85140562248996, "grad_norm": 0.43343719840049744, "learning_rate": 8.992060571054202e-05, "loss": 0.0128, "step": 24540 }, { "epoch": 32.86479250334672, "grad_norm": 0.36066770553588867, "learning_rate": 8.991064779820542e-05, "loss": 0.0128, "step": 24550 }, { "epoch": 32.87817938420348, "grad_norm": 1.7000783681869507, "learning_rate": 8.990068552130036e-05, "loss": 0.0117, "step": 24560 }, { "epoch": 32.89156626506024, "grad_norm": 0.5623242259025574, "learning_rate": 8.989071888091634e-05, "loss": 0.0127, "step": 24570 }, { "epoch": 32.904953145917, "grad_norm": 0.46058693528175354, "learning_rate": 8.988074787814329e-05, "loss": 0.0098, "step": 24580 }, { "epoch": 32.91834002677376, "grad_norm": 0.19413861632347107, "learning_rate": 8.987077251407158e-05, "loss": 0.0113, "step": 24590 }, { "epoch": 32.93172690763052, "grad_norm": 0.16048979759216309, "learning_rate": 8.986079278979216e-05, "loss": 0.0108, "step": 24600 }, { "epoch": 32.945113788487284, "grad_norm": 0.23521366715431213, "learning_rate": 8.985080870639635e-05, "loss": 0.0113, "step": 24610 }, { "epoch": 32.95850066934404, "grad_norm": 0.171860933303833, "learning_rate": 8.984082026497603e-05, "loss": 0.0102, "step": 24620 }, { "epoch": 32.971887550200805, "grad_norm": 0.21174781024456024, "learning_rate": 8.98308274666235e-05, "loss": 0.0109, "step": 24630 }, { "epoch": 32.98527443105756, "grad_norm": 0.18925893306732178, "learning_rate": 8.982083031243155e-05, "loss": 0.0103, "step": 24640 }, { "epoch": 32.998661311914326, "grad_norm": 0.1693045198917389, "learning_rate": 8.98108288034935e-05, "loss": 0.0114, "step": 24650 }, { "epoch": 33.01204819277108, "grad_norm": 0.22335295379161835, "learning_rate": 8.980082294090305e-05, "loss": 0.0098, "step": 24660 }, { "epoch": 33.02543507362785, "grad_norm": 0.248099222779274, "learning_rate": 8.979081272575443e-05, "loss": 0.0105, "step": 24670 }, { "epoch": 33.038821954484604, "grad_norm": 0.2709220349788666, "learning_rate": 8.978079815914236e-05, "loss": 0.0117, "step": 24680 }, { "epoch": 33.05220883534137, "grad_norm": 0.18053972721099854, "learning_rate": 8.977077924216202e-05, "loss": 0.0101, "step": 24690 }, { "epoch": 33.065595716198125, "grad_norm": 0.2877940237522125, "learning_rate": 8.976075597590905e-05, "loss": 0.0098, "step": 24700 }, { "epoch": 33.07898259705489, "grad_norm": 0.20863479375839233, "learning_rate": 8.975072836147958e-05, "loss": 0.0101, "step": 24710 }, { "epoch": 33.092369477911646, "grad_norm": 0.2514955401420593, "learning_rate": 8.974069639997025e-05, "loss": 0.0114, "step": 24720 }, { "epoch": 33.10575635876841, "grad_norm": 0.15689602494239807, "learning_rate": 8.973066009247808e-05, "loss": 0.012, "step": 24730 }, { "epoch": 33.11914323962517, "grad_norm": 0.42758816480636597, "learning_rate": 8.972061944010066e-05, "loss": 0.0116, "step": 24740 }, { "epoch": 33.13253012048193, "grad_norm": 0.35420769453048706, "learning_rate": 8.971057444393603e-05, "loss": 0.0091, "step": 24750 }, { "epoch": 33.14591700133869, "grad_norm": 1.492099642753601, "learning_rate": 8.970052510508268e-05, "loss": 0.0124, "step": 24760 }, { "epoch": 33.159303882195445, "grad_norm": 0.3114548921585083, "learning_rate": 8.969047142463959e-05, "loss": 0.0127, "step": 24770 }, { "epoch": 33.17269076305221, "grad_norm": 0.1717631071805954, "learning_rate": 8.968041340370621e-05, "loss": 0.0122, "step": 24780 }, { "epoch": 33.186077643908966, "grad_norm": 2.7734873294830322, "learning_rate": 8.96703510433825e-05, "loss": 0.0172, "step": 24790 }, { "epoch": 33.19946452476573, "grad_norm": 0.2587295174598694, "learning_rate": 8.966028434476883e-05, "loss": 0.0167, "step": 24800 }, { "epoch": 33.21285140562249, "grad_norm": 0.16290727257728577, "learning_rate": 8.96502133089661e-05, "loss": 0.0167, "step": 24810 }, { "epoch": 33.22623828647925, "grad_norm": 0.16587837040424347, "learning_rate": 8.964013793707564e-05, "loss": 0.0147, "step": 24820 }, { "epoch": 33.23962516733601, "grad_norm": 0.3280934989452362, "learning_rate": 8.963005823019932e-05, "loss": 0.0129, "step": 24830 }, { "epoch": 33.25301204819277, "grad_norm": 0.35868555307388306, "learning_rate": 8.961997418943939e-05, "loss": 0.0168, "step": 24840 }, { "epoch": 33.26639892904953, "grad_norm": 0.24449408054351807, "learning_rate": 8.960988581589865e-05, "loss": 0.0175, "step": 24850 }, { "epoch": 33.27978580990629, "grad_norm": 0.38375717401504517, "learning_rate": 8.959979311068037e-05, "loss": 0.0126, "step": 24860 }, { "epoch": 33.29317269076305, "grad_norm": 0.22046300768852234, "learning_rate": 8.958969607488823e-05, "loss": 0.0152, "step": 24870 }, { "epoch": 33.306559571619815, "grad_norm": 0.33276668190956116, "learning_rate": 8.957959470962647e-05, "loss": 0.0178, "step": 24880 }, { "epoch": 33.31994645247657, "grad_norm": 0.28196635842323303, "learning_rate": 8.956948901599971e-05, "loss": 0.0148, "step": 24890 }, { "epoch": 33.333333333333336, "grad_norm": 0.20621225237846375, "learning_rate": 8.955937899511315e-05, "loss": 0.0165, "step": 24900 }, { "epoch": 33.34672021419009, "grad_norm": 0.28565436601638794, "learning_rate": 8.954926464807238e-05, "loss": 0.0155, "step": 24910 }, { "epoch": 33.36010709504686, "grad_norm": 0.17602446675300598, "learning_rate": 8.953914597598347e-05, "loss": 0.0126, "step": 24920 }, { "epoch": 33.373493975903614, "grad_norm": 0.2126626819372177, "learning_rate": 8.952902297995303e-05, "loss": 0.014, "step": 24930 }, { "epoch": 33.38688085676038, "grad_norm": 0.1251458376646042, "learning_rate": 8.951889566108804e-05, "loss": 0.0118, "step": 24940 }, { "epoch": 33.400267737617135, "grad_norm": 0.21666420996189117, "learning_rate": 8.950876402049606e-05, "loss": 0.0132, "step": 24950 }, { "epoch": 33.4136546184739, "grad_norm": 0.7848644852638245, "learning_rate": 8.949862805928504e-05, "loss": 0.0123, "step": 24960 }, { "epoch": 33.427041499330656, "grad_norm": 0.14410808682441711, "learning_rate": 8.948848777856343e-05, "loss": 0.0135, "step": 24970 }, { "epoch": 33.44042838018741, "grad_norm": 0.22575904428958893, "learning_rate": 8.947834317944017e-05, "loss": 0.0132, "step": 24980 }, { "epoch": 33.45381526104418, "grad_norm": 0.15037032961845398, "learning_rate": 8.946819426302466e-05, "loss": 0.013, "step": 24990 }, { "epoch": 33.467202141900934, "grad_norm": 0.2652278244495392, "learning_rate": 8.945804103042676e-05, "loss": 0.0128, "step": 25000 }, { "epoch": 33.4805890227577, "grad_norm": 0.2210964560508728, "learning_rate": 8.944788348275681e-05, "loss": 0.0122, "step": 25010 }, { "epoch": 33.493975903614455, "grad_norm": 0.2144128680229187, "learning_rate": 8.943772162112565e-05, "loss": 0.0136, "step": 25020 }, { "epoch": 33.50736278447122, "grad_norm": 0.13306480646133423, "learning_rate": 8.942755544664454e-05, "loss": 0.0115, "step": 25030 }, { "epoch": 33.520749665327976, "grad_norm": 0.258303701877594, "learning_rate": 8.941738496042525e-05, "loss": 0.0108, "step": 25040 }, { "epoch": 33.53413654618474, "grad_norm": 0.24646523594856262, "learning_rate": 8.940721016357999e-05, "loss": 0.0109, "step": 25050 }, { "epoch": 33.5475234270415, "grad_norm": 0.4703138768672943, "learning_rate": 8.939703105722148e-05, "loss": 0.0123, "step": 25060 }, { "epoch": 33.56091030789826, "grad_norm": 0.10779578238725662, "learning_rate": 8.93868476424629e-05, "loss": 0.0117, "step": 25070 }, { "epoch": 33.57429718875502, "grad_norm": 0.14941829442977905, "learning_rate": 8.937665992041786e-05, "loss": 0.0122, "step": 25080 }, { "epoch": 33.58768406961178, "grad_norm": 0.1668533980846405, "learning_rate": 8.93664678922005e-05, "loss": 0.0112, "step": 25090 }, { "epoch": 33.60107095046854, "grad_norm": 0.1853092759847641, "learning_rate": 8.93562715589254e-05, "loss": 0.0131, "step": 25100 }, { "epoch": 33.6144578313253, "grad_norm": 0.16959653794765472, "learning_rate": 8.934607092170762e-05, "loss": 0.0122, "step": 25110 }, { "epoch": 33.62784471218206, "grad_norm": 0.4293333888053894, "learning_rate": 8.933586598166266e-05, "loss": 0.0101, "step": 25120 }, { "epoch": 33.641231593038825, "grad_norm": 0.21766307950019836, "learning_rate": 8.932565673990655e-05, "loss": 0.0135, "step": 25130 }, { "epoch": 33.65461847389558, "grad_norm": 0.1933445930480957, "learning_rate": 8.931544319755574e-05, "loss": 0.0104, "step": 25140 }, { "epoch": 33.668005354752346, "grad_norm": 0.3443896770477295, "learning_rate": 8.930522535572718e-05, "loss": 0.0098, "step": 25150 }, { "epoch": 33.6813922356091, "grad_norm": 0.6733799576759338, "learning_rate": 8.929500321553826e-05, "loss": 0.0141, "step": 25160 }, { "epoch": 33.69477911646587, "grad_norm": 0.1436695009469986, "learning_rate": 8.928477677810686e-05, "loss": 0.0122, "step": 25170 }, { "epoch": 33.708165997322624, "grad_norm": 0.5508798360824585, "learning_rate": 8.927454604455137e-05, "loss": 0.0105, "step": 25180 }, { "epoch": 33.72155287817938, "grad_norm": 0.27971306443214417, "learning_rate": 8.926431101599053e-05, "loss": 0.0131, "step": 25190 }, { "epoch": 33.734939759036145, "grad_norm": 0.23178304731845856, "learning_rate": 8.925407169354369e-05, "loss": 0.0111, "step": 25200 }, { "epoch": 33.7483266398929, "grad_norm": 0.20040066540241241, "learning_rate": 8.92438280783306e-05, "loss": 0.0119, "step": 25210 }, { "epoch": 33.761713520749666, "grad_norm": 0.321507066488266, "learning_rate": 8.923358017147146e-05, "loss": 0.0109, "step": 25220 }, { "epoch": 33.77510040160642, "grad_norm": 0.41257306933403015, "learning_rate": 8.922332797408697e-05, "loss": 0.01, "step": 25230 }, { "epoch": 33.78848728246319, "grad_norm": 0.2001359611749649, "learning_rate": 8.921307148729831e-05, "loss": 0.0119, "step": 25240 }, { "epoch": 33.801874163319944, "grad_norm": 0.25709041953086853, "learning_rate": 8.920281071222712e-05, "loss": 0.0121, "step": 25250 }, { "epoch": 33.81526104417671, "grad_norm": 0.5929139256477356, "learning_rate": 8.919254564999548e-05, "loss": 0.0121, "step": 25260 }, { "epoch": 33.828647925033465, "grad_norm": 0.15648207068443298, "learning_rate": 8.918227630172598e-05, "loss": 0.0112, "step": 25270 }, { "epoch": 33.84203480589023, "grad_norm": 0.21129022538661957, "learning_rate": 8.917200266854165e-05, "loss": 0.0118, "step": 25280 }, { "epoch": 33.855421686746986, "grad_norm": 0.26182791590690613, "learning_rate": 8.9161724751566e-05, "loss": 0.0143, "step": 25290 }, { "epoch": 33.86880856760375, "grad_norm": 0.3204372525215149, "learning_rate": 8.915144255192302e-05, "loss": 0.0101, "step": 25300 }, { "epoch": 33.88219544846051, "grad_norm": 0.30144622921943665, "learning_rate": 8.914115607073714e-05, "loss": 0.0119, "step": 25310 }, { "epoch": 33.89558232931727, "grad_norm": 0.13967442512512207, "learning_rate": 8.913086530913327e-05, "loss": 0.0121, "step": 25320 }, { "epoch": 33.90896921017403, "grad_norm": 0.4566154479980469, "learning_rate": 8.912057026823681e-05, "loss": 0.0105, "step": 25330 }, { "epoch": 33.92235609103079, "grad_norm": 0.32123589515686035, "learning_rate": 8.91102709491736e-05, "loss": 0.0128, "step": 25340 }, { "epoch": 33.93574297188755, "grad_norm": 0.18017517030239105, "learning_rate": 8.909996735306996e-05, "loss": 0.0112, "step": 25350 }, { "epoch": 33.94912985274431, "grad_norm": 0.16833047568798065, "learning_rate": 8.908965948105268e-05, "loss": 0.013, "step": 25360 }, { "epoch": 33.96251673360107, "grad_norm": 0.8862641453742981, "learning_rate": 8.907934733424901e-05, "loss": 0.0114, "step": 25370 }, { "epoch": 33.975903614457835, "grad_norm": 0.22255748510360718, "learning_rate": 8.906903091378666e-05, "loss": 0.0109, "step": 25380 }, { "epoch": 33.98929049531459, "grad_norm": 0.2780456244945526, "learning_rate": 8.905871022079384e-05, "loss": 0.0096, "step": 25390 }, { "epoch": 34.00267737617135, "grad_norm": 0.3401433825492859, "learning_rate": 8.90483852563992e-05, "loss": 0.0122, "step": 25400 }, { "epoch": 34.01606425702811, "grad_norm": 1.0162380933761597, "learning_rate": 8.903805602173185e-05, "loss": 0.0096, "step": 25410 }, { "epoch": 34.02945113788487, "grad_norm": 0.3510029911994934, "learning_rate": 8.902772251792137e-05, "loss": 0.0113, "step": 25420 }, { "epoch": 34.042838018741634, "grad_norm": 0.21390432119369507, "learning_rate": 8.901738474609786e-05, "loss": 0.0121, "step": 25430 }, { "epoch": 34.05622489959839, "grad_norm": 0.25189220905303955, "learning_rate": 8.900704270739179e-05, "loss": 0.0115, "step": 25440 }, { "epoch": 34.069611780455155, "grad_norm": 0.2547735571861267, "learning_rate": 8.89966964029342e-05, "loss": 0.0143, "step": 25450 }, { "epoch": 34.08299866131191, "grad_norm": 0.26736024022102356, "learning_rate": 8.898634583385652e-05, "loss": 0.0094, "step": 25460 }, { "epoch": 34.096385542168676, "grad_norm": 0.13606499135494232, "learning_rate": 8.897599100129065e-05, "loss": 0.0129, "step": 25470 }, { "epoch": 34.10977242302543, "grad_norm": 0.10840287804603577, "learning_rate": 8.896563190636903e-05, "loss": 0.0119, "step": 25480 }, { "epoch": 34.1231593038822, "grad_norm": 0.3877410590648651, "learning_rate": 8.895526855022448e-05, "loss": 0.0119, "step": 25490 }, { "epoch": 34.136546184738954, "grad_norm": 0.18598733842372894, "learning_rate": 8.894490093399033e-05, "loss": 0.0124, "step": 25500 }, { "epoch": 34.14993306559572, "grad_norm": 0.2570599317550659, "learning_rate": 8.893452905880035e-05, "loss": 0.0131, "step": 25510 }, { "epoch": 34.163319946452475, "grad_norm": 0.3402653634548187, "learning_rate": 8.892415292578883e-05, "loss": 0.0124, "step": 25520 }, { "epoch": 34.17670682730924, "grad_norm": 0.19759851694107056, "learning_rate": 8.891377253609046e-05, "loss": 0.0109, "step": 25530 }, { "epoch": 34.190093708165996, "grad_norm": 0.1962110996246338, "learning_rate": 8.890338789084043e-05, "loss": 0.0143, "step": 25540 }, { "epoch": 34.20348058902276, "grad_norm": 0.14066341519355774, "learning_rate": 8.88929989911744e-05, "loss": 0.0124, "step": 25550 }, { "epoch": 34.21686746987952, "grad_norm": 0.1829090118408203, "learning_rate": 8.888260583822847e-05, "loss": 0.0117, "step": 25560 }, { "epoch": 34.23025435073628, "grad_norm": 0.16297701001167297, "learning_rate": 8.887220843313921e-05, "loss": 0.0116, "step": 25570 }, { "epoch": 34.24364123159304, "grad_norm": 0.9115251898765564, "learning_rate": 8.88618067770437e-05, "loss": 0.0119, "step": 25580 }, { "epoch": 34.2570281124498, "grad_norm": 0.2208644598722458, "learning_rate": 8.885140087107942e-05, "loss": 0.0133, "step": 25590 }, { "epoch": 34.27041499330656, "grad_norm": 0.2285415232181549, "learning_rate": 8.884099071638436e-05, "loss": 0.0111, "step": 25600 }, { "epoch": 34.283801874163316, "grad_norm": 0.18353936076164246, "learning_rate": 8.883057631409695e-05, "loss": 0.0145, "step": 25610 }, { "epoch": 34.29718875502008, "grad_norm": 0.13348957896232605, "learning_rate": 8.882015766535608e-05, "loss": 0.0117, "step": 25620 }, { "epoch": 34.31057563587684, "grad_norm": 0.24519462883472443, "learning_rate": 8.880973477130115e-05, "loss": 0.0107, "step": 25630 }, { "epoch": 34.3239625167336, "grad_norm": 0.21756692230701447, "learning_rate": 8.879930763307197e-05, "loss": 0.0105, "step": 25640 }, { "epoch": 34.33734939759036, "grad_norm": 0.3914119303226471, "learning_rate": 8.878887625180884e-05, "loss": 0.0117, "step": 25650 }, { "epoch": 34.35073627844712, "grad_norm": 0.1715836524963379, "learning_rate": 8.877844062865253e-05, "loss": 0.0116, "step": 25660 }, { "epoch": 34.36412315930388, "grad_norm": 0.2590775787830353, "learning_rate": 8.876800076474424e-05, "loss": 0.0114, "step": 25670 }, { "epoch": 34.377510040160644, "grad_norm": 0.8942689299583435, "learning_rate": 8.875755666122568e-05, "loss": 0.0104, "step": 25680 }, { "epoch": 34.3908969210174, "grad_norm": 0.3754810690879822, "learning_rate": 8.8747108319239e-05, "loss": 0.0107, "step": 25690 }, { "epoch": 34.404283801874165, "grad_norm": 0.16544103622436523, "learning_rate": 8.87366557399268e-05, "loss": 0.0107, "step": 25700 }, { "epoch": 34.41767068273092, "grad_norm": 0.29465100169181824, "learning_rate": 8.872619892443217e-05, "loss": 0.0133, "step": 25710 }, { "epoch": 34.431057563587686, "grad_norm": 2.244140625, "learning_rate": 8.871573787389865e-05, "loss": 0.0139, "step": 25720 }, { "epoch": 34.44444444444444, "grad_norm": 0.4464070796966553, "learning_rate": 8.870527258947024e-05, "loss": 0.0112, "step": 25730 }, { "epoch": 34.45783132530121, "grad_norm": 0.14983275532722473, "learning_rate": 8.869480307229143e-05, "loss": 0.0123, "step": 25740 }, { "epoch": 34.471218206157964, "grad_norm": 0.1583736389875412, "learning_rate": 8.868432932350712e-05, "loss": 0.0107, "step": 25750 }, { "epoch": 34.48460508701473, "grad_norm": 1.8866223096847534, "learning_rate": 8.867385134426272e-05, "loss": 0.0112, "step": 25760 }, { "epoch": 34.497991967871485, "grad_norm": 0.5049846768379211, "learning_rate": 8.866336913570407e-05, "loss": 0.0127, "step": 25770 }, { "epoch": 34.51137884872825, "grad_norm": 0.2721305191516876, "learning_rate": 8.865288269897751e-05, "loss": 0.0114, "step": 25780 }, { "epoch": 34.524765729585006, "grad_norm": 0.17719218134880066, "learning_rate": 8.864239203522981e-05, "loss": 0.0127, "step": 25790 }, { "epoch": 34.53815261044177, "grad_norm": 0.16324196755886078, "learning_rate": 8.863189714560822e-05, "loss": 0.0146, "step": 25800 }, { "epoch": 34.55153949129853, "grad_norm": 0.18158762156963348, "learning_rate": 8.862139803126043e-05, "loss": 0.0118, "step": 25810 }, { "epoch": 34.56492637215529, "grad_norm": 0.16931235790252686, "learning_rate": 8.861089469333463e-05, "loss": 0.014, "step": 25820 }, { "epoch": 34.57831325301205, "grad_norm": 0.23017539083957672, "learning_rate": 8.860038713297944e-05, "loss": 0.014, "step": 25830 }, { "epoch": 34.591700133868805, "grad_norm": 0.13210903108119965, "learning_rate": 8.858987535134394e-05, "loss": 0.0096, "step": 25840 }, { "epoch": 34.60508701472557, "grad_norm": 0.2642437517642975, "learning_rate": 8.857935934957769e-05, "loss": 0.0122, "step": 25850 }, { "epoch": 34.618473895582326, "grad_norm": 0.576364278793335, "learning_rate": 8.856883912883071e-05, "loss": 0.0119, "step": 25860 }, { "epoch": 34.63186077643909, "grad_norm": 0.39695438742637634, "learning_rate": 8.855831469025346e-05, "loss": 0.0116, "step": 25870 }, { "epoch": 34.64524765729585, "grad_norm": 0.2933303415775299, "learning_rate": 8.854778603499689e-05, "loss": 0.0129, "step": 25880 }, { "epoch": 34.65863453815261, "grad_norm": 0.17013567686080933, "learning_rate": 8.85372531642124e-05, "loss": 0.0107, "step": 25890 }, { "epoch": 34.67202141900937, "grad_norm": 1.0078059434890747, "learning_rate": 8.852671607905185e-05, "loss": 0.0131, "step": 25900 }, { "epoch": 34.68540829986613, "grad_norm": 0.29369625449180603, "learning_rate": 8.851617478066754e-05, "loss": 0.0117, "step": 25910 }, { "epoch": 34.69879518072289, "grad_norm": 0.5809295177459717, "learning_rate": 8.850562927021227e-05, "loss": 0.0131, "step": 25920 }, { "epoch": 34.712182061579654, "grad_norm": 0.28487974405288696, "learning_rate": 8.849507954883928e-05, "loss": 0.0107, "step": 25930 }, { "epoch": 34.72556894243641, "grad_norm": 0.17907127737998962, "learning_rate": 8.848452561770226e-05, "loss": 0.0105, "step": 25940 }, { "epoch": 34.738955823293175, "grad_norm": 0.18244723975658417, "learning_rate": 8.847396747795538e-05, "loss": 0.0114, "step": 25950 }, { "epoch": 34.75234270414993, "grad_norm": 0.13908593356609344, "learning_rate": 8.846340513075327e-05, "loss": 0.0116, "step": 25960 }, { "epoch": 34.765729585006696, "grad_norm": 0.9468112587928772, "learning_rate": 8.845283857725099e-05, "loss": 0.0114, "step": 25970 }, { "epoch": 34.77911646586345, "grad_norm": 0.10991780459880829, "learning_rate": 8.844226781860409e-05, "loss": 0.0102, "step": 25980 }, { "epoch": 34.79250334672022, "grad_norm": 0.511364758014679, "learning_rate": 8.84316928559686e-05, "loss": 0.0105, "step": 25990 }, { "epoch": 34.805890227576974, "grad_norm": 1.3900320529937744, "learning_rate": 8.842111369050094e-05, "loss": 0.0143, "step": 26000 }, { "epoch": 34.81927710843374, "grad_norm": 0.4621039628982544, "learning_rate": 8.841053032335808e-05, "loss": 0.0175, "step": 26010 }, { "epoch": 34.832663989290495, "grad_norm": 0.14431363344192505, "learning_rate": 8.839994275569735e-05, "loss": 0.0136, "step": 26020 }, { "epoch": 34.84605087014726, "grad_norm": 0.18121908605098724, "learning_rate": 8.838935098867662e-05, "loss": 0.0111, "step": 26030 }, { "epoch": 34.859437751004016, "grad_norm": 0.22371040284633636, "learning_rate": 8.837875502345418e-05, "loss": 0.0137, "step": 26040 }, { "epoch": 34.87282463186077, "grad_norm": 0.6243530511856079, "learning_rate": 8.83681548611888e-05, "loss": 0.0132, "step": 26050 }, { "epoch": 34.88621151271754, "grad_norm": 0.11755707859992981, "learning_rate": 8.835755050303969e-05, "loss": 0.0123, "step": 26060 }, { "epoch": 34.899598393574294, "grad_norm": 0.19850726425647736, "learning_rate": 8.834694195016653e-05, "loss": 0.0119, "step": 26070 }, { "epoch": 34.91298527443106, "grad_norm": 0.14521871507167816, "learning_rate": 8.833632920372942e-05, "loss": 0.0109, "step": 26080 }, { "epoch": 34.926372155287815, "grad_norm": 0.2712528109550476, "learning_rate": 8.832571226488903e-05, "loss": 0.0122, "step": 26090 }, { "epoch": 34.93975903614458, "grad_norm": 0.18104371428489685, "learning_rate": 8.831509113480634e-05, "loss": 0.0102, "step": 26100 }, { "epoch": 34.953145917001336, "grad_norm": 0.14810432493686676, "learning_rate": 8.83044658146429e-05, "loss": 0.0107, "step": 26110 }, { "epoch": 34.9665327978581, "grad_norm": 0.2999187707901001, "learning_rate": 8.829383630556067e-05, "loss": 0.0121, "step": 26120 }, { "epoch": 34.97991967871486, "grad_norm": 0.18992742896080017, "learning_rate": 8.828320260872207e-05, "loss": 0.0119, "step": 26130 }, { "epoch": 34.99330655957162, "grad_norm": 0.11569789797067642, "learning_rate": 8.827256472529e-05, "loss": 0.0112, "step": 26140 }, { "epoch": 35.00669344042838, "grad_norm": 0.20847541093826294, "learning_rate": 8.826192265642778e-05, "loss": 0.0119, "step": 26150 }, { "epoch": 35.02008032128514, "grad_norm": 0.1779152750968933, "learning_rate": 8.825127640329923e-05, "loss": 0.0107, "step": 26160 }, { "epoch": 35.0334672021419, "grad_norm": 0.5987365245819092, "learning_rate": 8.824062596706861e-05, "loss": 0.0098, "step": 26170 }, { "epoch": 35.046854082998664, "grad_norm": 0.20283886790275574, "learning_rate": 8.822997134890062e-05, "loss": 0.0132, "step": 26180 }, { "epoch": 35.06024096385542, "grad_norm": 0.33340615034103394, "learning_rate": 8.821931254996044e-05, "loss": 0.0135, "step": 26190 }, { "epoch": 35.073627844712185, "grad_norm": 0.536311149597168, "learning_rate": 8.82086495714137e-05, "loss": 0.0113, "step": 26200 }, { "epoch": 35.08701472556894, "grad_norm": 0.11948871612548828, "learning_rate": 8.81979824144265e-05, "loss": 0.0099, "step": 26210 }, { "epoch": 35.100401606425706, "grad_norm": 0.17297612130641937, "learning_rate": 8.818731108016536e-05, "loss": 0.0109, "step": 26220 }, { "epoch": 35.11378848728246, "grad_norm": 0.17860351502895355, "learning_rate": 8.81766355697973e-05, "loss": 0.0123, "step": 26230 }, { "epoch": 35.12717536813923, "grad_norm": 0.15131692588329315, "learning_rate": 8.816595588448977e-05, "loss": 0.011, "step": 26240 }, { "epoch": 35.140562248995984, "grad_norm": 0.20079578459262848, "learning_rate": 8.81552720254107e-05, "loss": 0.0126, "step": 26250 }, { "epoch": 35.15394912985274, "grad_norm": 0.23064833879470825, "learning_rate": 8.814458399372842e-05, "loss": 0.0119, "step": 26260 }, { "epoch": 35.167336010709505, "grad_norm": 0.11983641237020493, "learning_rate": 8.813389179061181e-05, "loss": 0.0092, "step": 26270 }, { "epoch": 35.18072289156626, "grad_norm": 0.24202944338321686, "learning_rate": 8.812319541723012e-05, "loss": 0.012, "step": 26280 }, { "epoch": 35.194109772423026, "grad_norm": 0.21504050493240356, "learning_rate": 8.811249487475309e-05, "loss": 0.0127, "step": 26290 }, { "epoch": 35.20749665327978, "grad_norm": 0.21687628328800201, "learning_rate": 8.810179016435092e-05, "loss": 0.0128, "step": 26300 }, { "epoch": 35.22088353413655, "grad_norm": 0.15552714467048645, "learning_rate": 8.809108128719428e-05, "loss": 0.0101, "step": 26310 }, { "epoch": 35.234270414993304, "grad_norm": 0.7400887608528137, "learning_rate": 8.808036824445424e-05, "loss": 0.0096, "step": 26320 }, { "epoch": 35.24765729585007, "grad_norm": 0.18760520219802856, "learning_rate": 8.806965103730238e-05, "loss": 0.012, "step": 26330 }, { "epoch": 35.261044176706825, "grad_norm": 0.3026619851589203, "learning_rate": 8.805892966691074e-05, "loss": 0.0143, "step": 26340 }, { "epoch": 35.27443105756359, "grad_norm": 1.5383529663085938, "learning_rate": 8.804820413445175e-05, "loss": 0.0134, "step": 26350 }, { "epoch": 35.287817938420346, "grad_norm": 0.19495239853858948, "learning_rate": 8.803747444109837e-05, "loss": 0.0147, "step": 26360 }, { "epoch": 35.30120481927711, "grad_norm": 0.2129514366388321, "learning_rate": 8.802674058802399e-05, "loss": 0.0122, "step": 26370 }, { "epoch": 35.31459170013387, "grad_norm": 0.21501867473125458, "learning_rate": 8.801600257640241e-05, "loss": 0.0107, "step": 26380 }, { "epoch": 35.32797858099063, "grad_norm": 0.24950599670410156, "learning_rate": 8.800526040740795e-05, "loss": 0.0096, "step": 26390 }, { "epoch": 35.34136546184739, "grad_norm": 0.24148006737232208, "learning_rate": 8.799451408221535e-05, "loss": 0.0096, "step": 26400 }, { "epoch": 35.35475234270415, "grad_norm": 0.28828588128089905, "learning_rate": 8.798376360199982e-05, "loss": 0.0128, "step": 26410 }, { "epoch": 35.36813922356091, "grad_norm": 0.1728399693965912, "learning_rate": 8.797300896793701e-05, "loss": 0.0127, "step": 26420 }, { "epoch": 35.381526104417674, "grad_norm": 0.23337264358997345, "learning_rate": 8.796225018120302e-05, "loss": 0.0141, "step": 26430 }, { "epoch": 35.39491298527443, "grad_norm": 0.28035423159599304, "learning_rate": 8.795148724297444e-05, "loss": 0.01, "step": 26440 }, { "epoch": 35.408299866131195, "grad_norm": 1.5698907375335693, "learning_rate": 8.794072015442825e-05, "loss": 0.0109, "step": 26450 }, { "epoch": 35.42168674698795, "grad_norm": 0.1766662448644638, "learning_rate": 8.792994891674198e-05, "loss": 0.0111, "step": 26460 }, { "epoch": 35.43507362784471, "grad_norm": 0.2180911749601364, "learning_rate": 8.79191735310935e-05, "loss": 0.014, "step": 26470 }, { "epoch": 35.44846050870147, "grad_norm": 0.3746025562286377, "learning_rate": 8.790839399866122e-05, "loss": 0.0112, "step": 26480 }, { "epoch": 35.46184738955823, "grad_norm": 0.24366745352745056, "learning_rate": 8.789761032062397e-05, "loss": 0.0091, "step": 26490 }, { "epoch": 35.475234270414994, "grad_norm": 0.8302205801010132, "learning_rate": 8.788682249816103e-05, "loss": 0.0139, "step": 26500 }, { "epoch": 35.48862115127175, "grad_norm": 0.21463818848133087, "learning_rate": 8.787603053245215e-05, "loss": 0.0118, "step": 26510 }, { "epoch": 35.502008032128515, "grad_norm": 0.16806894540786743, "learning_rate": 8.78652344246775e-05, "loss": 0.0105, "step": 26520 }, { "epoch": 35.51539491298527, "grad_norm": 0.5819910764694214, "learning_rate": 8.785443417601776e-05, "loss": 0.0128, "step": 26530 }, { "epoch": 35.528781793842036, "grad_norm": 0.1398518681526184, "learning_rate": 8.784362978765401e-05, "loss": 0.011, "step": 26540 }, { "epoch": 35.54216867469879, "grad_norm": 0.25479206442832947, "learning_rate": 8.783282126076779e-05, "loss": 0.0126, "step": 26550 }, { "epoch": 35.55555555555556, "grad_norm": 0.26682648062705994, "learning_rate": 8.782200859654112e-05, "loss": 0.0095, "step": 26560 }, { "epoch": 35.568942436412314, "grad_norm": 0.23013491928577423, "learning_rate": 8.781119179615646e-05, "loss": 0.0125, "step": 26570 }, { "epoch": 35.58232931726908, "grad_norm": 0.214827299118042, "learning_rate": 8.780037086079674e-05, "loss": 0.0121, "step": 26580 }, { "epoch": 35.595716198125835, "grad_norm": 0.24361975491046906, "learning_rate": 8.778954579164527e-05, "loss": 0.0114, "step": 26590 }, { "epoch": 35.6091030789826, "grad_norm": 0.2273959219455719, "learning_rate": 8.777871658988588e-05, "loss": 0.0139, "step": 26600 }, { "epoch": 35.622489959839356, "grad_norm": 0.20541608333587646, "learning_rate": 8.776788325670285e-05, "loss": 0.0125, "step": 26610 }, { "epoch": 35.63587684069612, "grad_norm": 0.6963440775871277, "learning_rate": 8.775704579328089e-05, "loss": 0.0124, "step": 26620 }, { "epoch": 35.64926372155288, "grad_norm": 0.20782816410064697, "learning_rate": 8.774620420080517e-05, "loss": 0.0112, "step": 26630 }, { "epoch": 35.66265060240964, "grad_norm": 0.23549659550189972, "learning_rate": 8.773535848046131e-05, "loss": 0.0112, "step": 26640 }, { "epoch": 35.6760374832664, "grad_norm": 0.14281369745731354, "learning_rate": 8.772450863343538e-05, "loss": 0.011, "step": 26650 }, { "epoch": 35.68942436412316, "grad_norm": 0.20140734314918518, "learning_rate": 8.77136546609139e-05, "loss": 0.0126, "step": 26660 }, { "epoch": 35.70281124497992, "grad_norm": 0.11677079647779465, "learning_rate": 8.770279656408385e-05, "loss": 0.013, "step": 26670 }, { "epoch": 35.716198125836684, "grad_norm": 0.19187423586845398, "learning_rate": 8.769193434413265e-05, "loss": 0.0134, "step": 26680 }, { "epoch": 35.72958500669344, "grad_norm": 0.21000339090824127, "learning_rate": 8.76810680022482e-05, "loss": 0.0134, "step": 26690 }, { "epoch": 35.7429718875502, "grad_norm": 0.2658902108669281, "learning_rate": 8.767019753961878e-05, "loss": 0.0112, "step": 26700 }, { "epoch": 35.75635876840696, "grad_norm": 0.13326789438724518, "learning_rate": 8.765932295743321e-05, "loss": 0.0112, "step": 26710 }, { "epoch": 35.76974564926372, "grad_norm": 0.5172178745269775, "learning_rate": 8.764844425688068e-05, "loss": 0.011, "step": 26720 }, { "epoch": 35.78313253012048, "grad_norm": 0.20220811665058136, "learning_rate": 8.763756143915092e-05, "loss": 0.0114, "step": 26730 }, { "epoch": 35.79651941097724, "grad_norm": 0.23697182536125183, "learning_rate": 8.7626674505434e-05, "loss": 0.0143, "step": 26740 }, { "epoch": 35.809906291834004, "grad_norm": 0.14355838298797607, "learning_rate": 8.761578345692053e-05, "loss": 0.0124, "step": 26750 }, { "epoch": 35.82329317269076, "grad_norm": 0.15390989184379578, "learning_rate": 8.760488829480156e-05, "loss": 0.0133, "step": 26760 }, { "epoch": 35.836680053547525, "grad_norm": 0.15283401310443878, "learning_rate": 8.759398902026854e-05, "loss": 0.011, "step": 26770 }, { "epoch": 35.85006693440428, "grad_norm": 1.155326008796692, "learning_rate": 8.758308563451339e-05, "loss": 0.0126, "step": 26780 }, { "epoch": 35.863453815261046, "grad_norm": 0.1906946748495102, "learning_rate": 8.75721781387285e-05, "loss": 0.0116, "step": 26790 }, { "epoch": 35.8768406961178, "grad_norm": 0.2342602014541626, "learning_rate": 8.75612665341067e-05, "loss": 0.0115, "step": 26800 }, { "epoch": 35.89022757697457, "grad_norm": 0.308368057012558, "learning_rate": 8.755035082184126e-05, "loss": 0.0141, "step": 26810 }, { "epoch": 35.903614457831324, "grad_norm": 0.26279208064079285, "learning_rate": 8.753943100312592e-05, "loss": 0.0127, "step": 26820 }, { "epoch": 35.91700133868809, "grad_norm": 0.37225452065467834, "learning_rate": 8.752850707915484e-05, "loss": 0.013, "step": 26830 }, { "epoch": 35.930388219544845, "grad_norm": 0.2739514410495758, "learning_rate": 8.751757905112264e-05, "loss": 0.0116, "step": 26840 }, { "epoch": 35.94377510040161, "grad_norm": 0.14545418322086334, "learning_rate": 8.75066469202244e-05, "loss": 0.0107, "step": 26850 }, { "epoch": 35.957161981258366, "grad_norm": 0.2066015750169754, "learning_rate": 8.749571068765567e-05, "loss": 0.0122, "step": 26860 }, { "epoch": 35.97054886211513, "grad_norm": 0.46295061707496643, "learning_rate": 8.748477035461238e-05, "loss": 0.0097, "step": 26870 }, { "epoch": 35.98393574297189, "grad_norm": 0.11994798481464386, "learning_rate": 8.747382592229095e-05, "loss": 0.0141, "step": 26880 }, { "epoch": 35.99732262382865, "grad_norm": 0.19142015278339386, "learning_rate": 8.746287739188828e-05, "loss": 0.0128, "step": 26890 }, { "epoch": 36.01070950468541, "grad_norm": 0.19994881749153137, "learning_rate": 8.745192476460165e-05, "loss": 0.0112, "step": 26900 }, { "epoch": 36.024096385542165, "grad_norm": 0.17494314908981323, "learning_rate": 8.744096804162882e-05, "loss": 0.0131, "step": 26910 }, { "epoch": 36.03748326639893, "grad_norm": 0.1302599310874939, "learning_rate": 8.743000722416804e-05, "loss": 0.0106, "step": 26920 }, { "epoch": 36.05087014725569, "grad_norm": 0.23476481437683105, "learning_rate": 8.741904231341793e-05, "loss": 0.0144, "step": 26930 }, { "epoch": 36.06425702811245, "grad_norm": 0.15634670853614807, "learning_rate": 8.740807331057762e-05, "loss": 0.0096, "step": 26940 }, { "epoch": 36.07764390896921, "grad_norm": 0.38436686992645264, "learning_rate": 8.739710021684667e-05, "loss": 0.0106, "step": 26950 }, { "epoch": 36.09103078982597, "grad_norm": 0.2073112279176712, "learning_rate": 8.738612303342503e-05, "loss": 0.0102, "step": 26960 }, { "epoch": 36.10441767068273, "grad_norm": 0.2585066258907318, "learning_rate": 8.73751417615132e-05, "loss": 0.011, "step": 26970 }, { "epoch": 36.11780455153949, "grad_norm": 0.46577468514442444, "learning_rate": 8.736415640231208e-05, "loss": 0.0106, "step": 26980 }, { "epoch": 36.13119143239625, "grad_norm": 0.19422362744808197, "learning_rate": 8.735316695702297e-05, "loss": 0.0119, "step": 26990 }, { "epoch": 36.144578313253014, "grad_norm": 0.13824893534183502, "learning_rate": 8.734217342684769e-05, "loss": 0.0105, "step": 27000 }, { "epoch": 36.15796519410977, "grad_norm": 0.2461722046136856, "learning_rate": 8.733117581298847e-05, "loss": 0.0125, "step": 27010 }, { "epoch": 36.171352074966535, "grad_norm": 0.23649291694164276, "learning_rate": 8.732017411664796e-05, "loss": 0.0123, "step": 27020 }, { "epoch": 36.18473895582329, "grad_norm": 0.21782782673835754, "learning_rate": 8.730916833902936e-05, "loss": 0.0123, "step": 27030 }, { "epoch": 36.198125836680056, "grad_norm": 0.1672905534505844, "learning_rate": 8.729815848133618e-05, "loss": 0.0098, "step": 27040 }, { "epoch": 36.21151271753681, "grad_norm": 0.19101080298423767, "learning_rate": 8.728714454477247e-05, "loss": 0.0127, "step": 27050 }, { "epoch": 36.22489959839358, "grad_norm": 0.25358548760414124, "learning_rate": 8.727612653054269e-05, "loss": 0.0108, "step": 27060 }, { "epoch": 36.238286479250334, "grad_norm": 0.12509167194366455, "learning_rate": 8.726510443985176e-05, "loss": 0.0111, "step": 27070 }, { "epoch": 36.2516733601071, "grad_norm": 0.2311095893383026, "learning_rate": 8.725407827390503e-05, "loss": 0.0113, "step": 27080 }, { "epoch": 36.265060240963855, "grad_norm": 0.17135554552078247, "learning_rate": 8.724304803390833e-05, "loss": 0.0129, "step": 27090 }, { "epoch": 36.27844712182062, "grad_norm": 0.3098801076412201, "learning_rate": 8.723201372106788e-05, "loss": 0.0128, "step": 27100 }, { "epoch": 36.291834002677376, "grad_norm": 0.24157178401947021, "learning_rate": 8.722097533659038e-05, "loss": 0.0114, "step": 27110 }, { "epoch": 36.30522088353413, "grad_norm": 0.1643972545862198, "learning_rate": 8.720993288168299e-05, "loss": 0.0119, "step": 27120 }, { "epoch": 36.3186077643909, "grad_norm": 0.17901019752025604, "learning_rate": 8.719888635755327e-05, "loss": 0.0095, "step": 27130 }, { "epoch": 36.331994645247654, "grad_norm": 0.172870472073555, "learning_rate": 8.718783576540928e-05, "loss": 0.0111, "step": 27140 }, { "epoch": 36.34538152610442, "grad_norm": 0.20588766038417816, "learning_rate": 8.717678110645948e-05, "loss": 0.0118, "step": 27150 }, { "epoch": 36.358768406961175, "grad_norm": 0.31234195828437805, "learning_rate": 8.716572238191279e-05, "loss": 0.0091, "step": 27160 }, { "epoch": 36.37215528781794, "grad_norm": 1.1488181352615356, "learning_rate": 8.715465959297857e-05, "loss": 0.0097, "step": 27170 }, { "epoch": 36.3855421686747, "grad_norm": 0.2313264012336731, "learning_rate": 8.714359274086665e-05, "loss": 0.0107, "step": 27180 }, { "epoch": 36.39892904953146, "grad_norm": 0.15547066926956177, "learning_rate": 8.713252182678726e-05, "loss": 0.0109, "step": 27190 }, { "epoch": 36.41231593038822, "grad_norm": 0.15912681818008423, "learning_rate": 8.712144685195112e-05, "loss": 0.0123, "step": 27200 }, { "epoch": 36.42570281124498, "grad_norm": 0.6552757024765015, "learning_rate": 8.711036781756936e-05, "loss": 0.0115, "step": 27210 }, { "epoch": 36.43908969210174, "grad_norm": 0.2577696442604065, "learning_rate": 8.709928472485357e-05, "loss": 0.0109, "step": 27220 }, { "epoch": 36.4524765729585, "grad_norm": 0.21673725545406342, "learning_rate": 8.708819757501579e-05, "loss": 0.0108, "step": 27230 }, { "epoch": 36.46586345381526, "grad_norm": 0.23657134175300598, "learning_rate": 8.707710636926846e-05, "loss": 0.0123, "step": 27240 }, { "epoch": 36.479250334672024, "grad_norm": 0.44058990478515625, "learning_rate": 8.706601110882455e-05, "loss": 0.0113, "step": 27250 }, { "epoch": 36.49263721552878, "grad_norm": 0.20842595398426056, "learning_rate": 8.705491179489738e-05, "loss": 0.0115, "step": 27260 }, { "epoch": 36.506024096385545, "grad_norm": 0.1972368359565735, "learning_rate": 8.704380842870077e-05, "loss": 0.0117, "step": 27270 }, { "epoch": 36.5194109772423, "grad_norm": 0.17027729749679565, "learning_rate": 8.703270101144895e-05, "loss": 0.0116, "step": 27280 }, { "epoch": 36.532797858099066, "grad_norm": 0.2420169711112976, "learning_rate": 8.702158954435664e-05, "loss": 0.0143, "step": 27290 }, { "epoch": 36.54618473895582, "grad_norm": 0.13448238372802734, "learning_rate": 8.701047402863896e-05, "loss": 0.0124, "step": 27300 }, { "epoch": 36.55957161981259, "grad_norm": 0.25940823554992676, "learning_rate": 8.699935446551148e-05, "loss": 0.0129, "step": 27310 }, { "epoch": 36.572958500669344, "grad_norm": 0.2289152592420578, "learning_rate": 8.698823085619022e-05, "loss": 0.0135, "step": 27320 }, { "epoch": 36.5863453815261, "grad_norm": 0.12258908897638321, "learning_rate": 8.697710320189166e-05, "loss": 0.0117, "step": 27330 }, { "epoch": 36.599732262382865, "grad_norm": 0.14444848895072937, "learning_rate": 8.696597150383268e-05, "loss": 0.0111, "step": 27340 }, { "epoch": 36.61311914323962, "grad_norm": 0.20284226536750793, "learning_rate": 8.695483576323063e-05, "loss": 0.0114, "step": 27350 }, { "epoch": 36.626506024096386, "grad_norm": 0.16655726730823517, "learning_rate": 8.69436959813033e-05, "loss": 0.0112, "step": 27360 }, { "epoch": 36.63989290495314, "grad_norm": 0.1906961351633072, "learning_rate": 8.693255215926892e-05, "loss": 0.0116, "step": 27370 }, { "epoch": 36.65327978580991, "grad_norm": 0.14992667734622955, "learning_rate": 8.692140429834617e-05, "loss": 0.0112, "step": 27380 }, { "epoch": 36.666666666666664, "grad_norm": 0.2179158627986908, "learning_rate": 8.691025239975415e-05, "loss": 0.0113, "step": 27390 }, { "epoch": 36.68005354752343, "grad_norm": 0.2929668724536896, "learning_rate": 8.689909646471243e-05, "loss": 0.0092, "step": 27400 }, { "epoch": 36.693440428380185, "grad_norm": 0.15828245878219604, "learning_rate": 8.688793649444099e-05, "loss": 0.01, "step": 27410 }, { "epoch": 36.70682730923695, "grad_norm": 0.22230783104896545, "learning_rate": 8.687677249016029e-05, "loss": 0.0093, "step": 27420 }, { "epoch": 36.72021419009371, "grad_norm": 0.18263496458530426, "learning_rate": 8.686560445309118e-05, "loss": 0.0111, "step": 27430 }, { "epoch": 36.73360107095047, "grad_norm": 0.210574209690094, "learning_rate": 8.685443238445499e-05, "loss": 0.0109, "step": 27440 }, { "epoch": 36.74698795180723, "grad_norm": 0.1997653841972351, "learning_rate": 8.68432562854735e-05, "loss": 0.011, "step": 27450 }, { "epoch": 36.76037483266399, "grad_norm": 0.20639529824256897, "learning_rate": 8.683207615736887e-05, "loss": 0.0128, "step": 27460 }, { "epoch": 36.77376171352075, "grad_norm": 0.43118396401405334, "learning_rate": 8.682089200136379e-05, "loss": 0.0129, "step": 27470 }, { "epoch": 36.78714859437751, "grad_norm": 0.21357795596122742, "learning_rate": 8.680970381868132e-05, "loss": 0.0134, "step": 27480 }, { "epoch": 36.80053547523427, "grad_norm": 0.254239559173584, "learning_rate": 8.679851161054498e-05, "loss": 0.0132, "step": 27490 }, { "epoch": 36.813922356091034, "grad_norm": 0.29492896795272827, "learning_rate": 8.678731537817873e-05, "loss": 0.0094, "step": 27500 }, { "epoch": 36.82730923694779, "grad_norm": 0.25997859239578247, "learning_rate": 8.677611512280697e-05, "loss": 0.0125, "step": 27510 }, { "epoch": 36.840696117804555, "grad_norm": 0.17320498824119568, "learning_rate": 8.676491084565457e-05, "loss": 0.0106, "step": 27520 }, { "epoch": 36.85408299866131, "grad_norm": 0.18743467330932617, "learning_rate": 8.675370254794678e-05, "loss": 0.0122, "step": 27530 }, { "epoch": 36.86746987951807, "grad_norm": 0.2288375347852707, "learning_rate": 8.674249023090935e-05, "loss": 0.0126, "step": 27540 }, { "epoch": 36.88085676037483, "grad_norm": 0.24158120155334473, "learning_rate": 8.673127389576843e-05, "loss": 0.0148, "step": 27550 }, { "epoch": 36.89424364123159, "grad_norm": 0.23430117964744568, "learning_rate": 8.67200535437506e-05, "loss": 0.0108, "step": 27560 }, { "epoch": 36.907630522088354, "grad_norm": 0.17164568603038788, "learning_rate": 8.670882917608296e-05, "loss": 0.0108, "step": 27570 }, { "epoch": 36.92101740294511, "grad_norm": 0.2965390384197235, "learning_rate": 8.669760079399292e-05, "loss": 0.0123, "step": 27580 }, { "epoch": 36.934404283801875, "grad_norm": 0.17160829901695251, "learning_rate": 8.668636839870845e-05, "loss": 0.0135, "step": 27590 }, { "epoch": 36.94779116465863, "grad_norm": 0.4085617959499359, "learning_rate": 8.667513199145789e-05, "loss": 0.0109, "step": 27600 }, { "epoch": 36.961178045515396, "grad_norm": 0.15314032137393951, "learning_rate": 8.666389157347002e-05, "loss": 0.0116, "step": 27610 }, { "epoch": 36.97456492637215, "grad_norm": 0.19964775443077087, "learning_rate": 8.66526471459741e-05, "loss": 0.0116, "step": 27620 }, { "epoch": 36.98795180722892, "grad_norm": 0.16343507170677185, "learning_rate": 8.66413987101998e-05, "loss": 0.0117, "step": 27630 }, { "epoch": 37.001338688085674, "grad_norm": 0.17790032923221588, "learning_rate": 8.663014626737723e-05, "loss": 0.0108, "step": 27640 }, { "epoch": 37.01472556894244, "grad_norm": 0.17679105699062347, "learning_rate": 8.661888981873691e-05, "loss": 0.0141, "step": 27650 }, { "epoch": 37.028112449799195, "grad_norm": 0.2374541014432907, "learning_rate": 8.660762936550988e-05, "loss": 0.012, "step": 27660 }, { "epoch": 37.04149933065596, "grad_norm": 0.3634001314640045, "learning_rate": 8.659636490892753e-05, "loss": 0.0123, "step": 27670 }, { "epoch": 37.054886211512716, "grad_norm": 0.2090577483177185, "learning_rate": 8.658509645022174e-05, "loss": 0.0105, "step": 27680 }, { "epoch": 37.06827309236948, "grad_norm": 0.20935572683811188, "learning_rate": 8.657382399062481e-05, "loss": 0.0117, "step": 27690 }, { "epoch": 37.08165997322624, "grad_norm": 0.18780407309532166, "learning_rate": 8.656254753136946e-05, "loss": 0.0103, "step": 27700 }, { "epoch": 37.095046854083, "grad_norm": 0.22437962889671326, "learning_rate": 8.655126707368891e-05, "loss": 0.0124, "step": 27710 }, { "epoch": 37.10843373493976, "grad_norm": 0.16348136961460114, "learning_rate": 8.653998261881672e-05, "loss": 0.0099, "step": 27720 }, { "epoch": 37.12182061579652, "grad_norm": 0.33282285928726196, "learning_rate": 8.652869416798699e-05, "loss": 0.0104, "step": 27730 }, { "epoch": 37.13520749665328, "grad_norm": 0.24669712781906128, "learning_rate": 8.651740172243417e-05, "loss": 0.0141, "step": 27740 }, { "epoch": 37.14859437751004, "grad_norm": 0.19132374227046967, "learning_rate": 8.65061052833932e-05, "loss": 0.0158, "step": 27750 }, { "epoch": 37.1619812583668, "grad_norm": 0.18745660781860352, "learning_rate": 8.649480485209945e-05, "loss": 0.0111, "step": 27760 }, { "epoch": 37.17536813922356, "grad_norm": 0.25804004073143005, "learning_rate": 8.64835004297887e-05, "loss": 0.0119, "step": 27770 }, { "epoch": 37.18875502008032, "grad_norm": 0.23975148797035217, "learning_rate": 8.64721920176972e-05, "loss": 0.0137, "step": 27780 }, { "epoch": 37.20214190093708, "grad_norm": 0.18126031756401062, "learning_rate": 8.646087961706164e-05, "loss": 0.0127, "step": 27790 }, { "epoch": 37.21552878179384, "grad_norm": 0.5268206596374512, "learning_rate": 8.644956322911908e-05, "loss": 0.0114, "step": 27800 }, { "epoch": 37.2289156626506, "grad_norm": 0.2812856435775757, "learning_rate": 8.643824285510709e-05, "loss": 0.0143, "step": 27810 }, { "epoch": 37.242302543507364, "grad_norm": 0.24443963170051575, "learning_rate": 8.642691849626364e-05, "loss": 0.0129, "step": 27820 }, { "epoch": 37.25568942436412, "grad_norm": 0.21039921045303345, "learning_rate": 8.641559015382717e-05, "loss": 0.011, "step": 27830 }, { "epoch": 37.269076305220885, "grad_norm": 0.2911384105682373, "learning_rate": 8.640425782903649e-05, "loss": 0.0115, "step": 27840 }, { "epoch": 37.28246318607764, "grad_norm": 0.1874523162841797, "learning_rate": 8.639292152313091e-05, "loss": 0.0134, "step": 27850 }, { "epoch": 37.295850066934406, "grad_norm": 0.16079406440258026, "learning_rate": 8.638158123735015e-05, "loss": 0.0082, "step": 27860 }, { "epoch": 37.30923694779116, "grad_norm": 0.30238017439842224, "learning_rate": 8.637023697293436e-05, "loss": 0.0088, "step": 27870 }, { "epoch": 37.32262382864793, "grad_norm": 0.8610121607780457, "learning_rate": 8.635888873112414e-05, "loss": 0.0119, "step": 27880 }, { "epoch": 37.336010709504684, "grad_norm": 0.17682300508022308, "learning_rate": 8.634753651316052e-05, "loss": 0.0097, "step": 27890 }, { "epoch": 37.34939759036145, "grad_norm": 0.27400508522987366, "learning_rate": 8.633618032028496e-05, "loss": 0.012, "step": 27900 }, { "epoch": 37.362784471218205, "grad_norm": 0.4446928799152374, "learning_rate": 8.632482015373934e-05, "loss": 0.012, "step": 27910 }, { "epoch": 37.37617135207497, "grad_norm": 0.17081260681152344, "learning_rate": 8.6313456014766e-05, "loss": 0.0142, "step": 27920 }, { "epoch": 37.389558232931726, "grad_norm": 0.176758274435997, "learning_rate": 8.630208790460771e-05, "loss": 0.0126, "step": 27930 }, { "epoch": 37.40294511378849, "grad_norm": 0.2780485451221466, "learning_rate": 8.629071582450768e-05, "loss": 0.0138, "step": 27940 }, { "epoch": 37.41633199464525, "grad_norm": 0.16193333268165588, "learning_rate": 8.62793397757095e-05, "loss": 0.0129, "step": 27950 }, { "epoch": 37.429718875502004, "grad_norm": 0.23905088007450104, "learning_rate": 8.626795975945729e-05, "loss": 0.0126, "step": 27960 }, { "epoch": 37.44310575635877, "grad_norm": 0.22778427600860596, "learning_rate": 8.625657577699551e-05, "loss": 0.0132, "step": 27970 }, { "epoch": 37.456492637215526, "grad_norm": 0.2982189655303955, "learning_rate": 8.624518782956914e-05, "loss": 0.0127, "step": 27980 }, { "epoch": 37.46987951807229, "grad_norm": 0.3289017677307129, "learning_rate": 8.62337959184235e-05, "loss": 0.0139, "step": 27990 }, { "epoch": 37.48326639892905, "grad_norm": 0.14668263494968414, "learning_rate": 8.622240004480441e-05, "loss": 0.0111, "step": 28000 }, { "epoch": 37.49665327978581, "grad_norm": 0.2383451759815216, "learning_rate": 8.621100020995814e-05, "loss": 0.0109, "step": 28010 }, { "epoch": 37.51004016064257, "grad_norm": 0.1912018060684204, "learning_rate": 8.619959641513132e-05, "loss": 0.0131, "step": 28020 }, { "epoch": 37.52342704149933, "grad_norm": 0.28663361072540283, "learning_rate": 8.618818866157105e-05, "loss": 0.0142, "step": 28030 }, { "epoch": 37.53681392235609, "grad_norm": 0.28798389434814453, "learning_rate": 8.617677695052487e-05, "loss": 0.0124, "step": 28040 }, { "epoch": 37.55020080321285, "grad_norm": 0.21010002493858337, "learning_rate": 8.616536128324078e-05, "loss": 0.0104, "step": 28050 }, { "epoch": 37.56358768406961, "grad_norm": 0.19450777769088745, "learning_rate": 8.615394166096712e-05, "loss": 0.0099, "step": 28060 }, { "epoch": 37.576974564926374, "grad_norm": 0.5762884020805359, "learning_rate": 8.614251808495279e-05, "loss": 0.0121, "step": 28070 }, { "epoch": 37.59036144578313, "grad_norm": 0.38206613063812256, "learning_rate": 8.6131090556447e-05, "loss": 0.0119, "step": 28080 }, { "epoch": 37.603748326639895, "grad_norm": 0.14074963331222534, "learning_rate": 8.611965907669947e-05, "loss": 0.0091, "step": 28090 }, { "epoch": 37.61713520749665, "grad_norm": 0.1941290646791458, "learning_rate": 8.610822364696034e-05, "loss": 0.01, "step": 28100 }, { "epoch": 37.630522088353416, "grad_norm": 0.17995290458202362, "learning_rate": 8.609678426848015e-05, "loss": 0.0118, "step": 28110 }, { "epoch": 37.64390896921017, "grad_norm": 0.5784058570861816, "learning_rate": 8.60853409425099e-05, "loss": 0.0128, "step": 28120 }, { "epoch": 37.65729585006694, "grad_norm": 0.22060498595237732, "learning_rate": 8.607389367030104e-05, "loss": 0.0108, "step": 28130 }, { "epoch": 37.670682730923694, "grad_norm": 0.1494239717721939, "learning_rate": 8.606244245310538e-05, "loss": 0.0123, "step": 28140 }, { "epoch": 37.68406961178046, "grad_norm": 0.2688482999801636, "learning_rate": 8.605098729217525e-05, "loss": 0.011, "step": 28150 }, { "epoch": 37.697456492637215, "grad_norm": 0.17021439969539642, "learning_rate": 8.603952818876335e-05, "loss": 0.0124, "step": 28160 }, { "epoch": 37.71084337349397, "grad_norm": 0.19012898206710815, "learning_rate": 8.602806514412281e-05, "loss": 0.0112, "step": 28170 }, { "epoch": 37.724230254350736, "grad_norm": 0.273877888917923, "learning_rate": 8.601659815950726e-05, "loss": 0.0132, "step": 28180 }, { "epoch": 37.73761713520749, "grad_norm": 0.2938168942928314, "learning_rate": 8.600512723617067e-05, "loss": 0.0116, "step": 28190 }, { "epoch": 37.75100401606426, "grad_norm": 0.1718461513519287, "learning_rate": 8.59936523753675e-05, "loss": 0.0122, "step": 28200 }, { "epoch": 37.764390896921014, "grad_norm": 0.1674349159002304, "learning_rate": 8.598217357835264e-05, "loss": 0.0107, "step": 28210 }, { "epoch": 37.77777777777778, "grad_norm": 0.27617812156677246, "learning_rate": 8.597069084638135e-05, "loss": 0.0117, "step": 28220 }, { "epoch": 37.791164658634536, "grad_norm": 0.2575015723705292, "learning_rate": 8.595920418070939e-05, "loss": 0.0083, "step": 28230 }, { "epoch": 37.8045515394913, "grad_norm": 0.24595461785793304, "learning_rate": 8.594771358259295e-05, "loss": 0.0107, "step": 28240 }, { "epoch": 37.81793842034806, "grad_norm": 0.18662163615226746, "learning_rate": 8.593621905328858e-05, "loss": 0.0105, "step": 28250 }, { "epoch": 37.83132530120482, "grad_norm": 0.27631354331970215, "learning_rate": 8.592472059405333e-05, "loss": 0.0116, "step": 28260 }, { "epoch": 37.84471218206158, "grad_norm": 0.22387395799160004, "learning_rate": 8.591321820614464e-05, "loss": 0.0122, "step": 28270 }, { "epoch": 37.85809906291834, "grad_norm": 0.21190199255943298, "learning_rate": 8.590171189082041e-05, "loss": 0.0126, "step": 28280 }, { "epoch": 37.8714859437751, "grad_norm": 0.22901657223701477, "learning_rate": 8.589020164933894e-05, "loss": 0.0157, "step": 28290 }, { "epoch": 37.88487282463186, "grad_norm": 0.22851328551769257, "learning_rate": 8.587868748295898e-05, "loss": 0.0129, "step": 28300 }, { "epoch": 37.89825970548862, "grad_norm": 0.242429718375206, "learning_rate": 8.586716939293971e-05, "loss": 0.0118, "step": 28310 }, { "epoch": 37.911646586345384, "grad_norm": 0.11722859740257263, "learning_rate": 8.58556473805407e-05, "loss": 0.0095, "step": 28320 }, { "epoch": 37.92503346720214, "grad_norm": 0.25005102157592773, "learning_rate": 8.584412144702202e-05, "loss": 0.0129, "step": 28330 }, { "epoch": 37.938420348058905, "grad_norm": 0.3500254154205322, "learning_rate": 8.58325915936441e-05, "loss": 0.0115, "step": 28340 }, { "epoch": 37.95180722891566, "grad_norm": 0.15714667737483978, "learning_rate": 8.582105782166783e-05, "loss": 0.0111, "step": 28350 }, { "epoch": 37.965194109772426, "grad_norm": 0.3556850254535675, "learning_rate": 8.580952013235455e-05, "loss": 0.0121, "step": 28360 }, { "epoch": 37.97858099062918, "grad_norm": 0.32793989777565, "learning_rate": 8.579797852696596e-05, "loss": 0.0115, "step": 28370 }, { "epoch": 37.99196787148595, "grad_norm": 0.17929066717624664, "learning_rate": 8.578643300676428e-05, "loss": 0.0105, "step": 28380 }, { "epoch": 38.005354752342704, "grad_norm": 1.2201396226882935, "learning_rate": 8.577488357301209e-05, "loss": 0.0115, "step": 28390 }, { "epoch": 38.01874163319946, "grad_norm": 0.256551593542099, "learning_rate": 8.576333022697242e-05, "loss": 0.0115, "step": 28400 }, { "epoch": 38.032128514056225, "grad_norm": 0.2158854603767395, "learning_rate": 8.575177296990873e-05, "loss": 0.0117, "step": 28410 }, { "epoch": 38.04551539491298, "grad_norm": 0.18972119688987732, "learning_rate": 8.574021180308489e-05, "loss": 0.0111, "step": 28420 }, { "epoch": 38.058902275769746, "grad_norm": 0.3735501766204834, "learning_rate": 8.572864672776523e-05, "loss": 0.0092, "step": 28430 }, { "epoch": 38.0722891566265, "grad_norm": 0.24826067686080933, "learning_rate": 8.571707774521447e-05, "loss": 0.0124, "step": 28440 }, { "epoch": 38.08567603748327, "grad_norm": 0.31308460235595703, "learning_rate": 8.57055048566978e-05, "loss": 0.0115, "step": 28450 }, { "epoch": 38.099062918340024, "grad_norm": 0.2466409057378769, "learning_rate": 8.569392806348078e-05, "loss": 0.0116, "step": 28460 }, { "epoch": 38.11244979919679, "grad_norm": 0.20741285383701324, "learning_rate": 8.568234736682947e-05, "loss": 0.0129, "step": 28470 }, { "epoch": 38.125836680053546, "grad_norm": 0.232773095369339, "learning_rate": 8.567076276801029e-05, "loss": 0.0113, "step": 28480 }, { "epoch": 38.13922356091031, "grad_norm": 0.17886051535606384, "learning_rate": 8.565917426829013e-05, "loss": 0.0105, "step": 28490 }, { "epoch": 38.15261044176707, "grad_norm": 0.21213866770267487, "learning_rate": 8.564758186893628e-05, "loss": 0.0107, "step": 28500 }, { "epoch": 38.16599732262383, "grad_norm": 0.24795718491077423, "learning_rate": 8.563598557121649e-05, "loss": 0.0116, "step": 28510 }, { "epoch": 38.17938420348059, "grad_norm": 0.5777920484542847, "learning_rate": 8.562438537639888e-05, "loss": 0.0096, "step": 28520 }, { "epoch": 38.19277108433735, "grad_norm": 0.22319327294826508, "learning_rate": 8.561278128575206e-05, "loss": 0.0129, "step": 28530 }, { "epoch": 38.20615796519411, "grad_norm": 0.30286821722984314, "learning_rate": 8.5601173300545e-05, "loss": 0.0116, "step": 28540 }, { "epoch": 38.21954484605087, "grad_norm": 0.2265380620956421, "learning_rate": 8.558956142204717e-05, "loss": 0.0109, "step": 28550 }, { "epoch": 38.23293172690763, "grad_norm": 0.1839301884174347, "learning_rate": 8.55779456515284e-05, "loss": 0.01, "step": 28560 }, { "epoch": 38.246318607764394, "grad_norm": 0.1758463978767395, "learning_rate": 8.556632599025898e-05, "loss": 0.0119, "step": 28570 }, { "epoch": 38.25970548862115, "grad_norm": 0.19543728232383728, "learning_rate": 8.555470243950964e-05, "loss": 0.0105, "step": 28580 }, { "epoch": 38.273092369477915, "grad_norm": 0.18813854455947876, "learning_rate": 8.554307500055148e-05, "loss": 0.01, "step": 28590 }, { "epoch": 38.28647925033467, "grad_norm": 0.18236416578292847, "learning_rate": 8.553144367465609e-05, "loss": 0.0129, "step": 28600 }, { "epoch": 38.29986613119143, "grad_norm": 0.4342077970504761, "learning_rate": 8.551980846309544e-05, "loss": 0.0142, "step": 28610 }, { "epoch": 38.31325301204819, "grad_norm": 0.7455545663833618, "learning_rate": 8.550816936714193e-05, "loss": 0.0113, "step": 28620 }, { "epoch": 38.32663989290495, "grad_norm": 0.6324751377105713, "learning_rate": 8.549652638806841e-05, "loss": 0.011, "step": 28630 }, { "epoch": 38.340026773761714, "grad_norm": 0.7226624488830566, "learning_rate": 8.548487952714812e-05, "loss": 0.0103, "step": 28640 }, { "epoch": 38.35341365461847, "grad_norm": 0.2615845799446106, "learning_rate": 8.547322878565478e-05, "loss": 0.0114, "step": 28650 }, { "epoch": 38.366800535475235, "grad_norm": 0.19428037106990814, "learning_rate": 8.546157416486245e-05, "loss": 0.0105, "step": 28660 }, { "epoch": 38.38018741633199, "grad_norm": 0.17911025881767273, "learning_rate": 8.54499156660457e-05, "loss": 0.0113, "step": 28670 }, { "epoch": 38.393574297188756, "grad_norm": 0.20157068967819214, "learning_rate": 8.543825329047947e-05, "loss": 0.0106, "step": 28680 }, { "epoch": 38.40696117804551, "grad_norm": 0.43565067648887634, "learning_rate": 8.542658703943913e-05, "loss": 0.0115, "step": 28690 }, { "epoch": 38.42034805890228, "grad_norm": 0.23917005956172943, "learning_rate": 8.541491691420051e-05, "loss": 0.0134, "step": 28700 }, { "epoch": 38.433734939759034, "grad_norm": 0.5923860669136047, "learning_rate": 8.54032429160398e-05, "loss": 0.0118, "step": 28710 }, { "epoch": 38.4471218206158, "grad_norm": 0.3661447763442993, "learning_rate": 8.539156504623369e-05, "loss": 0.0124, "step": 28720 }, { "epoch": 38.460508701472556, "grad_norm": 0.18660365045070648, "learning_rate": 8.537988330605923e-05, "loss": 0.0117, "step": 28730 }, { "epoch": 38.47389558232932, "grad_norm": 0.34589266777038574, "learning_rate": 8.536819769679393e-05, "loss": 0.011, "step": 28740 }, { "epoch": 38.48728246318608, "grad_norm": 0.1404637098312378, "learning_rate": 8.53565082197157e-05, "loss": 0.0105, "step": 28750 }, { "epoch": 38.50066934404284, "grad_norm": 0.2895386815071106, "learning_rate": 8.534481487610289e-05, "loss": 0.012, "step": 28760 }, { "epoch": 38.5140562248996, "grad_norm": 0.7402357459068298, "learning_rate": 8.533311766723428e-05, "loss": 0.0099, "step": 28770 }, { "epoch": 38.52744310575636, "grad_norm": 0.23525665700435638, "learning_rate": 8.532141659438901e-05, "loss": 0.0143, "step": 28780 }, { "epoch": 38.54082998661312, "grad_norm": 0.2188630849123001, "learning_rate": 8.530971165884675e-05, "loss": 0.0113, "step": 28790 }, { "epoch": 38.55421686746988, "grad_norm": 0.21508029103279114, "learning_rate": 8.529800286188752e-05, "loss": 0.0104, "step": 28800 }, { "epoch": 38.56760374832664, "grad_norm": 0.17029990255832672, "learning_rate": 8.528629020479175e-05, "loss": 0.0109, "step": 28810 }, { "epoch": 38.5809906291834, "grad_norm": 0.178071528673172, "learning_rate": 8.527457368884033e-05, "loss": 0.0099, "step": 28820 }, { "epoch": 38.59437751004016, "grad_norm": 0.10795144736766815, "learning_rate": 8.526285331531458e-05, "loss": 0.0102, "step": 28830 }, { "epoch": 38.60776439089692, "grad_norm": 0.19268731772899628, "learning_rate": 8.525112908549621e-05, "loss": 0.0126, "step": 28840 }, { "epoch": 38.62115127175368, "grad_norm": 0.24241240322589874, "learning_rate": 8.523940100066735e-05, "loss": 0.0108, "step": 28850 }, { "epoch": 38.63453815261044, "grad_norm": 0.1888023018836975, "learning_rate": 8.52276690621106e-05, "loss": 0.0132, "step": 28860 }, { "epoch": 38.6479250334672, "grad_norm": 0.1602744460105896, "learning_rate": 8.521593327110889e-05, "loss": 0.0097, "step": 28870 }, { "epoch": 38.66131191432396, "grad_norm": 0.15527784824371338, "learning_rate": 8.520419362894569e-05, "loss": 0.0113, "step": 28880 }, { "epoch": 38.674698795180724, "grad_norm": 0.21794703602790833, "learning_rate": 8.51924501369048e-05, "loss": 0.0099, "step": 28890 }, { "epoch": 38.68808567603748, "grad_norm": 0.19423913955688477, "learning_rate": 8.518070279627047e-05, "loss": 0.0135, "step": 28900 }, { "epoch": 38.701472556894245, "grad_norm": 0.18969884514808655, "learning_rate": 8.516895160832737e-05, "loss": 0.0104, "step": 28910 }, { "epoch": 38.714859437751, "grad_norm": 0.37730130553245544, "learning_rate": 8.515719657436061e-05, "loss": 0.0102, "step": 28920 }, { "epoch": 38.728246318607766, "grad_norm": 0.15299402177333832, "learning_rate": 8.514543769565568e-05, "loss": 0.0129, "step": 28930 }, { "epoch": 38.74163319946452, "grad_norm": 0.18600550293922424, "learning_rate": 8.513367497349853e-05, "loss": 0.013, "step": 28940 }, { "epoch": 38.75502008032129, "grad_norm": 0.20427151024341583, "learning_rate": 8.51219084091755e-05, "loss": 0.0101, "step": 28950 }, { "epoch": 38.768406961178044, "grad_norm": 0.4984550476074219, "learning_rate": 8.511013800397338e-05, "loss": 0.0128, "step": 28960 }, { "epoch": 38.78179384203481, "grad_norm": 0.19039607048034668, "learning_rate": 8.509836375917937e-05, "loss": 0.0126, "step": 28970 }, { "epoch": 38.795180722891565, "grad_norm": 0.1619228720664978, "learning_rate": 8.508658567608104e-05, "loss": 0.0113, "step": 28980 }, { "epoch": 38.80856760374833, "grad_norm": 0.36731183528900146, "learning_rate": 8.507480375596647e-05, "loss": 0.0139, "step": 28990 }, { "epoch": 38.82195448460509, "grad_norm": 0.2815130054950714, "learning_rate": 8.506301800012408e-05, "loss": 0.0109, "step": 29000 }, { "epoch": 38.83534136546185, "grad_norm": 0.22988741099834442, "learning_rate": 8.505122840984278e-05, "loss": 0.0117, "step": 29010 }, { "epoch": 38.84872824631861, "grad_norm": 0.22398555278778076, "learning_rate": 8.503943498641182e-05, "loss": 0.011, "step": 29020 }, { "epoch": 38.862115127175365, "grad_norm": 1.6630762815475464, "learning_rate": 8.502763773112095e-05, "loss": 0.0125, "step": 29030 }, { "epoch": 38.87550200803213, "grad_norm": 0.4772275686264038, "learning_rate": 8.501583664526026e-05, "loss": 0.0107, "step": 29040 }, { "epoch": 38.888888888888886, "grad_norm": 0.16143766045570374, "learning_rate": 8.500403173012032e-05, "loss": 0.0106, "step": 29050 }, { "epoch": 38.90227576974565, "grad_norm": 0.2313656061887741, "learning_rate": 8.499222298699211e-05, "loss": 0.0097, "step": 29060 }, { "epoch": 38.91566265060241, "grad_norm": 0.11350122839212418, "learning_rate": 8.498041041716701e-05, "loss": 0.0133, "step": 29070 }, { "epoch": 38.92904953145917, "grad_norm": 0.21708106994628906, "learning_rate": 8.496859402193681e-05, "loss": 0.0122, "step": 29080 }, { "epoch": 38.94243641231593, "grad_norm": 0.3092435896396637, "learning_rate": 8.495677380259374e-05, "loss": 0.0133, "step": 29090 }, { "epoch": 38.95582329317269, "grad_norm": 0.18437853455543518, "learning_rate": 8.494494976043045e-05, "loss": 0.0103, "step": 29100 }, { "epoch": 38.96921017402945, "grad_norm": 0.20448951423168182, "learning_rate": 8.493312189673998e-05, "loss": 0.0127, "step": 29110 }, { "epoch": 38.98259705488621, "grad_norm": 0.198774054646492, "learning_rate": 8.492129021281584e-05, "loss": 0.0096, "step": 29120 }, { "epoch": 38.99598393574297, "grad_norm": 0.2013980895280838, "learning_rate": 8.490945470995188e-05, "loss": 0.0099, "step": 29130 }, { "epoch": 39.009370816599734, "grad_norm": 0.19731050729751587, "learning_rate": 8.489761538944247e-05, "loss": 0.0102, "step": 29140 }, { "epoch": 39.02275769745649, "grad_norm": 0.3208296597003937, "learning_rate": 8.48857722525823e-05, "loss": 0.0133, "step": 29150 }, { "epoch": 39.036144578313255, "grad_norm": 0.18474751710891724, "learning_rate": 8.487392530066652e-05, "loss": 0.0095, "step": 29160 }, { "epoch": 39.04953145917001, "grad_norm": 0.31709912419319153, "learning_rate": 8.486207453499069e-05, "loss": 0.0127, "step": 29170 }, { "epoch": 39.062918340026776, "grad_norm": 0.6282066106796265, "learning_rate": 8.485021995685082e-05, "loss": 0.0132, "step": 29180 }, { "epoch": 39.07630522088353, "grad_norm": 0.200654074549675, "learning_rate": 8.483836156754328e-05, "loss": 0.0129, "step": 29190 }, { "epoch": 39.0896921017403, "grad_norm": 0.35686782002449036, "learning_rate": 8.482649936836491e-05, "loss": 0.0113, "step": 29200 }, { "epoch": 39.103078982597054, "grad_norm": 0.28421667218208313, "learning_rate": 8.481463336061293e-05, "loss": 0.0124, "step": 29210 }, { "epoch": 39.11646586345382, "grad_norm": 0.19103649258613586, "learning_rate": 8.480276354558496e-05, "loss": 0.0113, "step": 29220 }, { "epoch": 39.129852744310575, "grad_norm": 0.16367056965827942, "learning_rate": 8.479088992457913e-05, "loss": 0.0099, "step": 29230 }, { "epoch": 39.14323962516734, "grad_norm": 0.17301222681999207, "learning_rate": 8.477901249889387e-05, "loss": 0.0126, "step": 29240 }, { "epoch": 39.1566265060241, "grad_norm": 0.3499949276447296, "learning_rate": 8.47671312698281e-05, "loss": 0.0109, "step": 29250 }, { "epoch": 39.17001338688085, "grad_norm": 0.3111702799797058, "learning_rate": 8.475524623868112e-05, "loss": 0.0109, "step": 29260 }, { "epoch": 39.18340026773762, "grad_norm": 0.8100304007530212, "learning_rate": 8.474335740675266e-05, "loss": 0.0118, "step": 29270 }, { "epoch": 39.196787148594375, "grad_norm": 0.18351377546787262, "learning_rate": 8.473146477534289e-05, "loss": 0.0106, "step": 29280 }, { "epoch": 39.21017402945114, "grad_norm": 0.23356804251670837, "learning_rate": 8.471956834575232e-05, "loss": 0.0096, "step": 29290 }, { "epoch": 39.223560910307896, "grad_norm": 0.17563612759113312, "learning_rate": 8.470766811928197e-05, "loss": 0.0114, "step": 29300 }, { "epoch": 39.23694779116466, "grad_norm": 0.40434494614601135, "learning_rate": 8.469576409723323e-05, "loss": 0.0135, "step": 29310 }, { "epoch": 39.25033467202142, "grad_norm": 0.17498478293418884, "learning_rate": 8.468385628090788e-05, "loss": 0.0103, "step": 29320 }, { "epoch": 39.26372155287818, "grad_norm": 0.5603541731834412, "learning_rate": 8.467194467160815e-05, "loss": 0.0113, "step": 29330 }, { "epoch": 39.27710843373494, "grad_norm": 0.18921160697937012, "learning_rate": 8.466002927063667e-05, "loss": 0.0113, "step": 29340 }, { "epoch": 39.2904953145917, "grad_norm": 0.23783542215824127, "learning_rate": 8.464811007929651e-05, "loss": 0.0124, "step": 29350 }, { "epoch": 39.30388219544846, "grad_norm": 0.15825238823890686, "learning_rate": 8.463618709889114e-05, "loss": 0.0096, "step": 29360 }, { "epoch": 39.31726907630522, "grad_norm": 0.21370193362236023, "learning_rate": 8.462426033072442e-05, "loss": 0.0117, "step": 29370 }, { "epoch": 39.33065595716198, "grad_norm": 0.1470271497964859, "learning_rate": 8.461232977610061e-05, "loss": 0.013, "step": 29380 }, { "epoch": 39.344042838018744, "grad_norm": 0.15793487429618835, "learning_rate": 8.46003954363245e-05, "loss": 0.0095, "step": 29390 }, { "epoch": 39.3574297188755, "grad_norm": 0.1385691910982132, "learning_rate": 8.458845731270115e-05, "loss": 0.0097, "step": 29400 }, { "epoch": 39.370816599732265, "grad_norm": 0.30344170331954956, "learning_rate": 8.45765154065361e-05, "loss": 0.0101, "step": 29410 }, { "epoch": 39.38420348058902, "grad_norm": 0.26500681042671204, "learning_rate": 8.456456971913532e-05, "loss": 0.0134, "step": 29420 }, { "epoch": 39.397590361445786, "grad_norm": 0.13171765208244324, "learning_rate": 8.455262025180517e-05, "loss": 0.0106, "step": 29430 }, { "epoch": 39.41097724230254, "grad_norm": 0.5623390674591064, "learning_rate": 8.454066700585242e-05, "loss": 0.0122, "step": 29440 }, { "epoch": 39.42436412315931, "grad_norm": 0.14307758212089539, "learning_rate": 8.452870998258423e-05, "loss": 0.0109, "step": 29450 }, { "epoch": 39.437751004016064, "grad_norm": 0.4650160074234009, "learning_rate": 8.451674918330825e-05, "loss": 0.0153, "step": 29460 }, { "epoch": 39.45113788487282, "grad_norm": 0.23903504014015198, "learning_rate": 8.450478460933246e-05, "loss": 0.0084, "step": 29470 }, { "epoch": 39.464524765729585, "grad_norm": 0.6981005668640137, "learning_rate": 8.449281626196532e-05, "loss": 0.0095, "step": 29480 }, { "epoch": 39.47791164658634, "grad_norm": 0.22496454417705536, "learning_rate": 8.448084414251564e-05, "loss": 0.0121, "step": 29490 }, { "epoch": 39.49129852744311, "grad_norm": 0.18896785378456116, "learning_rate": 8.446886825229271e-05, "loss": 0.0112, "step": 29500 }, { "epoch": 39.50468540829986, "grad_norm": 0.3393476605415344, "learning_rate": 8.445688859260615e-05, "loss": 0.0108, "step": 29510 }, { "epoch": 39.51807228915663, "grad_norm": 0.16570405662059784, "learning_rate": 8.444490516476606e-05, "loss": 0.011, "step": 29520 }, { "epoch": 39.531459170013385, "grad_norm": 0.3002873361110687, "learning_rate": 8.443291797008293e-05, "loss": 0.0116, "step": 29530 }, { "epoch": 39.54484605087015, "grad_norm": 0.14351822435855865, "learning_rate": 8.442092700986765e-05, "loss": 0.0118, "step": 29540 }, { "epoch": 39.558232931726906, "grad_norm": 0.22062981128692627, "learning_rate": 8.440893228543156e-05, "loss": 0.012, "step": 29550 }, { "epoch": 39.57161981258367, "grad_norm": 0.16689586639404297, "learning_rate": 8.439693379808638e-05, "loss": 0.011, "step": 29560 }, { "epoch": 39.58500669344043, "grad_norm": 0.17058449983596802, "learning_rate": 8.43849315491442e-05, "loss": 0.0099, "step": 29570 }, { "epoch": 39.59839357429719, "grad_norm": 0.18965834379196167, "learning_rate": 8.437292553991763e-05, "loss": 0.0094, "step": 29580 }, { "epoch": 39.61178045515395, "grad_norm": 0.2862060070037842, "learning_rate": 8.436091577171959e-05, "loss": 0.0105, "step": 29590 }, { "epoch": 39.62516733601071, "grad_norm": 0.3355622887611389, "learning_rate": 8.434890224586347e-05, "loss": 0.0111, "step": 29600 }, { "epoch": 39.63855421686747, "grad_norm": 0.3069291412830353, "learning_rate": 8.433688496366303e-05, "loss": 0.0144, "step": 29610 }, { "epoch": 39.65194109772423, "grad_norm": 0.17502126097679138, "learning_rate": 8.432486392643248e-05, "loss": 0.0089, "step": 29620 }, { "epoch": 39.66532797858099, "grad_norm": 0.1432102471590042, "learning_rate": 8.431283913548643e-05, "loss": 0.0109, "step": 29630 }, { "epoch": 39.678714859437754, "grad_norm": 0.2083771824836731, "learning_rate": 8.430081059213985e-05, "loss": 0.0105, "step": 29640 }, { "epoch": 39.69210174029451, "grad_norm": 0.23248149454593658, "learning_rate": 8.428877829770823e-05, "loss": 0.01, "step": 29650 }, { "epoch": 39.705488621151275, "grad_norm": 0.18051357567310333, "learning_rate": 8.427674225350735e-05, "loss": 0.0139, "step": 29660 }, { "epoch": 39.71887550200803, "grad_norm": 0.18657603859901428, "learning_rate": 8.426470246085347e-05, "loss": 0.0122, "step": 29670 }, { "epoch": 39.73226238286479, "grad_norm": 0.24808189272880554, "learning_rate": 8.425265892106324e-05, "loss": 0.009, "step": 29680 }, { "epoch": 39.74564926372155, "grad_norm": 0.1813153326511383, "learning_rate": 8.424061163545374e-05, "loss": 0.0092, "step": 29690 }, { "epoch": 39.75903614457831, "grad_norm": 0.13007137179374695, "learning_rate": 8.422856060534243e-05, "loss": 0.0093, "step": 29700 }, { "epoch": 39.772423025435074, "grad_norm": 0.19793395698070526, "learning_rate": 8.421650583204718e-05, "loss": 0.0104, "step": 29710 }, { "epoch": 39.78580990629183, "grad_norm": 0.20954366028308868, "learning_rate": 8.420444731688633e-05, "loss": 0.0118, "step": 29720 }, { "epoch": 39.799196787148595, "grad_norm": 0.2784239649772644, "learning_rate": 8.419238506117852e-05, "loss": 0.0112, "step": 29730 }, { "epoch": 39.81258366800535, "grad_norm": 0.30810895562171936, "learning_rate": 8.418031906624289e-05, "loss": 0.0108, "step": 29740 }, { "epoch": 39.82597054886212, "grad_norm": 0.14735880494117737, "learning_rate": 8.416824933339898e-05, "loss": 0.0089, "step": 29750 }, { "epoch": 39.83935742971887, "grad_norm": 0.367733359336853, "learning_rate": 8.415617586396667e-05, "loss": 0.0102, "step": 29760 }, { "epoch": 39.85274431057564, "grad_norm": 0.17596517503261566, "learning_rate": 8.414409865926632e-05, "loss": 0.0083, "step": 29770 }, { "epoch": 39.866131191432395, "grad_norm": 0.1857745200395584, "learning_rate": 8.413201772061867e-05, "loss": 0.011, "step": 29780 }, { "epoch": 39.87951807228916, "grad_norm": 0.1979684829711914, "learning_rate": 8.411993304934488e-05, "loss": 0.0097, "step": 29790 }, { "epoch": 39.892904953145916, "grad_norm": 0.1941726803779602, "learning_rate": 8.410784464676654e-05, "loss": 0.0106, "step": 29800 }, { "epoch": 39.90629183400268, "grad_norm": 0.16479358077049255, "learning_rate": 8.409575251420556e-05, "loss": 0.0132, "step": 29810 }, { "epoch": 39.91967871485944, "grad_norm": 0.20574136078357697, "learning_rate": 8.408365665298435e-05, "loss": 0.0104, "step": 29820 }, { "epoch": 39.9330655957162, "grad_norm": 0.17269504070281982, "learning_rate": 8.40715570644257e-05, "loss": 0.0113, "step": 29830 }, { "epoch": 39.94645247657296, "grad_norm": 0.17506401240825653, "learning_rate": 8.40594537498528e-05, "loss": 0.0121, "step": 29840 }, { "epoch": 39.95983935742972, "grad_norm": 0.1727272868156433, "learning_rate": 8.404734671058924e-05, "loss": 0.0102, "step": 29850 }, { "epoch": 39.97322623828648, "grad_norm": 0.20359928905963898, "learning_rate": 8.403523594795902e-05, "loss": 0.0114, "step": 29860 }, { "epoch": 39.98661311914324, "grad_norm": 0.18413054943084717, "learning_rate": 8.402312146328659e-05, "loss": 0.0129, "step": 29870 }, { "epoch": 40.0, "grad_norm": 0.28011149168014526, "learning_rate": 8.401100325789675e-05, "loss": 0.0122, "step": 29880 }, { "epoch": 40.01338688085676, "grad_norm": 0.22825433313846588, "learning_rate": 8.399888133311472e-05, "loss": 0.0126, "step": 29890 }, { "epoch": 40.02677376171352, "grad_norm": 0.19827324151992798, "learning_rate": 8.398675569026613e-05, "loss": 0.0101, "step": 29900 }, { "epoch": 40.04016064257028, "grad_norm": 0.25920936465263367, "learning_rate": 8.397462633067705e-05, "loss": 0.0111, "step": 29910 }, { "epoch": 40.05354752342704, "grad_norm": 0.2500442862510681, "learning_rate": 8.396249325567392e-05, "loss": 0.0133, "step": 29920 }, { "epoch": 40.0669344042838, "grad_norm": 0.20541468262672424, "learning_rate": 8.395035646658357e-05, "loss": 0.0104, "step": 29930 }, { "epoch": 40.08032128514056, "grad_norm": 0.14292527735233307, "learning_rate": 8.39382159647333e-05, "loss": 0.0104, "step": 29940 }, { "epoch": 40.09370816599732, "grad_norm": 0.22134281694889069, "learning_rate": 8.392607175145075e-05, "loss": 0.0119, "step": 29950 }, { "epoch": 40.107095046854084, "grad_norm": 0.2325550615787506, "learning_rate": 8.3913923828064e-05, "loss": 0.0112, "step": 29960 }, { "epoch": 40.12048192771084, "grad_norm": 0.17258532345294952, "learning_rate": 8.390177219590152e-05, "loss": 0.0125, "step": 29970 }, { "epoch": 40.133868808567605, "grad_norm": 0.24149090051651, "learning_rate": 8.388961685629222e-05, "loss": 0.0149, "step": 29980 }, { "epoch": 40.14725568942436, "grad_norm": 0.30358627438545227, "learning_rate": 8.387745781056536e-05, "loss": 0.011, "step": 29990 }, { "epoch": 40.16064257028113, "grad_norm": 0.28822362422943115, "learning_rate": 8.386529506005065e-05, "loss": 0.0124, "step": 30000 }, { "epoch": 40.17402945113788, "grad_norm": 0.23959267139434814, "learning_rate": 8.38531286060782e-05, "loss": 0.0106, "step": 30010 }, { "epoch": 40.18741633199465, "grad_norm": 0.19661220908164978, "learning_rate": 8.384095844997849e-05, "loss": 0.0094, "step": 30020 }, { "epoch": 40.200803212851405, "grad_norm": 0.24844041466712952, "learning_rate": 8.382878459308245e-05, "loss": 0.0103, "step": 30030 }, { "epoch": 40.21419009370817, "grad_norm": 0.22318072617053986, "learning_rate": 8.381660703672138e-05, "loss": 0.012, "step": 30040 }, { "epoch": 40.227576974564926, "grad_norm": 0.27968689799308777, "learning_rate": 8.380442578222702e-05, "loss": 0.0124, "step": 30050 }, { "epoch": 40.24096385542169, "grad_norm": 0.24304039776325226, "learning_rate": 8.379224083093146e-05, "loss": 0.0102, "step": 30060 }, { "epoch": 40.25435073627845, "grad_norm": 0.29620155692100525, "learning_rate": 8.378005218416727e-05, "loss": 0.0093, "step": 30070 }, { "epoch": 40.26773761713521, "grad_norm": 0.20260363817214966, "learning_rate": 8.376785984326735e-05, "loss": 0.0112, "step": 30080 }, { "epoch": 40.28112449799197, "grad_norm": 0.1766154170036316, "learning_rate": 8.375566380956506e-05, "loss": 0.0088, "step": 30090 }, { "epoch": 40.294511378848725, "grad_norm": 0.24359044432640076, "learning_rate": 8.374346408439411e-05, "loss": 0.0121, "step": 30100 }, { "epoch": 40.30789825970549, "grad_norm": 0.2019748091697693, "learning_rate": 8.373126066908868e-05, "loss": 0.0089, "step": 30110 }, { "epoch": 40.321285140562246, "grad_norm": 0.23318013548851013, "learning_rate": 8.371905356498326e-05, "loss": 0.0115, "step": 30120 }, { "epoch": 40.33467202141901, "grad_norm": 0.19844861328601837, "learning_rate": 8.370684277341288e-05, "loss": 0.0145, "step": 30130 }, { "epoch": 40.34805890227577, "grad_norm": 0.44041910767555237, "learning_rate": 8.369462829571282e-05, "loss": 0.0105, "step": 30140 }, { "epoch": 40.36144578313253, "grad_norm": 0.18256959319114685, "learning_rate": 8.36824101332189e-05, "loss": 0.0099, "step": 30150 }, { "epoch": 40.37483266398929, "grad_norm": 0.16207799315452576, "learning_rate": 8.367018828726721e-05, "loss": 0.0118, "step": 30160 }, { "epoch": 40.38821954484605, "grad_norm": 0.3003731369972229, "learning_rate": 8.365796275919438e-05, "loss": 0.0104, "step": 30170 }, { "epoch": 40.40160642570281, "grad_norm": 0.18904313445091248, "learning_rate": 8.364573355033734e-05, "loss": 0.0101, "step": 30180 }, { "epoch": 40.41499330655957, "grad_norm": 0.17248739302158356, "learning_rate": 8.363350066203346e-05, "loss": 0.0087, "step": 30190 }, { "epoch": 40.42838018741633, "grad_norm": 0.19751647114753723, "learning_rate": 8.362126409562053e-05, "loss": 0.0112, "step": 30200 }, { "epoch": 40.441767068273094, "grad_norm": 0.21574223041534424, "learning_rate": 8.360902385243667e-05, "loss": 0.0108, "step": 30210 }, { "epoch": 40.45515394912985, "grad_norm": 0.21449659764766693, "learning_rate": 8.359677993382052e-05, "loss": 0.0098, "step": 30220 }, { "epoch": 40.468540829986615, "grad_norm": 0.17735476791858673, "learning_rate": 8.358453234111103e-05, "loss": 0.0118, "step": 30230 }, { "epoch": 40.48192771084337, "grad_norm": 0.16725265979766846, "learning_rate": 8.357228107564756e-05, "loss": 0.0108, "step": 30240 }, { "epoch": 40.49531459170014, "grad_norm": 0.3076094686985016, "learning_rate": 8.356002613876993e-05, "loss": 0.0084, "step": 30250 }, { "epoch": 40.50870147255689, "grad_norm": 0.13450148701667786, "learning_rate": 8.35477675318183e-05, "loss": 0.0123, "step": 30260 }, { "epoch": 40.52208835341366, "grad_norm": 0.20795682072639465, "learning_rate": 8.353550525613323e-05, "loss": 0.0143, "step": 30270 }, { "epoch": 40.535475234270415, "grad_norm": 0.16846764087677002, "learning_rate": 8.352323931305572e-05, "loss": 0.0126, "step": 30280 }, { "epoch": 40.54886211512718, "grad_norm": 0.1347828358411789, "learning_rate": 8.351096970392717e-05, "loss": 0.0116, "step": 30290 }, { "epoch": 40.562248995983936, "grad_norm": 0.182078018784523, "learning_rate": 8.349869643008937e-05, "loss": 0.0146, "step": 30300 }, { "epoch": 40.57563587684069, "grad_norm": 0.22555126249790192, "learning_rate": 8.348641949288449e-05, "loss": 0.0082, "step": 30310 }, { "epoch": 40.58902275769746, "grad_norm": 0.3713618814945221, "learning_rate": 8.34741388936551e-05, "loss": 0.0104, "step": 30320 }, { "epoch": 40.602409638554214, "grad_norm": 0.21354541182518005, "learning_rate": 8.346185463374423e-05, "loss": 0.0132, "step": 30330 }, { "epoch": 40.61579651941098, "grad_norm": 0.1469285488128662, "learning_rate": 8.344956671449524e-05, "loss": 0.0102, "step": 30340 }, { "epoch": 40.629183400267735, "grad_norm": 0.5215903520584106, "learning_rate": 8.343727513725192e-05, "loss": 0.0096, "step": 30350 }, { "epoch": 40.6425702811245, "grad_norm": 0.3814716339111328, "learning_rate": 8.342497990335847e-05, "loss": 0.0127, "step": 30360 }, { "epoch": 40.655957161981256, "grad_norm": 0.23229651153087616, "learning_rate": 8.341268101415946e-05, "loss": 0.012, "step": 30370 }, { "epoch": 40.66934404283802, "grad_norm": 0.20730331540107727, "learning_rate": 8.34003784709999e-05, "loss": 0.0152, "step": 30380 }, { "epoch": 40.68273092369478, "grad_norm": 0.36969271302223206, "learning_rate": 8.338807227522516e-05, "loss": 0.0117, "step": 30390 }, { "epoch": 40.69611780455154, "grad_norm": 0.3387760818004608, "learning_rate": 8.337576242818103e-05, "loss": 0.0106, "step": 30400 }, { "epoch": 40.7095046854083, "grad_norm": 0.3063605725765228, "learning_rate": 8.336344893121372e-05, "loss": 0.0104, "step": 30410 }, { "epoch": 40.72289156626506, "grad_norm": 0.16352875530719757, "learning_rate": 8.335113178566977e-05, "loss": 0.0099, "step": 30420 }, { "epoch": 40.73627844712182, "grad_norm": 0.3657841682434082, "learning_rate": 8.33388109928962e-05, "loss": 0.0146, "step": 30430 }, { "epoch": 40.74966532797858, "grad_norm": 0.4852167069911957, "learning_rate": 8.33264865542404e-05, "loss": 0.0124, "step": 30440 }, { "epoch": 40.76305220883534, "grad_norm": 0.1524057239294052, "learning_rate": 8.331415847105013e-05, "loss": 0.0109, "step": 30450 }, { "epoch": 40.776439089692104, "grad_norm": 0.10011065006256104, "learning_rate": 8.330182674467357e-05, "loss": 0.0079, "step": 30460 }, { "epoch": 40.78982597054886, "grad_norm": 0.27771785855293274, "learning_rate": 8.32894913764593e-05, "loss": 0.0105, "step": 30470 }, { "epoch": 40.803212851405625, "grad_norm": 0.22762691974639893, "learning_rate": 8.327715236775633e-05, "loss": 0.0092, "step": 30480 }, { "epoch": 40.81659973226238, "grad_norm": 0.18942831456661224, "learning_rate": 8.326480971991398e-05, "loss": 0.0107, "step": 30490 }, { "epoch": 40.82998661311915, "grad_norm": 0.2222774624824524, "learning_rate": 8.325246343428206e-05, "loss": 0.0083, "step": 30500 }, { "epoch": 40.8433734939759, "grad_norm": 0.19218003749847412, "learning_rate": 8.324011351221072e-05, "loss": 0.0092, "step": 30510 }, { "epoch": 40.85676037483266, "grad_norm": 0.09083753824234009, "learning_rate": 8.322775995505057e-05, "loss": 0.0104, "step": 30520 }, { "epoch": 40.870147255689425, "grad_norm": 0.32608696818351746, "learning_rate": 8.321540276415254e-05, "loss": 0.0112, "step": 30530 }, { "epoch": 40.88353413654618, "grad_norm": 0.204985573887825, "learning_rate": 8.320304194086798e-05, "loss": 0.0103, "step": 30540 }, { "epoch": 40.896921017402946, "grad_norm": 0.2182147353887558, "learning_rate": 8.31906774865487e-05, "loss": 0.01, "step": 30550 }, { "epoch": 40.9103078982597, "grad_norm": 0.21369946002960205, "learning_rate": 8.317830940254682e-05, "loss": 0.0099, "step": 30560 }, { "epoch": 40.92369477911647, "grad_norm": 0.15752047300338745, "learning_rate": 8.316593769021491e-05, "loss": 0.0113, "step": 30570 }, { "epoch": 40.937081659973224, "grad_norm": 0.736398458480835, "learning_rate": 8.315356235090592e-05, "loss": 0.0107, "step": 30580 }, { "epoch": 40.95046854082999, "grad_norm": 0.2648525536060333, "learning_rate": 8.314118338597319e-05, "loss": 0.0103, "step": 30590 }, { "epoch": 40.963855421686745, "grad_norm": 0.34076789021492004, "learning_rate": 8.312880079677048e-05, "loss": 0.0108, "step": 30600 }, { "epoch": 40.97724230254351, "grad_norm": 0.18263471126556396, "learning_rate": 8.311641458465191e-05, "loss": 0.0086, "step": 30610 }, { "epoch": 40.990629183400266, "grad_norm": 0.14842753112316132, "learning_rate": 8.310402475097205e-05, "loss": 0.01, "step": 30620 }, { "epoch": 41.00401606425703, "grad_norm": 1.296816110610962, "learning_rate": 8.309163129708581e-05, "loss": 0.0109, "step": 30630 }, { "epoch": 41.01740294511379, "grad_norm": 0.2128210812807083, "learning_rate": 8.307923422434852e-05, "loss": 0.0114, "step": 30640 }, { "epoch": 41.03078982597055, "grad_norm": 0.2823532223701477, "learning_rate": 8.30668335341159e-05, "loss": 0.0094, "step": 30650 }, { "epoch": 41.04417670682731, "grad_norm": 0.2675648629665375, "learning_rate": 8.305442922774408e-05, "loss": 0.0095, "step": 30660 }, { "epoch": 41.05756358768407, "grad_norm": 0.22039711475372314, "learning_rate": 8.304202130658959e-05, "loss": 0.0095, "step": 30670 }, { "epoch": 41.07095046854083, "grad_norm": 0.18157191574573517, "learning_rate": 8.302960977200931e-05, "loss": 0.0083, "step": 30680 }, { "epoch": 41.08433734939759, "grad_norm": 0.38300275802612305, "learning_rate": 8.301719462536058e-05, "loss": 0.0098, "step": 30690 }, { "epoch": 41.09772423025435, "grad_norm": 0.16770444810390472, "learning_rate": 8.300477586800108e-05, "loss": 0.0105, "step": 30700 }, { "epoch": 41.111111111111114, "grad_norm": 0.19097910821437836, "learning_rate": 8.299235350128892e-05, "loss": 0.0134, "step": 30710 }, { "epoch": 41.12449799196787, "grad_norm": 0.19892902672290802, "learning_rate": 8.297992752658258e-05, "loss": 0.0094, "step": 30720 }, { "epoch": 41.137884872824635, "grad_norm": 0.18316690623760223, "learning_rate": 8.296749794524094e-05, "loss": 0.01, "step": 30730 }, { "epoch": 41.15127175368139, "grad_norm": 0.20440572500228882, "learning_rate": 8.295506475862332e-05, "loss": 0.0114, "step": 30740 }, { "epoch": 41.16465863453815, "grad_norm": 0.21531659364700317, "learning_rate": 8.294262796808933e-05, "loss": 0.0123, "step": 30750 }, { "epoch": 41.17804551539491, "grad_norm": 0.2530180811882019, "learning_rate": 8.293018757499909e-05, "loss": 0.013, "step": 30760 }, { "epoch": 41.19143239625167, "grad_norm": 0.1713733673095703, "learning_rate": 8.291774358071305e-05, "loss": 0.009, "step": 30770 }, { "epoch": 41.204819277108435, "grad_norm": 0.19784948229789734, "learning_rate": 8.290529598659205e-05, "loss": 0.0113, "step": 30780 }, { "epoch": 41.21820615796519, "grad_norm": 0.15952534973621368, "learning_rate": 8.289284479399738e-05, "loss": 0.0096, "step": 30790 }, { "epoch": 41.231593038821956, "grad_norm": 0.3074932098388672, "learning_rate": 8.288039000429064e-05, "loss": 0.01, "step": 30800 }, { "epoch": 41.24497991967871, "grad_norm": 0.4531451463699341, "learning_rate": 8.286793161883388e-05, "loss": 0.012, "step": 30810 }, { "epoch": 41.25836680053548, "grad_norm": 0.2542347311973572, "learning_rate": 8.285546963898954e-05, "loss": 0.0087, "step": 30820 }, { "epoch": 41.271753681392234, "grad_norm": 0.19546504318714142, "learning_rate": 8.284300406612044e-05, "loss": 0.0097, "step": 30830 }, { "epoch": 41.285140562249, "grad_norm": 0.23278027772903442, "learning_rate": 8.283053490158978e-05, "loss": 0.0098, "step": 30840 }, { "epoch": 41.298527443105755, "grad_norm": 0.23261165618896484, "learning_rate": 8.28180621467612e-05, "loss": 0.0131, "step": 30850 }, { "epoch": 41.31191432396252, "grad_norm": 0.28802424669265747, "learning_rate": 8.280558580299867e-05, "loss": 0.0102, "step": 30860 }, { "epoch": 41.325301204819276, "grad_norm": 0.16131362318992615, "learning_rate": 8.279310587166661e-05, "loss": 0.0093, "step": 30870 }, { "epoch": 41.33868808567604, "grad_norm": 0.1744258552789688, "learning_rate": 8.278062235412978e-05, "loss": 0.0099, "step": 30880 }, { "epoch": 41.3520749665328, "grad_norm": 0.1560913324356079, "learning_rate": 8.276813525175339e-05, "loss": 0.0101, "step": 30890 }, { "epoch": 41.36546184738956, "grad_norm": 0.6325582265853882, "learning_rate": 8.2755644565903e-05, "loss": 0.0085, "step": 30900 }, { "epoch": 41.37884872824632, "grad_norm": 0.18706294894218445, "learning_rate": 8.274315029794454e-05, "loss": 0.011, "step": 30910 }, { "epoch": 41.39223560910308, "grad_norm": 0.20819039642810822, "learning_rate": 8.273065244924443e-05, "loss": 0.0124, "step": 30920 }, { "epoch": 41.40562248995984, "grad_norm": 0.20057325065135956, "learning_rate": 8.271815102116936e-05, "loss": 0.0135, "step": 30930 }, { "epoch": 41.4190093708166, "grad_norm": 0.17458100616931915, "learning_rate": 8.270564601508648e-05, "loss": 0.0097, "step": 30940 }, { "epoch": 41.43239625167336, "grad_norm": 0.2843684256076813, "learning_rate": 8.269313743236333e-05, "loss": 0.0095, "step": 30950 }, { "epoch": 41.44578313253012, "grad_norm": 0.1380053013563156, "learning_rate": 8.268062527436783e-05, "loss": 0.0097, "step": 30960 }, { "epoch": 41.45917001338688, "grad_norm": 0.33806881308555603, "learning_rate": 8.266810954246828e-05, "loss": 0.012, "step": 30970 }, { "epoch": 41.47255689424364, "grad_norm": 0.30778831243515015, "learning_rate": 8.265559023803338e-05, "loss": 0.0132, "step": 30980 }, { "epoch": 41.4859437751004, "grad_norm": 0.18908290565013885, "learning_rate": 8.264306736243223e-05, "loss": 0.0117, "step": 30990 }, { "epoch": 41.49933065595716, "grad_norm": 0.20026427507400513, "learning_rate": 8.263054091703432e-05, "loss": 0.0088, "step": 31000 }, { "epoch": 41.51271753681392, "grad_norm": 0.2125520259141922, "learning_rate": 8.26180109032095e-05, "loss": 0.0102, "step": 31010 }, { "epoch": 41.52610441767068, "grad_norm": 0.3413325250148773, "learning_rate": 8.260547732232807e-05, "loss": 0.0116, "step": 31020 }, { "epoch": 41.539491298527444, "grad_norm": 0.12915056943893433, "learning_rate": 8.259294017576063e-05, "loss": 0.01, "step": 31030 }, { "epoch": 41.5528781793842, "grad_norm": 0.4552721381187439, "learning_rate": 8.258039946487828e-05, "loss": 0.0101, "step": 31040 }, { "epoch": 41.566265060240966, "grad_norm": 0.25232815742492676, "learning_rate": 8.256785519105241e-05, "loss": 0.0109, "step": 31050 }, { "epoch": 41.57965194109772, "grad_norm": 0.5327684283256531, "learning_rate": 8.255530735565488e-05, "loss": 0.0098, "step": 31060 }, { "epoch": 41.59303882195449, "grad_norm": 0.15966947376728058, "learning_rate": 8.254275596005787e-05, "loss": 0.0098, "step": 31070 }, { "epoch": 41.606425702811244, "grad_norm": 2.481395721435547, "learning_rate": 8.2530201005634e-05, "loss": 0.0112, "step": 31080 }, { "epoch": 41.61981258366801, "grad_norm": 0.18197564780712128, "learning_rate": 8.251764249375626e-05, "loss": 0.0127, "step": 31090 }, { "epoch": 41.633199464524765, "grad_norm": 0.15686725080013275, "learning_rate": 8.250508042579803e-05, "loss": 0.0082, "step": 31100 }, { "epoch": 41.64658634538153, "grad_norm": 0.3523377478122711, "learning_rate": 8.249251480313307e-05, "loss": 0.0131, "step": 31110 }, { "epoch": 41.659973226238286, "grad_norm": 0.29951778054237366, "learning_rate": 8.247994562713555e-05, "loss": 0.0089, "step": 31120 }, { "epoch": 41.67336010709505, "grad_norm": 0.286418616771698, "learning_rate": 8.246737289917998e-05, "loss": 0.0112, "step": 31130 }, { "epoch": 41.68674698795181, "grad_norm": 0.3169190585613251, "learning_rate": 8.245479662064135e-05, "loss": 0.0109, "step": 31140 }, { "epoch": 41.70013386880857, "grad_norm": 0.48340657353401184, "learning_rate": 8.244221679289496e-05, "loss": 0.0129, "step": 31150 }, { "epoch": 41.71352074966533, "grad_norm": 0.12625010311603546, "learning_rate": 8.242963341731652e-05, "loss": 0.0113, "step": 31160 }, { "epoch": 41.726907630522085, "grad_norm": 0.5198060870170593, "learning_rate": 8.24170464952821e-05, "loss": 0.0092, "step": 31170 }, { "epoch": 41.74029451137885, "grad_norm": 0.14700187742710114, "learning_rate": 8.240445602816824e-05, "loss": 0.0097, "step": 31180 }, { "epoch": 41.753681392235606, "grad_norm": 0.15943142771720886, "learning_rate": 8.239186201735179e-05, "loss": 0.0084, "step": 31190 }, { "epoch": 41.76706827309237, "grad_norm": 0.1985526829957962, "learning_rate": 8.237926446420998e-05, "loss": 0.0103, "step": 31200 }, { "epoch": 41.78045515394913, "grad_norm": 0.5413469672203064, "learning_rate": 8.236666337012052e-05, "loss": 0.0104, "step": 31210 }, { "epoch": 41.79384203480589, "grad_norm": 0.4654756188392639, "learning_rate": 8.23540587364614e-05, "loss": 0.0118, "step": 31220 }, { "epoch": 41.80722891566265, "grad_norm": 0.2852294445037842, "learning_rate": 8.234145056461107e-05, "loss": 0.0119, "step": 31230 }, { "epoch": 41.82061579651941, "grad_norm": 0.23670168220996857, "learning_rate": 8.232883885594832e-05, "loss": 0.0101, "step": 31240 }, { "epoch": 41.83400267737617, "grad_norm": 0.163221538066864, "learning_rate": 8.231622361185236e-05, "loss": 0.0114, "step": 31250 }, { "epoch": 41.84738955823293, "grad_norm": 0.1748521625995636, "learning_rate": 8.230360483370278e-05, "loss": 0.0091, "step": 31260 }, { "epoch": 41.86077643908969, "grad_norm": 1.0463653802871704, "learning_rate": 8.229098252287953e-05, "loss": 0.0109, "step": 31270 }, { "epoch": 41.874163319946454, "grad_norm": 0.2826821506023407, "learning_rate": 8.2278356680763e-05, "loss": 0.0098, "step": 31280 }, { "epoch": 41.88755020080321, "grad_norm": 0.2899247705936432, "learning_rate": 8.22657273087339e-05, "loss": 0.0097, "step": 31290 }, { "epoch": 41.900937081659976, "grad_norm": 0.18714697659015656, "learning_rate": 8.225309440817336e-05, "loss": 0.0105, "step": 31300 }, { "epoch": 41.91432396251673, "grad_norm": 0.16969074308872223, "learning_rate": 8.224045798046293e-05, "loss": 0.0086, "step": 31310 }, { "epoch": 41.9277108433735, "grad_norm": 0.13425664603710175, "learning_rate": 8.22278180269845e-05, "loss": 0.0112, "step": 31320 }, { "epoch": 41.941097724230254, "grad_norm": 0.1745426058769226, "learning_rate": 8.221517454912031e-05, "loss": 0.0089, "step": 31330 }, { "epoch": 41.95448460508702, "grad_norm": 0.25381192564964294, "learning_rate": 8.220252754825308e-05, "loss": 0.0102, "step": 31340 }, { "epoch": 41.967871485943775, "grad_norm": 0.30170485377311707, "learning_rate": 8.218987702576586e-05, "loss": 0.0098, "step": 31350 }, { "epoch": 41.98125836680054, "grad_norm": 0.19453094899654388, "learning_rate": 8.217722298304207e-05, "loss": 0.0134, "step": 31360 }, { "epoch": 41.994645247657296, "grad_norm": 0.18749572336673737, "learning_rate": 8.216456542146557e-05, "loss": 0.0104, "step": 31370 }, { "epoch": 42.00803212851405, "grad_norm": 0.47608017921447754, "learning_rate": 8.215190434242055e-05, "loss": 0.0096, "step": 31380 }, { "epoch": 42.02141900937082, "grad_norm": 0.1577044278383255, "learning_rate": 8.213923974729161e-05, "loss": 0.0097, "step": 31390 }, { "epoch": 42.034805890227574, "grad_norm": 0.2860114872455597, "learning_rate": 8.212657163746373e-05, "loss": 0.0122, "step": 31400 }, { "epoch": 42.04819277108434, "grad_norm": 0.24430423974990845, "learning_rate": 8.211390001432227e-05, "loss": 0.0081, "step": 31410 }, { "epoch": 42.061579651941095, "grad_norm": 0.21953238546848297, "learning_rate": 8.210122487925297e-05, "loss": 0.0112, "step": 31420 }, { "epoch": 42.07496653279786, "grad_norm": 0.2408682256937027, "learning_rate": 8.208854623364202e-05, "loss": 0.0115, "step": 31430 }, { "epoch": 42.088353413654616, "grad_norm": 0.2827094495296478, "learning_rate": 8.207586407887589e-05, "loss": 0.0124, "step": 31440 }, { "epoch": 42.10174029451138, "grad_norm": 0.2157178819179535, "learning_rate": 8.206317841634148e-05, "loss": 0.0109, "step": 31450 }, { "epoch": 42.11512717536814, "grad_norm": 0.175815612077713, "learning_rate": 8.205048924742609e-05, "loss": 0.0098, "step": 31460 }, { "epoch": 42.1285140562249, "grad_norm": 0.22249741852283478, "learning_rate": 8.203779657351738e-05, "loss": 0.0083, "step": 31470 }, { "epoch": 42.14190093708166, "grad_norm": 0.1577153503894806, "learning_rate": 8.20251003960034e-05, "loss": 0.0093, "step": 31480 }, { "epoch": 42.15528781793842, "grad_norm": 0.13295210897922516, "learning_rate": 8.201240071627258e-05, "loss": 0.0092, "step": 31490 }, { "epoch": 42.16867469879518, "grad_norm": 0.15805785357952118, "learning_rate": 8.199969753571377e-05, "loss": 0.0097, "step": 31500 }, { "epoch": 42.18206157965194, "grad_norm": 0.19942091405391693, "learning_rate": 8.198699085571615e-05, "loss": 0.01, "step": 31510 }, { "epoch": 42.1954484605087, "grad_norm": 0.19606918096542358, "learning_rate": 8.197428067766928e-05, "loss": 0.008, "step": 31520 }, { "epoch": 42.208835341365464, "grad_norm": 0.24939461052417755, "learning_rate": 8.196156700296316e-05, "loss": 0.0104, "step": 31530 }, { "epoch": 42.22222222222222, "grad_norm": 0.1323649138212204, "learning_rate": 8.194884983298814e-05, "loss": 0.0106, "step": 31540 }, { "epoch": 42.235609103078986, "grad_norm": 0.1252935826778412, "learning_rate": 8.193612916913491e-05, "loss": 0.0095, "step": 31550 }, { "epoch": 42.24899598393574, "grad_norm": 0.4116942286491394, "learning_rate": 8.192340501279463e-05, "loss": 0.0119, "step": 31560 }, { "epoch": 42.26238286479251, "grad_norm": 0.2735898792743683, "learning_rate": 8.191067736535876e-05, "loss": 0.0091, "step": 31570 }, { "epoch": 42.275769745649264, "grad_norm": 0.16511858999729156, "learning_rate": 8.18979462282192e-05, "loss": 0.0128, "step": 31580 }, { "epoch": 42.28915662650602, "grad_norm": 0.306985467672348, "learning_rate": 8.188521160276819e-05, "loss": 0.0121, "step": 31590 }, { "epoch": 42.302543507362785, "grad_norm": 0.2088162750005722, "learning_rate": 8.187247349039837e-05, "loss": 0.0095, "step": 31600 }, { "epoch": 42.31593038821954, "grad_norm": 0.23916268348693848, "learning_rate": 8.185973189250278e-05, "loss": 0.01, "step": 31610 }, { "epoch": 42.329317269076306, "grad_norm": 0.1849430501461029, "learning_rate": 8.184698681047482e-05, "loss": 0.0103, "step": 31620 }, { "epoch": 42.34270414993306, "grad_norm": 0.14039014279842377, "learning_rate": 8.183423824570827e-05, "loss": 0.0086, "step": 31630 }, { "epoch": 42.35609103078983, "grad_norm": 0.288423091173172, "learning_rate": 8.182148619959725e-05, "loss": 0.0114, "step": 31640 }, { "epoch": 42.369477911646584, "grad_norm": 0.28472962975502014, "learning_rate": 8.180873067353636e-05, "loss": 0.0119, "step": 31650 }, { "epoch": 42.38286479250335, "grad_norm": 0.20664241909980774, "learning_rate": 8.179597166892052e-05, "loss": 0.0121, "step": 31660 }, { "epoch": 42.396251673360105, "grad_norm": 0.1301373392343521, "learning_rate": 8.178320918714501e-05, "loss": 0.0111, "step": 31670 }, { "epoch": 42.40963855421687, "grad_norm": 0.1603415310382843, "learning_rate": 8.177044322960554e-05, "loss": 0.0124, "step": 31680 }, { "epoch": 42.423025435073626, "grad_norm": 0.39970582723617554, "learning_rate": 8.175767379769816e-05, "loss": 0.0108, "step": 31690 }, { "epoch": 42.43641231593039, "grad_norm": 0.2594336271286011, "learning_rate": 8.174490089281932e-05, "loss": 0.0094, "step": 31700 }, { "epoch": 42.44979919678715, "grad_norm": 0.2679770886898041, "learning_rate": 8.173212451636584e-05, "loss": 0.0089, "step": 31710 }, { "epoch": 42.46318607764391, "grad_norm": 0.11768611520528793, "learning_rate": 8.171934466973493e-05, "loss": 0.0091, "step": 31720 }, { "epoch": 42.47657295850067, "grad_norm": 0.20289596915245056, "learning_rate": 8.170656135432418e-05, "loss": 0.0129, "step": 31730 }, { "epoch": 42.48995983935743, "grad_norm": 3.549234628677368, "learning_rate": 8.169377457153155e-05, "loss": 0.0127, "step": 31740 }, { "epoch": 42.50334672021419, "grad_norm": 0.22726526856422424, "learning_rate": 8.168098432275539e-05, "loss": 0.0117, "step": 31750 }, { "epoch": 42.51673360107095, "grad_norm": 0.15840768814086914, "learning_rate": 8.166819060939442e-05, "loss": 0.0119, "step": 31760 }, { "epoch": 42.53012048192771, "grad_norm": 0.19214697182178497, "learning_rate": 8.165539343284772e-05, "loss": 0.0135, "step": 31770 }, { "epoch": 42.543507362784474, "grad_norm": 0.1964067816734314, "learning_rate": 8.16425927945148e-05, "loss": 0.0115, "step": 31780 }, { "epoch": 42.55689424364123, "grad_norm": 0.11837615072727203, "learning_rate": 8.162978869579551e-05, "loss": 0.0093, "step": 31790 }, { "epoch": 42.570281124497996, "grad_norm": 0.1203923150897026, "learning_rate": 8.161698113809007e-05, "loss": 0.0106, "step": 31800 }, { "epoch": 42.58366800535475, "grad_norm": 0.2774207293987274, "learning_rate": 8.160417012279911e-05, "loss": 0.013, "step": 31810 }, { "epoch": 42.59705488621151, "grad_norm": 0.1880640834569931, "learning_rate": 8.159135565132363e-05, "loss": 0.0125, "step": 31820 }, { "epoch": 42.610441767068274, "grad_norm": 0.7185260057449341, "learning_rate": 8.157853772506498e-05, "loss": 0.0124, "step": 31830 }, { "epoch": 42.62382864792503, "grad_norm": 0.1722010374069214, "learning_rate": 8.156571634542494e-05, "loss": 0.0114, "step": 31840 }, { "epoch": 42.637215528781795, "grad_norm": 0.18299728631973267, "learning_rate": 8.15528915138056e-05, "loss": 0.0109, "step": 31850 }, { "epoch": 42.65060240963855, "grad_norm": 0.18930314481258392, "learning_rate": 8.154006323160949e-05, "loss": 0.013, "step": 31860 }, { "epoch": 42.663989290495316, "grad_norm": 0.25978007912635803, "learning_rate": 8.152723150023949e-05, "loss": 0.0108, "step": 31870 }, { "epoch": 42.67737617135207, "grad_norm": 0.22538618743419647, "learning_rate": 8.151439632109886e-05, "loss": 0.0131, "step": 31880 }, { "epoch": 42.69076305220884, "grad_norm": 0.19807982444763184, "learning_rate": 8.150155769559122e-05, "loss": 0.0131, "step": 31890 }, { "epoch": 42.704149933065594, "grad_norm": 0.21113045513629913, "learning_rate": 8.148871562512058e-05, "loss": 0.0102, "step": 31900 }, { "epoch": 42.71753681392236, "grad_norm": 0.1752384901046753, "learning_rate": 8.147587011109136e-05, "loss": 0.0113, "step": 31910 }, { "epoch": 42.730923694779115, "grad_norm": 0.18773113191127777, "learning_rate": 8.14630211549083e-05, "loss": 0.0126, "step": 31920 }, { "epoch": 42.74431057563588, "grad_norm": 0.18509377539157867, "learning_rate": 8.145016875797655e-05, "loss": 0.0103, "step": 31930 }, { "epoch": 42.757697456492636, "grad_norm": 0.17140211164951324, "learning_rate": 8.143731292170164e-05, "loss": 0.0108, "step": 31940 }, { "epoch": 42.7710843373494, "grad_norm": 0.16141876578330994, "learning_rate": 8.142445364748944e-05, "loss": 0.0098, "step": 31950 }, { "epoch": 42.78447121820616, "grad_norm": 0.236440047621727, "learning_rate": 8.141159093674624e-05, "loss": 0.0099, "step": 31960 }, { "epoch": 42.79785809906292, "grad_norm": 0.16568498313426971, "learning_rate": 8.139872479087869e-05, "loss": 0.0103, "step": 31970 }, { "epoch": 42.81124497991968, "grad_norm": 0.1927831918001175, "learning_rate": 8.13858552112938e-05, "loss": 0.0128, "step": 31980 }, { "epoch": 42.82463186077644, "grad_norm": 0.1475573480129242, "learning_rate": 8.137298219939895e-05, "loss": 0.0098, "step": 31990 }, { "epoch": 42.8380187416332, "grad_norm": 0.18378502130508423, "learning_rate": 8.136010575660196e-05, "loss": 0.0127, "step": 32000 }, { "epoch": 42.85140562248996, "grad_norm": 0.16198523342609406, "learning_rate": 8.134722588431095e-05, "loss": 0.0109, "step": 32010 }, { "epoch": 42.86479250334672, "grad_norm": 0.16836325824260712, "learning_rate": 8.133434258393444e-05, "loss": 0.0092, "step": 32020 }, { "epoch": 42.87817938420348, "grad_norm": 0.2313542515039444, "learning_rate": 8.132145585688134e-05, "loss": 0.0114, "step": 32030 }, { "epoch": 42.89156626506024, "grad_norm": 0.3055002987384796, "learning_rate": 8.130856570456093e-05, "loss": 0.0123, "step": 32040 }, { "epoch": 42.904953145917, "grad_norm": 0.23553664982318878, "learning_rate": 8.129567212838283e-05, "loss": 0.0127, "step": 32050 }, { "epoch": 42.91834002677376, "grad_norm": 0.31158819794654846, "learning_rate": 8.128277512975708e-05, "loss": 0.0123, "step": 32060 }, { "epoch": 42.93172690763052, "grad_norm": 0.1661199927330017, "learning_rate": 8.126987471009408e-05, "loss": 0.0122, "step": 32070 }, { "epoch": 42.945113788487284, "grad_norm": 0.2352694869041443, "learning_rate": 8.125697087080459e-05, "loss": 0.0112, "step": 32080 }, { "epoch": 42.95850066934404, "grad_norm": 0.18506695330142975, "learning_rate": 8.124406361329976e-05, "loss": 0.0108, "step": 32090 }, { "epoch": 42.971887550200805, "grad_norm": 0.1415473371744156, "learning_rate": 8.123115293899111e-05, "loss": 0.0094, "step": 32100 }, { "epoch": 42.98527443105756, "grad_norm": 0.19541147351264954, "learning_rate": 8.121823884929055e-05, "loss": 0.011, "step": 32110 }, { "epoch": 42.998661311914326, "grad_norm": 0.15976165235042572, "learning_rate": 8.12053213456103e-05, "loss": 0.0098, "step": 32120 }, { "epoch": 43.01204819277108, "grad_norm": 3.0729775428771973, "learning_rate": 8.119240042936303e-05, "loss": 0.0143, "step": 32130 }, { "epoch": 43.02543507362785, "grad_norm": 0.16621828079223633, "learning_rate": 8.117947610196175e-05, "loss": 0.012, "step": 32140 }, { "epoch": 43.038821954484604, "grad_norm": 0.2563892900943756, "learning_rate": 8.116654836481982e-05, "loss": 0.0132, "step": 32150 }, { "epoch": 43.05220883534137, "grad_norm": 0.21064187586307526, "learning_rate": 8.115361721935106e-05, "loss": 0.0111, "step": 32160 }, { "epoch": 43.065595716198125, "grad_norm": 0.14940783381462097, "learning_rate": 8.114068266696953e-05, "loss": 0.0114, "step": 32170 }, { "epoch": 43.07898259705489, "grad_norm": 0.16897396743297577, "learning_rate": 8.112774470908978e-05, "loss": 0.011, "step": 32180 }, { "epoch": 43.092369477911646, "grad_norm": 0.18853314220905304, "learning_rate": 8.111480334712665e-05, "loss": 0.0127, "step": 32190 }, { "epoch": 43.10575635876841, "grad_norm": 0.22045546770095825, "learning_rate": 8.110185858249542e-05, "loss": 0.0112, "step": 32200 }, { "epoch": 43.11914323962517, "grad_norm": 0.18138746917247772, "learning_rate": 8.108891041661168e-05, "loss": 0.0108, "step": 32210 }, { "epoch": 43.13253012048193, "grad_norm": 0.15334002673625946, "learning_rate": 8.107595885089146e-05, "loss": 0.0132, "step": 32220 }, { "epoch": 43.14591700133869, "grad_norm": 0.3302076756954193, "learning_rate": 8.106300388675108e-05, "loss": 0.0112, "step": 32230 }, { "epoch": 43.159303882195445, "grad_norm": 0.18962961435317993, "learning_rate": 8.105004552560731e-05, "loss": 0.009, "step": 32240 }, { "epoch": 43.17269076305221, "grad_norm": 0.19293351471424103, "learning_rate": 8.103708376887724e-05, "loss": 0.0115, "step": 32250 }, { "epoch": 43.186077643908966, "grad_norm": 0.8247418403625488, "learning_rate": 8.102411861797836e-05, "loss": 0.0122, "step": 32260 }, { "epoch": 43.19946452476573, "grad_norm": 0.2109793722629547, "learning_rate": 8.101115007432851e-05, "loss": 0.0103, "step": 32270 }, { "epoch": 43.21285140562249, "grad_norm": 0.1946270614862442, "learning_rate": 8.09981781393459e-05, "loss": 0.0102, "step": 32280 }, { "epoch": 43.22623828647925, "grad_norm": 0.2408604919910431, "learning_rate": 8.098520281444915e-05, "loss": 0.0124, "step": 32290 }, { "epoch": 43.23962516733601, "grad_norm": 0.3480418026447296, "learning_rate": 8.09722241010572e-05, "loss": 0.0089, "step": 32300 }, { "epoch": 43.25301204819277, "grad_norm": 0.15239018201828003, "learning_rate": 8.095924200058939e-05, "loss": 0.0105, "step": 32310 }, { "epoch": 43.26639892904953, "grad_norm": 0.1902354657649994, "learning_rate": 8.094625651446541e-05, "loss": 0.0107, "step": 32320 }, { "epoch": 43.27978580990629, "grad_norm": 0.15830829739570618, "learning_rate": 8.093326764410536e-05, "loss": 0.0112, "step": 32330 }, { "epoch": 43.29317269076305, "grad_norm": 0.4575965106487274, "learning_rate": 8.092027539092966e-05, "loss": 0.0117, "step": 32340 }, { "epoch": 43.306559571619815, "grad_norm": 0.24962998926639557, "learning_rate": 8.090727975635913e-05, "loss": 0.0127, "step": 32350 }, { "epoch": 43.31994645247657, "grad_norm": 0.2177237570285797, "learning_rate": 8.089428074181497e-05, "loss": 0.0105, "step": 32360 }, { "epoch": 43.333333333333336, "grad_norm": 0.23308362066745758, "learning_rate": 8.088127834871871e-05, "loss": 0.0123, "step": 32370 }, { "epoch": 43.34672021419009, "grad_norm": 0.21610385179519653, "learning_rate": 8.086827257849226e-05, "loss": 0.0123, "step": 32380 }, { "epoch": 43.36010709504686, "grad_norm": 0.12857335805892944, "learning_rate": 8.085526343255795e-05, "loss": 0.0106, "step": 32390 }, { "epoch": 43.373493975903614, "grad_norm": 0.19450820982456207, "learning_rate": 8.084225091233842e-05, "loss": 0.0089, "step": 32400 }, { "epoch": 43.38688085676038, "grad_norm": 0.4610513150691986, "learning_rate": 8.082923501925668e-05, "loss": 0.0117, "step": 32410 }, { "epoch": 43.400267737617135, "grad_norm": 0.1398957073688507, "learning_rate": 8.081621575473617e-05, "loss": 0.0105, "step": 32420 }, { "epoch": 43.4136546184739, "grad_norm": 0.34868207573890686, "learning_rate": 8.080319312020064e-05, "loss": 0.0112, "step": 32430 }, { "epoch": 43.427041499330656, "grad_norm": 0.18168136477470398, "learning_rate": 8.079016711707421e-05, "loss": 0.0099, "step": 32440 }, { "epoch": 43.44042838018741, "grad_norm": 0.22597871720790863, "learning_rate": 8.077713774678139e-05, "loss": 0.013, "step": 32450 }, { "epoch": 43.45381526104418, "grad_norm": 0.2639765441417694, "learning_rate": 8.076410501074707e-05, "loss": 0.0141, "step": 32460 }, { "epoch": 43.467202141900934, "grad_norm": 0.5105646252632141, "learning_rate": 8.075106891039647e-05, "loss": 0.0117, "step": 32470 }, { "epoch": 43.4805890227577, "grad_norm": 0.24072207510471344, "learning_rate": 8.073802944715523e-05, "loss": 0.0111, "step": 32480 }, { "epoch": 43.493975903614455, "grad_norm": 0.6568896174430847, "learning_rate": 8.072498662244929e-05, "loss": 0.0087, "step": 32490 }, { "epoch": 43.50736278447122, "grad_norm": 0.3058460056781769, "learning_rate": 8.0711940437705e-05, "loss": 0.0127, "step": 32500 }, { "epoch": 43.520749665327976, "grad_norm": 0.30889278650283813, "learning_rate": 8.06988908943491e-05, "loss": 0.0101, "step": 32510 }, { "epoch": 43.53413654618474, "grad_norm": 0.15815672278404236, "learning_rate": 8.068583799380863e-05, "loss": 0.0094, "step": 32520 }, { "epoch": 43.5475234270415, "grad_norm": 0.4198214113712311, "learning_rate": 8.067278173751104e-05, "loss": 0.0136, "step": 32530 }, { "epoch": 43.56091030789826, "grad_norm": 0.35846880078315735, "learning_rate": 8.065972212688417e-05, "loss": 0.0108, "step": 32540 }, { "epoch": 43.57429718875502, "grad_norm": 0.14396388828754425, "learning_rate": 8.064665916335618e-05, "loss": 0.0102, "step": 32550 }, { "epoch": 43.58768406961178, "grad_norm": 0.20905567705631256, "learning_rate": 8.063359284835564e-05, "loss": 0.0089, "step": 32560 }, { "epoch": 43.60107095046854, "grad_norm": 0.21553649008274078, "learning_rate": 8.062052318331142e-05, "loss": 0.0094, "step": 32570 }, { "epoch": 43.6144578313253, "grad_norm": 0.2528960108757019, "learning_rate": 8.060745016965283e-05, "loss": 0.0094, "step": 32580 }, { "epoch": 43.62784471218206, "grad_norm": 0.22879508137702942, "learning_rate": 8.059437380880952e-05, "loss": 0.0116, "step": 32590 }, { "epoch": 43.641231593038825, "grad_norm": 0.2184653878211975, "learning_rate": 8.058129410221146e-05, "loss": 0.0116, "step": 32600 }, { "epoch": 43.65461847389558, "grad_norm": 0.2811056673526764, "learning_rate": 8.056821105128908e-05, "loss": 0.0101, "step": 32610 }, { "epoch": 43.668005354752346, "grad_norm": 0.26121217012405396, "learning_rate": 8.05551246574731e-05, "loss": 0.0092, "step": 32620 }, { "epoch": 43.6813922356091, "grad_norm": 0.2761942446231842, "learning_rate": 8.05420349221946e-05, "loss": 0.0102, "step": 32630 }, { "epoch": 43.69477911646587, "grad_norm": 0.19972574710845947, "learning_rate": 8.05289418468851e-05, "loss": 0.0115, "step": 32640 }, { "epoch": 43.708165997322624, "grad_norm": 0.3932955265045166, "learning_rate": 8.051584543297642e-05, "loss": 0.0102, "step": 32650 }, { "epoch": 43.72155287817938, "grad_norm": 0.3060183525085449, "learning_rate": 8.050274568190074e-05, "loss": 0.0102, "step": 32660 }, { "epoch": 43.734939759036145, "grad_norm": 0.23562747240066528, "learning_rate": 8.048964259509067e-05, "loss": 0.0088, "step": 32670 }, { "epoch": 43.7483266398929, "grad_norm": 1.1291844844818115, "learning_rate": 8.047653617397914e-05, "loss": 0.0116, "step": 32680 }, { "epoch": 43.761713520749666, "grad_norm": 0.20132145285606384, "learning_rate": 8.046342641999941e-05, "loss": 0.0089, "step": 32690 }, { "epoch": 43.77510040160642, "grad_norm": 0.14973753690719604, "learning_rate": 8.045031333458517e-05, "loss": 0.0095, "step": 32700 }, { "epoch": 43.78848728246319, "grad_norm": 0.1331842690706253, "learning_rate": 8.043719691917047e-05, "loss": 0.0096, "step": 32710 }, { "epoch": 43.801874163319944, "grad_norm": 0.21898671984672546, "learning_rate": 8.042407717518966e-05, "loss": 0.0087, "step": 32720 }, { "epoch": 43.81526104417671, "grad_norm": 0.2042125165462494, "learning_rate": 8.041095410407751e-05, "loss": 0.0114, "step": 32730 }, { "epoch": 43.828647925033465, "grad_norm": 0.16746209561824799, "learning_rate": 8.039782770726913e-05, "loss": 0.0106, "step": 32740 }, { "epoch": 43.84203480589023, "grad_norm": 0.1622782051563263, "learning_rate": 8.038469798620004e-05, "loss": 0.0096, "step": 32750 }, { "epoch": 43.855421686746986, "grad_norm": 0.16994264721870422, "learning_rate": 8.037156494230604e-05, "loss": 0.0094, "step": 32760 }, { "epoch": 43.86880856760375, "grad_norm": 0.3241899907588959, "learning_rate": 8.035842857702338e-05, "loss": 0.0093, "step": 32770 }, { "epoch": 43.88219544846051, "grad_norm": 0.14292018115520477, "learning_rate": 8.03452888917886e-05, "loss": 0.0089, "step": 32780 }, { "epoch": 43.89558232931727, "grad_norm": 0.1765940636396408, "learning_rate": 8.033214588803866e-05, "loss": 0.0103, "step": 32790 }, { "epoch": 43.90896921017403, "grad_norm": 0.35445380210876465, "learning_rate": 8.031899956721083e-05, "loss": 0.0103, "step": 32800 }, { "epoch": 43.92235609103079, "grad_norm": 0.12936198711395264, "learning_rate": 8.030584993074282e-05, "loss": 0.0119, "step": 32810 }, { "epoch": 43.93574297188755, "grad_norm": 0.18383821845054626, "learning_rate": 8.02926969800726e-05, "loss": 0.0115, "step": 32820 }, { "epoch": 43.94912985274431, "grad_norm": 0.20522324740886688, "learning_rate": 8.027954071663859e-05, "loss": 0.0096, "step": 32830 }, { "epoch": 43.96251673360107, "grad_norm": 0.15869638323783875, "learning_rate": 8.026638114187954e-05, "loss": 0.0101, "step": 32840 }, { "epoch": 43.975903614457835, "grad_norm": 0.3860965371131897, "learning_rate": 8.025321825723456e-05, "loss": 0.0094, "step": 32850 }, { "epoch": 43.98929049531459, "grad_norm": 0.14323806762695312, "learning_rate": 8.02400520641431e-05, "loss": 0.0092, "step": 32860 }, { "epoch": 44.00267737617135, "grad_norm": 0.2855769395828247, "learning_rate": 8.022688256404501e-05, "loss": 0.0109, "step": 32870 }, { "epoch": 44.01606425702811, "grad_norm": 0.17101913690567017, "learning_rate": 8.02137097583805e-05, "loss": 0.0103, "step": 32880 }, { "epoch": 44.02945113788487, "grad_norm": 0.37323036789894104, "learning_rate": 8.02005336485901e-05, "loss": 0.0098, "step": 32890 }, { "epoch": 44.042838018741634, "grad_norm": 0.9100759625434875, "learning_rate": 8.018735423611476e-05, "loss": 0.0112, "step": 32900 }, { "epoch": 44.05622489959839, "grad_norm": 0.18424127995967865, "learning_rate": 8.017417152239574e-05, "loss": 0.0105, "step": 32910 }, { "epoch": 44.069611780455155, "grad_norm": 0.5737093687057495, "learning_rate": 8.01609855088747e-05, "loss": 0.0111, "step": 32920 }, { "epoch": 44.08299866131191, "grad_norm": 0.1741785854101181, "learning_rate": 8.014779619699362e-05, "loss": 0.0131, "step": 32930 }, { "epoch": 44.096385542168676, "grad_norm": 0.2474980652332306, "learning_rate": 8.013460358819489e-05, "loss": 0.0099, "step": 32940 }, { "epoch": 44.10977242302543, "grad_norm": 0.13418912887573242, "learning_rate": 8.01214076839212e-05, "loss": 0.009, "step": 32950 }, { "epoch": 44.1231593038822, "grad_norm": 0.18623904883861542, "learning_rate": 8.010820848561565e-05, "loss": 0.0114, "step": 32960 }, { "epoch": 44.136546184738954, "grad_norm": 0.17924900352954865, "learning_rate": 8.009500599472171e-05, "loss": 0.0082, "step": 32970 }, { "epoch": 44.14993306559572, "grad_norm": 0.24832840263843536, "learning_rate": 8.008180021268314e-05, "loss": 0.0095, "step": 32980 }, { "epoch": 44.163319946452475, "grad_norm": 0.16112245619297028, "learning_rate": 8.006859114094414e-05, "loss": 0.0119, "step": 32990 }, { "epoch": 44.17670682730924, "grad_norm": 0.12093659490346909, "learning_rate": 8.005537878094921e-05, "loss": 0.0124, "step": 33000 }, { "epoch": 44.190093708165996, "grad_norm": 0.7466188073158264, "learning_rate": 8.004216313414323e-05, "loss": 0.0088, "step": 33010 }, { "epoch": 44.20348058902276, "grad_norm": 1.3992092609405518, "learning_rate": 8.002894420197149e-05, "loss": 0.0102, "step": 33020 }, { "epoch": 44.21686746987952, "grad_norm": 0.23652148246765137, "learning_rate": 8.001572198587954e-05, "loss": 0.0091, "step": 33030 }, { "epoch": 44.23025435073628, "grad_norm": 0.19205079972743988, "learning_rate": 8.000249648731338e-05, "loss": 0.0105, "step": 33040 }, { "epoch": 44.24364123159304, "grad_norm": 0.3123953342437744, "learning_rate": 7.998926770771928e-05, "loss": 0.0093, "step": 33050 }, { "epoch": 44.2570281124498, "grad_norm": 0.21468769013881683, "learning_rate": 7.997603564854397e-05, "loss": 0.0095, "step": 33060 }, { "epoch": 44.27041499330656, "grad_norm": 0.21757879853248596, "learning_rate": 7.996280031123448e-05, "loss": 0.0085, "step": 33070 }, { "epoch": 44.283801874163316, "grad_norm": 0.20857740938663483, "learning_rate": 7.994956169723818e-05, "loss": 0.0096, "step": 33080 }, { "epoch": 44.29718875502008, "grad_norm": 0.24762097001075745, "learning_rate": 7.993631980800285e-05, "loss": 0.0093, "step": 33090 }, { "epoch": 44.31057563587684, "grad_norm": 0.21066994965076447, "learning_rate": 7.992307464497659e-05, "loss": 0.0083, "step": 33100 }, { "epoch": 44.3239625167336, "grad_norm": 0.19680477678775787, "learning_rate": 7.990982620960787e-05, "loss": 0.0108, "step": 33110 }, { "epoch": 44.33734939759036, "grad_norm": 0.15514253079891205, "learning_rate": 7.989657450334554e-05, "loss": 0.0094, "step": 33120 }, { "epoch": 44.35073627844712, "grad_norm": 0.17413756251335144, "learning_rate": 7.988331952763877e-05, "loss": 0.0097, "step": 33130 }, { "epoch": 44.36412315930388, "grad_norm": 0.27012503147125244, "learning_rate": 7.987006128393709e-05, "loss": 0.0093, "step": 33140 }, { "epoch": 44.377510040160644, "grad_norm": 0.29569360613822937, "learning_rate": 7.985679977369043e-05, "loss": 0.0078, "step": 33150 }, { "epoch": 44.3908969210174, "grad_norm": 0.15924859046936035, "learning_rate": 7.984353499834902e-05, "loss": 0.0103, "step": 33160 }, { "epoch": 44.404283801874165, "grad_norm": 0.18842966854572296, "learning_rate": 7.983026695936351e-05, "loss": 0.0086, "step": 33170 }, { "epoch": 44.41767068273092, "grad_norm": 0.1619725227355957, "learning_rate": 7.981699565818486e-05, "loss": 0.0102, "step": 33180 }, { "epoch": 44.431057563587686, "grad_norm": 0.3766980469226837, "learning_rate": 7.980372109626437e-05, "loss": 0.0086, "step": 33190 }, { "epoch": 44.44444444444444, "grad_norm": 0.16325189173221588, "learning_rate": 7.979044327505375e-05, "loss": 0.009, "step": 33200 }, { "epoch": 44.45783132530121, "grad_norm": 0.11437273025512695, "learning_rate": 7.977716219600506e-05, "loss": 0.009, "step": 33210 }, { "epoch": 44.471218206157964, "grad_norm": 0.23339535295963287, "learning_rate": 7.97638778605707e-05, "loss": 0.0086, "step": 33220 }, { "epoch": 44.48460508701473, "grad_norm": 0.1775071769952774, "learning_rate": 7.975059027020338e-05, "loss": 0.0104, "step": 33230 }, { "epoch": 44.497991967871485, "grad_norm": 1.8613848686218262, "learning_rate": 7.973729942635623e-05, "loss": 0.0116, "step": 33240 }, { "epoch": 44.51137884872825, "grad_norm": 3.7054429054260254, "learning_rate": 7.972400533048273e-05, "loss": 0.0115, "step": 33250 }, { "epoch": 44.524765729585006, "grad_norm": 0.5031951665878296, "learning_rate": 7.97107079840367e-05, "loss": 0.0117, "step": 33260 }, { "epoch": 44.53815261044177, "grad_norm": 0.24216759204864502, "learning_rate": 7.969740738847231e-05, "loss": 0.0112, "step": 33270 }, { "epoch": 44.55153949129853, "grad_norm": 0.1976964771747589, "learning_rate": 7.968410354524411e-05, "loss": 0.0113, "step": 33280 }, { "epoch": 44.56492637215529, "grad_norm": 0.20924104750156403, "learning_rate": 7.967079645580697e-05, "loss": 0.0115, "step": 33290 }, { "epoch": 44.57831325301205, "grad_norm": 0.4499256908893585, "learning_rate": 7.965748612161612e-05, "loss": 0.0091, "step": 33300 }, { "epoch": 44.591700133868805, "grad_norm": 0.437786728143692, "learning_rate": 7.96441725441272e-05, "loss": 0.009, "step": 33310 }, { "epoch": 44.60508701472557, "grad_norm": 0.45890387892723083, "learning_rate": 7.963085572479614e-05, "loss": 0.0109, "step": 33320 }, { "epoch": 44.618473895582326, "grad_norm": 0.22031550109386444, "learning_rate": 7.961753566507924e-05, "loss": 0.0107, "step": 33330 }, { "epoch": 44.63186077643909, "grad_norm": 0.21166451275348663, "learning_rate": 7.960421236643316e-05, "loss": 0.0117, "step": 33340 }, { "epoch": 44.64524765729585, "grad_norm": 0.22984787821769714, "learning_rate": 7.959088583031496e-05, "loss": 0.0105, "step": 33350 }, { "epoch": 44.65863453815261, "grad_norm": 0.2902134358882904, "learning_rate": 7.957755605818194e-05, "loss": 0.0108, "step": 33360 }, { "epoch": 44.67202141900937, "grad_norm": 0.3424593210220337, "learning_rate": 7.956422305149185e-05, "loss": 0.0108, "step": 33370 }, { "epoch": 44.68540829986613, "grad_norm": 0.17997649312019348, "learning_rate": 7.95508868117028e-05, "loss": 0.0128, "step": 33380 }, { "epoch": 44.69879518072289, "grad_norm": 0.4406800866127014, "learning_rate": 7.953754734027318e-05, "loss": 0.0099, "step": 33390 }, { "epoch": 44.712182061579654, "grad_norm": 0.24120520055294037, "learning_rate": 7.952420463866182e-05, "loss": 0.0096, "step": 33400 }, { "epoch": 44.72556894243641, "grad_norm": 0.5700497031211853, "learning_rate": 7.951085870832782e-05, "loss": 0.0092, "step": 33410 }, { "epoch": 44.738955823293175, "grad_norm": 0.125695139169693, "learning_rate": 7.949750955073067e-05, "loss": 0.008, "step": 33420 }, { "epoch": 44.75234270414993, "grad_norm": 0.18353021144866943, "learning_rate": 7.948415716733022e-05, "loss": 0.0096, "step": 33430 }, { "epoch": 44.765729585006696, "grad_norm": 0.17972508072853088, "learning_rate": 7.947080155958669e-05, "loss": 0.0115, "step": 33440 }, { "epoch": 44.77911646586345, "grad_norm": 0.44919949769973755, "learning_rate": 7.94574427289606e-05, "loss": 0.0109, "step": 33450 }, { "epoch": 44.79250334672022, "grad_norm": 1.044359564781189, "learning_rate": 7.944408067691284e-05, "loss": 0.011, "step": 33460 }, { "epoch": 44.805890227576974, "grad_norm": 1.022498369216919, "learning_rate": 7.943071540490473e-05, "loss": 0.0132, "step": 33470 }, { "epoch": 44.81927710843374, "grad_norm": 1.0768022537231445, "learning_rate": 7.94173469143978e-05, "loss": 0.0154, "step": 33480 }, { "epoch": 44.832663989290495, "grad_norm": 0.1940617561340332, "learning_rate": 7.940397520685406e-05, "loss": 0.0099, "step": 33490 }, { "epoch": 44.84605087014726, "grad_norm": 0.41179192066192627, "learning_rate": 7.939060028373577e-05, "loss": 0.0116, "step": 33500 }, { "epoch": 44.859437751004016, "grad_norm": 0.6516801714897156, "learning_rate": 7.937722214650565e-05, "loss": 0.0111, "step": 33510 }, { "epoch": 44.87282463186077, "grad_norm": 0.18311603367328644, "learning_rate": 7.936384079662666e-05, "loss": 0.0102, "step": 33520 }, { "epoch": 44.88621151271754, "grad_norm": 0.1658201664686203, "learning_rate": 7.93504562355622e-05, "loss": 0.0101, "step": 33530 }, { "epoch": 44.899598393574294, "grad_norm": 0.15363501012325287, "learning_rate": 7.933706846477599e-05, "loss": 0.0113, "step": 33540 }, { "epoch": 44.91298527443106, "grad_norm": 0.3317163288593292, "learning_rate": 7.932367748573206e-05, "loss": 0.0095, "step": 33550 }, { "epoch": 44.926372155287815, "grad_norm": 0.2527541518211365, "learning_rate": 7.931028329989485e-05, "loss": 0.0114, "step": 33560 }, { "epoch": 44.93975903614458, "grad_norm": 0.36562594771385193, "learning_rate": 7.929688590872913e-05, "loss": 0.0089, "step": 33570 }, { "epoch": 44.953145917001336, "grad_norm": 0.33957669138908386, "learning_rate": 7.928348531370003e-05, "loss": 0.0091, "step": 33580 }, { "epoch": 44.9665327978581, "grad_norm": 0.40638312697410583, "learning_rate": 7.927008151627297e-05, "loss": 0.0095, "step": 33590 }, { "epoch": 44.97991967871486, "grad_norm": 0.24252691864967346, "learning_rate": 7.925667451791383e-05, "loss": 0.0105, "step": 33600 }, { "epoch": 44.99330655957162, "grad_norm": 0.3754323124885559, "learning_rate": 7.924326432008874e-05, "loss": 0.0102, "step": 33610 }, { "epoch": 45.00669344042838, "grad_norm": 0.14542704820632935, "learning_rate": 7.922985092426422e-05, "loss": 0.0117, "step": 33620 }, { "epoch": 45.02008032128514, "grad_norm": 0.8372877836227417, "learning_rate": 7.921643433190717e-05, "loss": 0.0112, "step": 33630 }, { "epoch": 45.0334672021419, "grad_norm": 0.17623081803321838, "learning_rate": 7.920301454448478e-05, "loss": 0.0096, "step": 33640 }, { "epoch": 45.046854082998664, "grad_norm": 0.16508756577968597, "learning_rate": 7.918959156346461e-05, "loss": 0.012, "step": 33650 }, { "epoch": 45.06024096385542, "grad_norm": 0.20686249434947968, "learning_rate": 7.91761653903146e-05, "loss": 0.0126, "step": 33660 }, { "epoch": 45.073627844712185, "grad_norm": 0.1702040135860443, "learning_rate": 7.916273602650302e-05, "loss": 0.0107, "step": 33670 }, { "epoch": 45.08701472556894, "grad_norm": 0.6920449137687683, "learning_rate": 7.914930347349847e-05, "loss": 0.0107, "step": 33680 }, { "epoch": 45.100401606425706, "grad_norm": 0.16155517101287842, "learning_rate": 7.913586773276992e-05, "loss": 0.0113, "step": 33690 }, { "epoch": 45.11378848728246, "grad_norm": 0.20957130193710327, "learning_rate": 7.912242880578667e-05, "loss": 0.009, "step": 33700 }, { "epoch": 45.12717536813923, "grad_norm": 0.21478517353534698, "learning_rate": 7.910898669401839e-05, "loss": 0.0108, "step": 33710 }, { "epoch": 45.140562248995984, "grad_norm": 0.130763977766037, "learning_rate": 7.909554139893511e-05, "loss": 0.008, "step": 33720 }, { "epoch": 45.15394912985274, "grad_norm": 0.1281934231519699, "learning_rate": 7.908209292200715e-05, "loss": 0.012, "step": 33730 }, { "epoch": 45.167336010709505, "grad_norm": 0.11226044595241547, "learning_rate": 7.906864126470523e-05, "loss": 0.0096, "step": 33740 }, { "epoch": 45.18072289156626, "grad_norm": 0.1246185302734375, "learning_rate": 7.905518642850041e-05, "loss": 0.0086, "step": 33750 }, { "epoch": 45.194109772423026, "grad_norm": 0.15564019978046417, "learning_rate": 7.904172841486409e-05, "loss": 0.0084, "step": 33760 }, { "epoch": 45.20749665327978, "grad_norm": 1.3270702362060547, "learning_rate": 7.902826722526801e-05, "loss": 0.0081, "step": 33770 }, { "epoch": 45.22088353413655, "grad_norm": 0.16682063043117523, "learning_rate": 7.901480286118427e-05, "loss": 0.0118, "step": 33780 }, { "epoch": 45.234270414993304, "grad_norm": 0.19269172847270966, "learning_rate": 7.900133532408531e-05, "loss": 0.009, "step": 33790 }, { "epoch": 45.24765729585007, "grad_norm": 0.2899930775165558, "learning_rate": 7.898786461544395e-05, "loss": 0.0098, "step": 33800 }, { "epoch": 45.261044176706825, "grad_norm": 0.15326805412769318, "learning_rate": 7.897439073673325e-05, "loss": 0.0101, "step": 33810 }, { "epoch": 45.27443105756359, "grad_norm": 0.299788236618042, "learning_rate": 7.896091368942677e-05, "loss": 0.0126, "step": 33820 }, { "epoch": 45.287817938420346, "grad_norm": 0.2680665850639343, "learning_rate": 7.894743347499832e-05, "loss": 0.0096, "step": 33830 }, { "epoch": 45.30120481927711, "grad_norm": 0.11136343330144882, "learning_rate": 7.893395009492203e-05, "loss": 0.0099, "step": 33840 }, { "epoch": 45.31459170013387, "grad_norm": 0.1824866533279419, "learning_rate": 7.892046355067248e-05, "loss": 0.009, "step": 33850 }, { "epoch": 45.32797858099063, "grad_norm": 0.29421401023864746, "learning_rate": 7.890697384372451e-05, "loss": 0.0125, "step": 33860 }, { "epoch": 45.34136546184739, "grad_norm": 0.21681208908557892, "learning_rate": 7.889348097555336e-05, "loss": 0.0105, "step": 33870 }, { "epoch": 45.35475234270415, "grad_norm": 0.2154204100370407, "learning_rate": 7.887998494763455e-05, "loss": 0.0094, "step": 33880 }, { "epoch": 45.36813922356091, "grad_norm": 0.19713908433914185, "learning_rate": 7.886648576144404e-05, "loss": 0.0132, "step": 33890 }, { "epoch": 45.381526104417674, "grad_norm": 0.13602371513843536, "learning_rate": 7.885298341845802e-05, "loss": 0.0103, "step": 33900 }, { "epoch": 45.39491298527443, "grad_norm": 0.18874713778495789, "learning_rate": 7.883947792015311e-05, "loss": 0.0107, "step": 33910 }, { "epoch": 45.408299866131195, "grad_norm": 0.16675537824630737, "learning_rate": 7.882596926800628e-05, "loss": 0.0112, "step": 33920 }, { "epoch": 45.42168674698795, "grad_norm": 0.200835719704628, "learning_rate": 7.881245746349477e-05, "loss": 0.0128, "step": 33930 }, { "epoch": 45.43507362784471, "grad_norm": 0.24873840808868408, "learning_rate": 7.879894250809623e-05, "loss": 0.0096, "step": 33940 }, { "epoch": 45.44846050870147, "grad_norm": 0.3256852924823761, "learning_rate": 7.878542440328865e-05, "loss": 0.0098, "step": 33950 }, { "epoch": 45.46184738955823, "grad_norm": 0.17104282975196838, "learning_rate": 7.877190315055031e-05, "loss": 0.0115, "step": 33960 }, { "epoch": 45.475234270414994, "grad_norm": 0.21056747436523438, "learning_rate": 7.875837875135991e-05, "loss": 0.0125, "step": 33970 }, { "epoch": 45.48862115127175, "grad_norm": 0.2629966139793396, "learning_rate": 7.874485120719646e-05, "loss": 0.0099, "step": 33980 }, { "epoch": 45.502008032128515, "grad_norm": 0.1775771975517273, "learning_rate": 7.873132051953928e-05, "loss": 0.0103, "step": 33990 }, { "epoch": 45.51539491298527, "grad_norm": 0.3979889154434204, "learning_rate": 7.87177866898681e-05, "loss": 0.0104, "step": 34000 }, { "epoch": 45.528781793842036, "grad_norm": 0.15793924033641815, "learning_rate": 7.870424971966294e-05, "loss": 0.0099, "step": 34010 }, { "epoch": 45.54216867469879, "grad_norm": 0.21932606399059296, "learning_rate": 7.869070961040419e-05, "loss": 0.0103, "step": 34020 }, { "epoch": 45.55555555555556, "grad_norm": 0.3363785743713379, "learning_rate": 7.867716636357257e-05, "loss": 0.011, "step": 34030 }, { "epoch": 45.568942436412314, "grad_norm": 1.8540983200073242, "learning_rate": 7.866361998064915e-05, "loss": 0.0109, "step": 34040 }, { "epoch": 45.58232931726908, "grad_norm": 0.18388031423091888, "learning_rate": 7.865007046311534e-05, "loss": 0.01, "step": 34050 }, { "epoch": 45.595716198125835, "grad_norm": 0.1539708822965622, "learning_rate": 7.86365178124529e-05, "loss": 0.0108, "step": 34060 }, { "epoch": 45.6091030789826, "grad_norm": 0.21739637851715088, "learning_rate": 7.862296203014394e-05, "loss": 0.0105, "step": 34070 }, { "epoch": 45.622489959839356, "grad_norm": 0.14624528586864471, "learning_rate": 7.860940311767088e-05, "loss": 0.011, "step": 34080 }, { "epoch": 45.63587684069612, "grad_norm": 0.1364096999168396, "learning_rate": 7.85958410765165e-05, "loss": 0.0104, "step": 34090 }, { "epoch": 45.64926372155288, "grad_norm": 0.23664820194244385, "learning_rate": 7.858227590816394e-05, "loss": 0.0092, "step": 34100 }, { "epoch": 45.66265060240964, "grad_norm": 0.4108765721321106, "learning_rate": 7.856870761409664e-05, "loss": 0.0098, "step": 34110 }, { "epoch": 45.6760374832664, "grad_norm": 0.1980501264333725, "learning_rate": 7.855513619579846e-05, "loss": 0.0125, "step": 34120 }, { "epoch": 45.68942436412316, "grad_norm": 0.12932218611240387, "learning_rate": 7.85415616547535e-05, "loss": 0.0085, "step": 34130 }, { "epoch": 45.70281124497992, "grad_norm": 0.44637733697891235, "learning_rate": 7.852798399244627e-05, "loss": 0.0109, "step": 34140 }, { "epoch": 45.716198125836684, "grad_norm": 0.1581287980079651, "learning_rate": 7.851440321036161e-05, "loss": 0.009, "step": 34150 }, { "epoch": 45.72958500669344, "grad_norm": 0.1716047078371048, "learning_rate": 7.850081930998468e-05, "loss": 0.0083, "step": 34160 }, { "epoch": 45.7429718875502, "grad_norm": 0.2782636880874634, "learning_rate": 7.8487232292801e-05, "loss": 0.01, "step": 34170 }, { "epoch": 45.75635876840696, "grad_norm": 0.11030416935682297, "learning_rate": 7.847364216029642e-05, "loss": 0.0098, "step": 34180 }, { "epoch": 45.76974564926372, "grad_norm": 0.2124360203742981, "learning_rate": 7.846004891395716e-05, "loss": 0.0106, "step": 34190 }, { "epoch": 45.78313253012048, "grad_norm": 2.2966322898864746, "learning_rate": 7.844645255526972e-05, "loss": 0.0098, "step": 34200 }, { "epoch": 45.79651941097724, "grad_norm": 0.15127462148666382, "learning_rate": 7.843285308572101e-05, "loss": 0.0096, "step": 34210 }, { "epoch": 45.809906291834004, "grad_norm": 0.1396835297346115, "learning_rate": 7.841925050679823e-05, "loss": 0.0093, "step": 34220 }, { "epoch": 45.82329317269076, "grad_norm": 0.13771283626556396, "learning_rate": 7.840564481998895e-05, "loss": 0.0131, "step": 34230 }, { "epoch": 45.836680053547525, "grad_norm": 0.12716998159885406, "learning_rate": 7.839203602678105e-05, "loss": 0.0096, "step": 34240 }, { "epoch": 45.85006693440428, "grad_norm": 0.20390048623085022, "learning_rate": 7.837842412866279e-05, "loss": 0.0085, "step": 34250 }, { "epoch": 45.863453815261046, "grad_norm": 0.27414679527282715, "learning_rate": 7.836480912712272e-05, "loss": 0.0085, "step": 34260 }, { "epoch": 45.8768406961178, "grad_norm": 0.22507891058921814, "learning_rate": 7.835119102364976e-05, "loss": 0.0103, "step": 34270 }, { "epoch": 45.89022757697457, "grad_norm": 0.206025630235672, "learning_rate": 7.83375698197332e-05, "loss": 0.0088, "step": 34280 }, { "epoch": 45.903614457831324, "grad_norm": 0.2294766753911972, "learning_rate": 7.83239455168626e-05, "loss": 0.0106, "step": 34290 }, { "epoch": 45.91700133868809, "grad_norm": 0.1802830547094345, "learning_rate": 7.83103181165279e-05, "loss": 0.0103, "step": 34300 }, { "epoch": 45.930388219544845, "grad_norm": 0.18411514163017273, "learning_rate": 7.829668762021937e-05, "loss": 0.0109, "step": 34310 }, { "epoch": 45.94377510040161, "grad_norm": 0.2060641646385193, "learning_rate": 7.828305402942764e-05, "loss": 0.0104, "step": 34320 }, { "epoch": 45.957161981258366, "grad_norm": 0.262641042470932, "learning_rate": 7.826941734564363e-05, "loss": 0.0099, "step": 34330 }, { "epoch": 45.97054886211513, "grad_norm": 0.15357761085033417, "learning_rate": 7.825577757035865e-05, "loss": 0.0101, "step": 34340 }, { "epoch": 45.98393574297189, "grad_norm": 0.18938645720481873, "learning_rate": 7.824213470506431e-05, "loss": 0.0142, "step": 34350 }, { "epoch": 45.99732262382865, "grad_norm": 0.2325040102005005, "learning_rate": 7.822848875125257e-05, "loss": 0.0122, "step": 34360 }, { "epoch": 46.01070950468541, "grad_norm": 0.2956486940383911, "learning_rate": 7.821483971041576e-05, "loss": 0.0136, "step": 34370 }, { "epoch": 46.024096385542165, "grad_norm": 0.19545474648475647, "learning_rate": 7.820118758404649e-05, "loss": 0.0114, "step": 34380 }, { "epoch": 46.03748326639893, "grad_norm": 0.14578129351139069, "learning_rate": 7.818753237363776e-05, "loss": 0.0124, "step": 34390 }, { "epoch": 46.05087014725569, "grad_norm": 0.3061385452747345, "learning_rate": 7.817387408068286e-05, "loss": 0.0104, "step": 34400 }, { "epoch": 46.06425702811245, "grad_norm": 0.20385529100894928, "learning_rate": 7.816021270667544e-05, "loss": 0.0086, "step": 34410 }, { "epoch": 46.07764390896921, "grad_norm": 0.1593228429555893, "learning_rate": 7.81465482531095e-05, "loss": 0.0139, "step": 34420 }, { "epoch": 46.09103078982597, "grad_norm": 0.27153480052948, "learning_rate": 7.813288072147938e-05, "loss": 0.0097, "step": 34430 }, { "epoch": 46.10441767068273, "grad_norm": 0.20834936201572418, "learning_rate": 7.811921011327972e-05, "loss": 0.0118, "step": 34440 }, { "epoch": 46.11780455153949, "grad_norm": 0.8762113451957703, "learning_rate": 7.810553643000549e-05, "loss": 0.0108, "step": 34450 }, { "epoch": 46.13119143239625, "grad_norm": 0.17372244596481323, "learning_rate": 7.809185967315206e-05, "loss": 0.0093, "step": 34460 }, { "epoch": 46.144578313253014, "grad_norm": 0.2614113688468933, "learning_rate": 7.80781798442151e-05, "loss": 0.01, "step": 34470 }, { "epoch": 46.15796519410977, "grad_norm": 0.14835263788700104, "learning_rate": 7.806449694469061e-05, "loss": 0.0102, "step": 34480 }, { "epoch": 46.171352074966535, "grad_norm": 0.18795287609100342, "learning_rate": 7.805081097607492e-05, "loss": 0.0091, "step": 34490 }, { "epoch": 46.18473895582329, "grad_norm": 0.21945954859256744, "learning_rate": 7.803712193986474e-05, "loss": 0.0101, "step": 34500 }, { "epoch": 46.198125836680056, "grad_norm": 1.1561808586120605, "learning_rate": 7.802342983755702e-05, "loss": 0.0112, "step": 34510 }, { "epoch": 46.21151271753681, "grad_norm": 0.24531520903110504, "learning_rate": 7.800973467064918e-05, "loss": 0.0091, "step": 34520 }, { "epoch": 46.22489959839358, "grad_norm": 0.13253960013389587, "learning_rate": 7.799603644063884e-05, "loss": 0.0138, "step": 34530 }, { "epoch": 46.238286479250334, "grad_norm": 0.2389409989118576, "learning_rate": 7.798233514902405e-05, "loss": 0.0114, "step": 34540 }, { "epoch": 46.2516733601071, "grad_norm": 0.1740535944700241, "learning_rate": 7.796863079730318e-05, "loss": 0.0096, "step": 34550 }, { "epoch": 46.265060240963855, "grad_norm": 0.19814057648181915, "learning_rate": 7.795492338697488e-05, "loss": 0.0103, "step": 34560 }, { "epoch": 46.27844712182062, "grad_norm": 0.4103662967681885, "learning_rate": 7.794121291953819e-05, "loss": 0.0135, "step": 34570 }, { "epoch": 46.291834002677376, "grad_norm": 0.4740297198295593, "learning_rate": 7.792749939649246e-05, "loss": 0.0114, "step": 34580 }, { "epoch": 46.30522088353413, "grad_norm": 0.14351263642311096, "learning_rate": 7.79137828193374e-05, "loss": 0.0102, "step": 34590 }, { "epoch": 46.3186077643909, "grad_norm": 0.17801357805728912, "learning_rate": 7.790006318957301e-05, "loss": 0.0095, "step": 34600 }, { "epoch": 46.331994645247654, "grad_norm": 0.23005534708499908, "learning_rate": 7.788634050869965e-05, "loss": 0.01, "step": 34610 }, { "epoch": 46.34538152610442, "grad_norm": 0.21610286831855774, "learning_rate": 7.787261477821803e-05, "loss": 0.0093, "step": 34620 }, { "epoch": 46.358768406961175, "grad_norm": 0.2530953586101532, "learning_rate": 7.785888599962916e-05, "loss": 0.0111, "step": 34630 }, { "epoch": 46.37215528781794, "grad_norm": 0.2097485214471817, "learning_rate": 7.784515417443439e-05, "loss": 0.0113, "step": 34640 }, { "epoch": 46.3855421686747, "grad_norm": 0.34987902641296387, "learning_rate": 7.783141930413545e-05, "loss": 0.0091, "step": 34650 }, { "epoch": 46.39892904953146, "grad_norm": 0.26586735248565674, "learning_rate": 7.78176813902343e-05, "loss": 0.0077, "step": 34660 }, { "epoch": 46.41231593038822, "grad_norm": 0.17669059336185455, "learning_rate": 7.780394043423336e-05, "loss": 0.0095, "step": 34670 }, { "epoch": 46.42570281124498, "grad_norm": 0.25490403175354004, "learning_rate": 7.77901964376353e-05, "loss": 0.0112, "step": 34680 }, { "epoch": 46.43908969210174, "grad_norm": 0.1846359670162201, "learning_rate": 7.777644940194316e-05, "loss": 0.0103, "step": 34690 }, { "epoch": 46.4524765729585, "grad_norm": 0.13442909717559814, "learning_rate": 7.776269932866023e-05, "loss": 0.0078, "step": 34700 }, { "epoch": 46.46586345381526, "grad_norm": 0.1521899402141571, "learning_rate": 7.774894621929026e-05, "loss": 0.0093, "step": 34710 }, { "epoch": 46.479250334672024, "grad_norm": 0.2118571400642395, "learning_rate": 7.773519007533725e-05, "loss": 0.0085, "step": 34720 }, { "epoch": 46.49263721552878, "grad_norm": 0.1719042807817459, "learning_rate": 7.772143089830556e-05, "loss": 0.009, "step": 34730 }, { "epoch": 46.506024096385545, "grad_norm": 0.2089385688304901, "learning_rate": 7.770766868969985e-05, "loss": 0.0092, "step": 34740 }, { "epoch": 46.5194109772423, "grad_norm": 1.8907673358917236, "learning_rate": 7.769390345102518e-05, "loss": 0.0095, "step": 34750 }, { "epoch": 46.532797858099066, "grad_norm": 0.386915922164917, "learning_rate": 7.768013518378683e-05, "loss": 0.0093, "step": 34760 }, { "epoch": 46.54618473895582, "grad_norm": 0.29952436685562134, "learning_rate": 7.766636388949053e-05, "loss": 0.0106, "step": 34770 }, { "epoch": 46.55957161981259, "grad_norm": 0.24426484107971191, "learning_rate": 7.765258956964229e-05, "loss": 0.0114, "step": 34780 }, { "epoch": 46.572958500669344, "grad_norm": 0.19223396480083466, "learning_rate": 7.76388122257484e-05, "loss": 0.0125, "step": 34790 }, { "epoch": 46.5863453815261, "grad_norm": 0.13537928462028503, "learning_rate": 7.762503185931558e-05, "loss": 0.0088, "step": 34800 }, { "epoch": 46.599732262382865, "grad_norm": 0.23816117644309998, "learning_rate": 7.76112484718508e-05, "loss": 0.0108, "step": 34810 }, { "epoch": 46.61311914323962, "grad_norm": 0.281819224357605, "learning_rate": 7.75974620648614e-05, "loss": 0.0102, "step": 34820 }, { "epoch": 46.626506024096386, "grad_norm": 0.14683499932289124, "learning_rate": 7.758367263985503e-05, "loss": 0.0108, "step": 34830 }, { "epoch": 46.63989290495314, "grad_norm": 0.6837897896766663, "learning_rate": 7.75698801983397e-05, "loss": 0.0127, "step": 34840 }, { "epoch": 46.65327978580991, "grad_norm": 0.1430615931749344, "learning_rate": 7.755608474182372e-05, "loss": 0.0094, "step": 34850 }, { "epoch": 46.666666666666664, "grad_norm": 0.16634605824947357, "learning_rate": 7.754228627181574e-05, "loss": 0.0102, "step": 34860 }, { "epoch": 46.68005354752343, "grad_norm": 0.2128564864397049, "learning_rate": 7.752848478982476e-05, "loss": 0.0123, "step": 34870 }, { "epoch": 46.693440428380185, "grad_norm": 0.7524769902229309, "learning_rate": 7.751468029736006e-05, "loss": 0.0106, "step": 34880 }, { "epoch": 46.70682730923695, "grad_norm": 0.23373287916183472, "learning_rate": 7.750087279593129e-05, "loss": 0.0099, "step": 34890 }, { "epoch": 46.72021419009371, "grad_norm": 0.206715926527977, "learning_rate": 7.748706228704843e-05, "loss": 0.012, "step": 34900 }, { "epoch": 46.73360107095047, "grad_norm": 0.16920629143714905, "learning_rate": 7.747324877222176e-05, "loss": 0.0094, "step": 34910 }, { "epoch": 46.74698795180723, "grad_norm": 0.12697528302669525, "learning_rate": 7.745943225296188e-05, "loss": 0.0101, "step": 34920 }, { "epoch": 46.76037483266399, "grad_norm": 0.22760693728923798, "learning_rate": 7.744561273077981e-05, "loss": 0.0095, "step": 34930 }, { "epoch": 46.77376171352075, "grad_norm": 0.34944888949394226, "learning_rate": 7.743179020718678e-05, "loss": 0.0087, "step": 34940 }, { "epoch": 46.78714859437751, "grad_norm": 0.23433127999305725, "learning_rate": 7.741796468369443e-05, "loss": 0.0104, "step": 34950 }, { "epoch": 46.80053547523427, "grad_norm": 0.20954188704490662, "learning_rate": 7.740413616181466e-05, "loss": 0.0098, "step": 34960 }, { "epoch": 46.813922356091034, "grad_norm": 0.18132947385311127, "learning_rate": 7.739030464305978e-05, "loss": 0.012, "step": 34970 }, { "epoch": 46.82730923694779, "grad_norm": 0.14634039998054504, "learning_rate": 7.737647012894235e-05, "loss": 0.0111, "step": 34980 }, { "epoch": 46.840696117804555, "grad_norm": 0.2703489661216736, "learning_rate": 7.736263262097532e-05, "loss": 0.011, "step": 34990 }, { "epoch": 46.85408299866131, "grad_norm": 0.15652509033679962, "learning_rate": 7.734879212067192e-05, "loss": 0.0096, "step": 35000 } ], "logging_steps": 10, "max_steps": 100000, "num_input_tokens_seen": 0, "num_train_epochs": 134, "save_steps": 1000, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 64, "trial_name": null, "trial_params": null }