{ "best_metric": 0.8168653004456122, "best_model_checkpoint": "/data/hungnm/unisentiment/roberta-base-sentiment/checkpoint-15370", "epoch": 5.0, "eval_steps": 500, "global_step": 15370, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0016265452179570592, "grad_norm": 0.0, "learning_rate": 0.0, "loss": 2.1785, "step": 5 }, { "epoch": 0.0032530904359141183, "grad_norm": 0.0, "learning_rate": 0.0, "loss": 2.1908, "step": 10 }, { "epoch": 0.004879635653871178, "grad_norm": 0.0, "learning_rate": 0.0, "loss": 2.1871, "step": 15 }, { "epoch": 0.006506180871828237, "grad_norm": 0.5645683407783508, "learning_rate": 1.2987012987012988e-06, "loss": 2.1871, "step": 20 }, { "epoch": 0.008132726089785295, "grad_norm": 0.5938918590545654, "learning_rate": 2.922077922077922e-06, "loss": 2.1877, "step": 25 }, { "epoch": 0.009759271307742356, "grad_norm": 0.2991328835487366, "learning_rate": 4.5454545454545455e-06, "loss": 2.1762, "step": 30 }, { "epoch": 0.011385816525699415, "grad_norm": 0.3660571277141571, "learning_rate": 6.168831168831169e-06, "loss": 2.1846, "step": 35 }, { "epoch": 0.013012361743656473, "grad_norm": 0.40230146050453186, "learning_rate": 7.792207792207792e-06, "loss": 2.1768, "step": 40 }, { "epoch": 0.014638906961613532, "grad_norm": 0.9246541857719421, "learning_rate": 9.415584415584416e-06, "loss": 2.1666, "step": 45 }, { "epoch": 0.01626545217957059, "grad_norm": 1.4749853610992432, "learning_rate": 1.103896103896104e-05, "loss": 2.1117, "step": 50 }, { "epoch": 0.017891997397527653, "grad_norm": 3.7031211853027344, "learning_rate": 1.2337662337662339e-05, "loss": 2.0258, "step": 55 }, { "epoch": 0.01951854261548471, "grad_norm": 4.31064510345459, "learning_rate": 1.396103896103896e-05, "loss": 1.8491, "step": 60 }, { "epoch": 0.02114508783344177, "grad_norm": 2.758653163909912, "learning_rate": 1.5584415584415583e-05, "loss": 1.6744, "step": 65 }, { "epoch": 0.02277163305139883, "grad_norm": 3.5859150886535645, "learning_rate": 1.7207792207792208e-05, "loss": 1.62, "step": 70 }, { "epoch": 0.024398178269355888, "grad_norm": 6.705416202545166, "learning_rate": 1.8831168831168833e-05, "loss": 1.5734, "step": 75 }, { "epoch": 0.026024723487312947, "grad_norm": 3.9373040199279785, "learning_rate": 2.0454545454545457e-05, "loss": 1.5028, "step": 80 }, { "epoch": 0.027651268705270005, "grad_norm": 2.206629991531372, "learning_rate": 2.1753246753246752e-05, "loss": 1.4542, "step": 85 }, { "epoch": 0.029277813923227064, "grad_norm": 5.32952356338501, "learning_rate": 2.3376623376623376e-05, "loss": 1.4486, "step": 90 }, { "epoch": 0.030904359141184126, "grad_norm": 6.3421735763549805, "learning_rate": 2.5e-05, "loss": 1.3987, "step": 95 }, { "epoch": 0.03253090435914118, "grad_norm": 4.816642761230469, "learning_rate": 2.6623376623376623e-05, "loss": 1.3467, "step": 100 }, { "epoch": 0.034157449577098244, "grad_norm": 2.762737512588501, "learning_rate": 2.792207792207792e-05, "loss": 1.393, "step": 105 }, { "epoch": 0.035783994795055306, "grad_norm": 2.6444923877716064, "learning_rate": 2.954545454545455e-05, "loss": 1.3371, "step": 110 }, { "epoch": 0.03741054001301236, "grad_norm": 2.527705430984497, "learning_rate": 3.1168831168831166e-05, "loss": 1.3085, "step": 115 }, { "epoch": 0.03903708523096942, "grad_norm": 7.088854789733887, "learning_rate": 3.27922077922078e-05, "loss": 1.2924, "step": 120 }, { "epoch": 0.04066363044892648, "grad_norm": 8.464285850524902, "learning_rate": 3.4415584415584416e-05, "loss": 1.3432, "step": 125 }, { "epoch": 0.04229017566688354, "grad_norm": 2.563727855682373, "learning_rate": 3.603896103896104e-05, "loss": 1.3283, "step": 130 }, { "epoch": 0.043916720884840596, "grad_norm": 2.215078592300415, "learning_rate": 3.7662337662337665e-05, "loss": 1.2709, "step": 135 }, { "epoch": 0.04554326610279766, "grad_norm": 2.0892443656921387, "learning_rate": 3.928571428571429e-05, "loss": 1.2826, "step": 140 }, { "epoch": 0.04716981132075472, "grad_norm": 2.6589133739471436, "learning_rate": 4.0909090909090915e-05, "loss": 1.2191, "step": 145 }, { "epoch": 0.048796356538711776, "grad_norm": 2.4304778575897217, "learning_rate": 4.253246753246753e-05, "loss": 1.2007, "step": 150 }, { "epoch": 0.05042290175666884, "grad_norm": 4.696134567260742, "learning_rate": 4.415584415584416e-05, "loss": 1.2091, "step": 155 }, { "epoch": 0.05204944697462589, "grad_norm": 4.776639938354492, "learning_rate": 4.577922077922078e-05, "loss": 1.2263, "step": 160 }, { "epoch": 0.053675992192582955, "grad_norm": 5.665904998779297, "learning_rate": 4.740259740259741e-05, "loss": 1.2484, "step": 165 }, { "epoch": 0.05530253741054001, "grad_norm": 1.786213994026184, "learning_rate": 4.902597402597403e-05, "loss": 1.2267, "step": 170 }, { "epoch": 0.05692908262849707, "grad_norm": 5.322690963745117, "learning_rate": 4.999999786858144e-05, "loss": 1.2171, "step": 175 }, { "epoch": 0.05855562784645413, "grad_norm": 3.677067756652832, "learning_rate": 4.999997389012675e-05, "loss": 1.1835, "step": 180 }, { "epoch": 0.06018217306441119, "grad_norm": 2.1142375469207764, "learning_rate": 4.99999232689698e-05, "loss": 1.1857, "step": 185 }, { "epoch": 0.06180871828236825, "grad_norm": 2.6471405029296875, "learning_rate": 4.9999846005164544e-05, "loss": 1.1535, "step": 190 }, { "epoch": 0.06343526350032531, "grad_norm": 3.7304134368896484, "learning_rate": 4.999974209879331e-05, "loss": 1.1885, "step": 195 }, { "epoch": 0.06506180871828236, "grad_norm": 3.2092204093933105, "learning_rate": 4.999961154996685e-05, "loss": 1.1447, "step": 200 }, { "epoch": 0.06668835393623943, "grad_norm": 3.622004985809326, "learning_rate": 4.999945435882428e-05, "loss": 1.184, "step": 205 }, { "epoch": 0.06831489915419649, "grad_norm": 4.286761283874512, "learning_rate": 4.999927052553313e-05, "loss": 1.1606, "step": 210 }, { "epoch": 0.06994144437215355, "grad_norm": 3.743877410888672, "learning_rate": 4.9999060050289286e-05, "loss": 1.1701, "step": 215 }, { "epoch": 0.07156798959011061, "grad_norm": 4.048401832580566, "learning_rate": 4.999882293331708e-05, "loss": 1.1408, "step": 220 }, { "epoch": 0.07319453480806766, "grad_norm": 2.202915668487549, "learning_rate": 4.999855917486921e-05, "loss": 1.1097, "step": 225 }, { "epoch": 0.07482108002602472, "grad_norm": 2.1554341316223145, "learning_rate": 4.999826877522675e-05, "loss": 1.1379, "step": 230 }, { "epoch": 0.07644762524398178, "grad_norm": 4.172390460968018, "learning_rate": 4.999795173469919e-05, "loss": 1.1284, "step": 235 }, { "epoch": 0.07807417046193885, "grad_norm": 2.628201961517334, "learning_rate": 4.99976080536244e-05, "loss": 1.1351, "step": 240 }, { "epoch": 0.0797007156798959, "grad_norm": 3.509880542755127, "learning_rate": 4.9997237732368645e-05, "loss": 1.1505, "step": 245 }, { "epoch": 0.08132726089785296, "grad_norm": 2.4403703212738037, "learning_rate": 4.9996840771326584e-05, "loss": 1.1101, "step": 250 }, { "epoch": 0.08295380611581002, "grad_norm": 2.3883931636810303, "learning_rate": 4.999641717092126e-05, "loss": 1.0921, "step": 255 }, { "epoch": 0.08458035133376708, "grad_norm": 2.4817776679992676, "learning_rate": 4.99959669316041e-05, "loss": 1.0737, "step": 260 }, { "epoch": 0.08620689655172414, "grad_norm": 3.0016984939575195, "learning_rate": 4.999549005385494e-05, "loss": 1.0776, "step": 265 }, { "epoch": 0.08783344176968119, "grad_norm": 3.2658755779266357, "learning_rate": 4.999498653818199e-05, "loss": 1.1237, "step": 270 }, { "epoch": 0.08945998698763825, "grad_norm": 1.7791502475738525, "learning_rate": 4.999445638512185e-05, "loss": 1.1098, "step": 275 }, { "epoch": 0.09108653220559532, "grad_norm": 2.0501716136932373, "learning_rate": 4.99938995952395e-05, "loss": 1.1065, "step": 280 }, { "epoch": 0.09271307742355238, "grad_norm": 1.799959421157837, "learning_rate": 4.9993316169128334e-05, "loss": 1.0776, "step": 285 }, { "epoch": 0.09433962264150944, "grad_norm": 2.0709333419799805, "learning_rate": 4.99927061074101e-05, "loss": 1.0673, "step": 290 }, { "epoch": 0.09596616785946649, "grad_norm": 2.877933979034424, "learning_rate": 4.999206941073496e-05, "loss": 1.0672, "step": 295 }, { "epoch": 0.09759271307742355, "grad_norm": 2.737412452697754, "learning_rate": 4.9991406079781424e-05, "loss": 1.0709, "step": 300 }, { "epoch": 0.09921925829538061, "grad_norm": 2.7848894596099854, "learning_rate": 4.999071611525643e-05, "loss": 1.0826, "step": 305 }, { "epoch": 0.10084580351333768, "grad_norm": 4.676877021789551, "learning_rate": 4.998999951789528e-05, "loss": 1.0888, "step": 310 }, { "epoch": 0.10247234873129472, "grad_norm": 2.099736213684082, "learning_rate": 4.998925628846164e-05, "loss": 1.0901, "step": 315 }, { "epoch": 0.10409889394925179, "grad_norm": 1.9290549755096436, "learning_rate": 4.9988486427747606e-05, "loss": 1.0875, "step": 320 }, { "epoch": 0.10572543916720885, "grad_norm": 2.3181591033935547, "learning_rate": 4.99876899365736e-05, "loss": 1.0417, "step": 325 }, { "epoch": 0.10735198438516591, "grad_norm": 1.5860693454742432, "learning_rate": 4.998686681578846e-05, "loss": 1.1184, "step": 330 }, { "epoch": 0.10897852960312297, "grad_norm": 2.7544357776641846, "learning_rate": 4.998601706626938e-05, "loss": 1.0811, "step": 335 }, { "epoch": 0.11060507482108002, "grad_norm": 3.281580686569214, "learning_rate": 4.9985140688921975e-05, "loss": 1.064, "step": 340 }, { "epoch": 0.11223162003903708, "grad_norm": 2.390293836593628, "learning_rate": 4.9984237684680194e-05, "loss": 1.0741, "step": 345 }, { "epoch": 0.11385816525699415, "grad_norm": 1.8648512363433838, "learning_rate": 4.998330805450636e-05, "loss": 1.0554, "step": 350 }, { "epoch": 0.11548471047495121, "grad_norm": 2.891627788543701, "learning_rate": 4.998235179939122e-05, "loss": 1.0572, "step": 355 }, { "epoch": 0.11711125569290826, "grad_norm": 1.6847642660140991, "learning_rate": 4.998136892035382e-05, "loss": 1.0691, "step": 360 }, { "epoch": 0.11873780091086532, "grad_norm": 3.779186248779297, "learning_rate": 4.998035941844167e-05, "loss": 1.0556, "step": 365 }, { "epoch": 0.12036434612882238, "grad_norm": 2.99685001373291, "learning_rate": 4.997932329473058e-05, "loss": 1.0671, "step": 370 }, { "epoch": 0.12199089134677944, "grad_norm": 3.1010093688964844, "learning_rate": 4.997826055032476e-05, "loss": 1.0339, "step": 375 }, { "epoch": 0.1236174365647365, "grad_norm": 9.493967056274414, "learning_rate": 4.99771711863568e-05, "loss": 1.0966, "step": 380 }, { "epoch": 0.12524398178269355, "grad_norm": 6.169688701629639, "learning_rate": 4.997605520398762e-05, "loss": 1.1013, "step": 385 }, { "epoch": 0.12687052700065063, "grad_norm": 3.5889151096343994, "learning_rate": 4.9974912604406554e-05, "loss": 1.1073, "step": 390 }, { "epoch": 0.12849707221860768, "grad_norm": 3.6316006183624268, "learning_rate": 4.997374338883127e-05, "loss": 1.0445, "step": 395 }, { "epoch": 0.13012361743656473, "grad_norm": 1.7782939672470093, "learning_rate": 4.9972547558507815e-05, "loss": 1.056, "step": 400 }, { "epoch": 0.1317501626545218, "grad_norm": 1.9511332511901855, "learning_rate": 4.99713251147106e-05, "loss": 1.0564, "step": 405 }, { "epoch": 0.13337670787247885, "grad_norm": 2.3763427734375, "learning_rate": 4.997007605874239e-05, "loss": 1.0318, "step": 410 }, { "epoch": 0.13500325309043593, "grad_norm": 2.193451166152954, "learning_rate": 4.996880039193431e-05, "loss": 1.0415, "step": 415 }, { "epoch": 0.13662979830839297, "grad_norm": 2.155362367630005, "learning_rate": 4.996749811564586e-05, "loss": 1.0557, "step": 420 }, { "epoch": 0.13825634352635002, "grad_norm": 1.7424428462982178, "learning_rate": 4.996616923126488e-05, "loss": 1.0375, "step": 425 }, { "epoch": 0.1398828887443071, "grad_norm": 1.6261824369430542, "learning_rate": 4.996481374020759e-05, "loss": 1.0727, "step": 430 }, { "epoch": 0.14150943396226415, "grad_norm": 1.7393962144851685, "learning_rate": 4.996343164391853e-05, "loss": 1.0324, "step": 435 }, { "epoch": 0.14313597918022122, "grad_norm": 1.6991339921951294, "learning_rate": 4.9962022943870626e-05, "loss": 1.0131, "step": 440 }, { "epoch": 0.14476252439817827, "grad_norm": 2.201045274734497, "learning_rate": 4.9960587641565125e-05, "loss": 1.0369, "step": 445 }, { "epoch": 0.14638906961613532, "grad_norm": 3.974395751953125, "learning_rate": 4.995912573853166e-05, "loss": 1.0368, "step": 450 }, { "epoch": 0.1480156148340924, "grad_norm": 1.9366528987884521, "learning_rate": 4.995793706462675e-05, "loss": 1.0608, "step": 455 }, { "epoch": 0.14964216005204944, "grad_norm": 1.74889075756073, "learning_rate": 4.995642728422806e-05, "loss": 1.0354, "step": 460 }, { "epoch": 0.1512687052700065, "grad_norm": 2.5088062286376953, "learning_rate": 4.9954890907535126e-05, "loss": 1.0232, "step": 465 }, { "epoch": 0.15289525048796357, "grad_norm": 2.361210346221924, "learning_rate": 4.995332793618529e-05, "loss": 1.0654, "step": 470 }, { "epoch": 0.15452179570592062, "grad_norm": 2.3242251873016357, "learning_rate": 4.995173837184421e-05, "loss": 1.0737, "step": 475 }, { "epoch": 0.1561483409238777, "grad_norm": 2.0955278873443604, "learning_rate": 4.995012221620592e-05, "loss": 1.0452, "step": 480 }, { "epoch": 0.15777488614183474, "grad_norm": 2.6136956214904785, "learning_rate": 4.994847947099275e-05, "loss": 1.0127, "step": 485 }, { "epoch": 0.1594014313597918, "grad_norm": 1.6822832822799683, "learning_rate": 4.9946810137955404e-05, "loss": 1.0335, "step": 490 }, { "epoch": 0.16102797657774887, "grad_norm": 4.146617412567139, "learning_rate": 4.99451142188729e-05, "loss": 1.0368, "step": 495 }, { "epoch": 0.16265452179570591, "grad_norm": 2.445263147354126, "learning_rate": 4.994339171555259e-05, "loss": 1.0295, "step": 500 }, { "epoch": 0.164281067013663, "grad_norm": 1.8524963855743408, "learning_rate": 4.994164262983018e-05, "loss": 0.999, "step": 505 }, { "epoch": 0.16590761223162004, "grad_norm": 1.6552244424819946, "learning_rate": 4.993986696356966e-05, "loss": 1.0532, "step": 510 }, { "epoch": 0.1675341574495771, "grad_norm": 1.4219648838043213, "learning_rate": 4.9938064718663393e-05, "loss": 1.0255, "step": 515 }, { "epoch": 0.16916070266753416, "grad_norm": 2.2062759399414062, "learning_rate": 4.9936235897032035e-05, "loss": 1.0312, "step": 520 }, { "epoch": 0.1707872478854912, "grad_norm": 2.1640946865081787, "learning_rate": 4.9934380500624586e-05, "loss": 1.0103, "step": 525 }, { "epoch": 0.1724137931034483, "grad_norm": 2.7513413429260254, "learning_rate": 4.993249853141837e-05, "loss": 1.022, "step": 530 }, { "epoch": 0.17404033832140534, "grad_norm": 1.6363236904144287, "learning_rate": 4.9930589991419e-05, "loss": 1.011, "step": 535 }, { "epoch": 0.17566688353936238, "grad_norm": 1.9132270812988281, "learning_rate": 4.992865488266043e-05, "loss": 0.994, "step": 540 }, { "epoch": 0.17729342875731946, "grad_norm": 1.7427923679351807, "learning_rate": 4.9926693207204925e-05, "loss": 1.0189, "step": 545 }, { "epoch": 0.1789199739752765, "grad_norm": 2.8523027896881104, "learning_rate": 4.9924704967143064e-05, "loss": 0.994, "step": 550 }, { "epoch": 0.18054651919323358, "grad_norm": 1.653673768043518, "learning_rate": 4.992269016459373e-05, "loss": 1.0398, "step": 555 }, { "epoch": 0.18217306441119063, "grad_norm": 2.3957266807556152, "learning_rate": 4.9920648801704103e-05, "loss": 1.016, "step": 560 }, { "epoch": 0.18379960962914768, "grad_norm": 2.5810859203338623, "learning_rate": 4.991858088064971e-05, "loss": 1.0133, "step": 565 }, { "epoch": 0.18542615484710476, "grad_norm": 3.059021234512329, "learning_rate": 4.991648640363434e-05, "loss": 0.9728, "step": 570 }, { "epoch": 0.1870527000650618, "grad_norm": 2.2259278297424316, "learning_rate": 4.991436537289009e-05, "loss": 0.9972, "step": 575 }, { "epoch": 0.18867924528301888, "grad_norm": 1.805478811264038, "learning_rate": 4.9912217790677365e-05, "loss": 0.9773, "step": 580 }, { "epoch": 0.19030579050097593, "grad_norm": 2.030750036239624, "learning_rate": 4.991004365928487e-05, "loss": 1.0185, "step": 585 }, { "epoch": 0.19193233571893298, "grad_norm": 1.438374638557434, "learning_rate": 4.990784298102959e-05, "loss": 0.9636, "step": 590 }, { "epoch": 0.19355888093689005, "grad_norm": 2.8149659633636475, "learning_rate": 4.99056157582568e-05, "loss": 0.9657, "step": 595 }, { "epoch": 0.1951854261548471, "grad_norm": 1.8356095552444458, "learning_rate": 4.9903361993340095e-05, "loss": 1.0332, "step": 600 }, { "epoch": 0.19681197137280415, "grad_norm": 1.7523188591003418, "learning_rate": 4.9901081688681314e-05, "loss": 0.9879, "step": 605 }, { "epoch": 0.19843851659076123, "grad_norm": 2.0517983436584473, "learning_rate": 4.989877484671061e-05, "loss": 0.9925, "step": 610 }, { "epoch": 0.20006506180871828, "grad_norm": 2.2532644271850586, "learning_rate": 4.989644146988639e-05, "loss": 1.0367, "step": 615 }, { "epoch": 0.20169160702667535, "grad_norm": 2.036604642868042, "learning_rate": 4.989408156069537e-05, "loss": 0.9622, "step": 620 }, { "epoch": 0.2033181522446324, "grad_norm": 2.730886459350586, "learning_rate": 4.989169512165253e-05, "loss": 1.0176, "step": 625 }, { "epoch": 0.20494469746258945, "grad_norm": 2.1160526275634766, "learning_rate": 4.988928215530111e-05, "loss": 0.9827, "step": 630 }, { "epoch": 0.20657124268054652, "grad_norm": 3.1011462211608887, "learning_rate": 4.988684266421263e-05, "loss": 1.0526, "step": 635 }, { "epoch": 0.20819778789850357, "grad_norm": 2.138927936553955, "learning_rate": 4.9884376650986874e-05, "loss": 1.0044, "step": 640 }, { "epoch": 0.20982433311646065, "grad_norm": 2.2729949951171875, "learning_rate": 4.988188411825191e-05, "loss": 0.9911, "step": 645 }, { "epoch": 0.2114508783344177, "grad_norm": 1.5672523975372314, "learning_rate": 4.987936506866405e-05, "loss": 0.9847, "step": 650 }, { "epoch": 0.21307742355237475, "grad_norm": 3.5293917655944824, "learning_rate": 4.987681950490786e-05, "loss": 0.9918, "step": 655 }, { "epoch": 0.21470396877033182, "grad_norm": 1.7744712829589844, "learning_rate": 4.987424742969616e-05, "loss": 1.012, "step": 660 }, { "epoch": 0.21633051398828887, "grad_norm": 1.598602294921875, "learning_rate": 4.987164884577007e-05, "loss": 1.0146, "step": 665 }, { "epoch": 0.21795705920624595, "grad_norm": 1.7109062671661377, "learning_rate": 4.986902375589889e-05, "loss": 1.0063, "step": 670 }, { "epoch": 0.219583604424203, "grad_norm": 2.0958328247070312, "learning_rate": 4.986637216288021e-05, "loss": 0.993, "step": 675 }, { "epoch": 0.22121014964216004, "grad_norm": 1.6304259300231934, "learning_rate": 4.986369406953988e-05, "loss": 0.9761, "step": 680 }, { "epoch": 0.22283669486011712, "grad_norm": 1.977616548538208, "learning_rate": 4.986098947873195e-05, "loss": 0.9631, "step": 685 }, { "epoch": 0.22446324007807417, "grad_norm": 1.9147331714630127, "learning_rate": 4.985825839333871e-05, "loss": 0.9798, "step": 690 }, { "epoch": 0.22608978529603124, "grad_norm": 1.9273673295974731, "learning_rate": 4.985550081627074e-05, "loss": 1.0019, "step": 695 }, { "epoch": 0.2277163305139883, "grad_norm": 2.0150973796844482, "learning_rate": 4.985271675046679e-05, "loss": 0.9924, "step": 700 }, { "epoch": 0.22934287573194534, "grad_norm": 1.9766995906829834, "learning_rate": 4.984990619889387e-05, "loss": 0.984, "step": 705 }, { "epoch": 0.23096942094990242, "grad_norm": 1.6579651832580566, "learning_rate": 4.984706916454721e-05, "loss": 1.019, "step": 710 }, { "epoch": 0.23259596616785946, "grad_norm": 1.904036045074463, "learning_rate": 4.984420565045027e-05, "loss": 1.0067, "step": 715 }, { "epoch": 0.2342225113858165, "grad_norm": 1.8574409484863281, "learning_rate": 4.984131565965472e-05, "loss": 0.9616, "step": 720 }, { "epoch": 0.2358490566037736, "grad_norm": 1.7669479846954346, "learning_rate": 4.983839919524045e-05, "loss": 0.9859, "step": 725 }, { "epoch": 0.23747560182173064, "grad_norm": 2.423046827316284, "learning_rate": 4.983545626031555e-05, "loss": 0.9794, "step": 730 }, { "epoch": 0.2391021470396877, "grad_norm": 1.3990832567214966, "learning_rate": 4.983248685801636e-05, "loss": 1.0111, "step": 735 }, { "epoch": 0.24072869225764476, "grad_norm": 2.0094950199127197, "learning_rate": 4.982949099150738e-05, "loss": 0.9743, "step": 740 }, { "epoch": 0.2423552374756018, "grad_norm": 2.589078903198242, "learning_rate": 4.982646866398133e-05, "loss": 0.9941, "step": 745 }, { "epoch": 0.24398178269355889, "grad_norm": 1.907644271850586, "learning_rate": 4.982341987865914e-05, "loss": 1.0282, "step": 750 }, { "epoch": 0.24560832791151593, "grad_norm": 1.7494215965270996, "learning_rate": 4.9820344638789926e-05, "loss": 0.9679, "step": 755 }, { "epoch": 0.247234873129473, "grad_norm": 2.0386955738067627, "learning_rate": 4.981724294765101e-05, "loss": 0.9834, "step": 760 }, { "epoch": 0.24886141834743006, "grad_norm": 2.085916519165039, "learning_rate": 4.9814114808547876e-05, "loss": 1.002, "step": 765 }, { "epoch": 0.2504879635653871, "grad_norm": 1.6058604717254639, "learning_rate": 4.981096022481422e-05, "loss": 0.9536, "step": 770 }, { "epoch": 0.2521145087833442, "grad_norm": 1.4208518266677856, "learning_rate": 4.980777919981191e-05, "loss": 0.99, "step": 775 }, { "epoch": 0.25374105400130126, "grad_norm": 1.9622782468795776, "learning_rate": 4.980457173693099e-05, "loss": 0.9744, "step": 780 }, { "epoch": 0.2553675992192583, "grad_norm": 1.8765822649002075, "learning_rate": 4.980133783958969e-05, "loss": 0.9727, "step": 785 }, { "epoch": 0.25699414443721535, "grad_norm": 2.350468635559082, "learning_rate": 4.9798077511234396e-05, "loss": 0.9772, "step": 790 }, { "epoch": 0.25862068965517243, "grad_norm": 2.3239855766296387, "learning_rate": 4.979479075533967e-05, "loss": 0.9643, "step": 795 }, { "epoch": 0.26024723487312945, "grad_norm": 1.673163652420044, "learning_rate": 4.9791477575408254e-05, "loss": 0.9818, "step": 800 }, { "epoch": 0.2618737800910865, "grad_norm": 2.6592307090759277, "learning_rate": 4.9788137974971006e-05, "loss": 0.9771, "step": 805 }, { "epoch": 0.2635003253090436, "grad_norm": 1.6686887741088867, "learning_rate": 4.9784771957586995e-05, "loss": 0.9731, "step": 810 }, { "epoch": 0.2651268705270006, "grad_norm": 1.6961290836334229, "learning_rate": 4.97813795268434e-05, "loss": 0.9907, "step": 815 }, { "epoch": 0.2667534157449577, "grad_norm": 1.6171329021453857, "learning_rate": 4.977796068635558e-05, "loss": 0.9544, "step": 820 }, { "epoch": 0.2683799609629148, "grad_norm": 1.646835446357727, "learning_rate": 4.977451543976701e-05, "loss": 0.9733, "step": 825 }, { "epoch": 0.27000650618087185, "grad_norm": 1.7419092655181885, "learning_rate": 4.9771043790749335e-05, "loss": 0.9817, "step": 830 }, { "epoch": 0.2716330513988289, "grad_norm": 1.9374152421951294, "learning_rate": 4.976754574300231e-05, "loss": 0.9912, "step": 835 }, { "epoch": 0.27325959661678595, "grad_norm": 2.559399366378784, "learning_rate": 4.9764021300253844e-05, "loss": 0.9726, "step": 840 }, { "epoch": 0.274886141834743, "grad_norm": 2.07173490524292, "learning_rate": 4.976047046625997e-05, "loss": 0.9854, "step": 845 }, { "epoch": 0.27651268705270005, "grad_norm": 1.7450006008148193, "learning_rate": 4.975689324480484e-05, "loss": 0.9951, "step": 850 }, { "epoch": 0.2781392322706571, "grad_norm": 2.0831046104431152, "learning_rate": 4.975328963970073e-05, "loss": 0.9672, "step": 855 }, { "epoch": 0.2797657774886142, "grad_norm": 1.728157877922058, "learning_rate": 4.9749659654788036e-05, "loss": 0.9647, "step": 860 }, { "epoch": 0.2813923227065712, "grad_norm": 1.5945899486541748, "learning_rate": 4.9746003293935275e-05, "loss": 0.9773, "step": 865 }, { "epoch": 0.2830188679245283, "grad_norm": 1.8794679641723633, "learning_rate": 4.974232056103906e-05, "loss": 0.9582, "step": 870 }, { "epoch": 0.28464541314248537, "grad_norm": 2.6000568866729736, "learning_rate": 4.973861146002411e-05, "loss": 0.9105, "step": 875 }, { "epoch": 0.28627195836044245, "grad_norm": 1.840985655784607, "learning_rate": 4.973487599484324e-05, "loss": 1.0132, "step": 880 }, { "epoch": 0.28789850357839947, "grad_norm": 1.5071011781692505, "learning_rate": 4.973111416947739e-05, "loss": 0.9504, "step": 885 }, { "epoch": 0.28952504879635654, "grad_norm": 1.633811593055725, "learning_rate": 4.972732598793556e-05, "loss": 0.9621, "step": 890 }, { "epoch": 0.2911515940143136, "grad_norm": 1.4036028385162354, "learning_rate": 4.972351145425485e-05, "loss": 0.9588, "step": 895 }, { "epoch": 0.29277813923227064, "grad_norm": 1.7874526977539062, "learning_rate": 4.9719670572500444e-05, "loss": 1.0142, "step": 900 }, { "epoch": 0.2944046844502277, "grad_norm": 1.7446445226669312, "learning_rate": 4.97158033467656e-05, "loss": 0.9496, "step": 905 }, { "epoch": 0.2960312296681848, "grad_norm": 1.6762101650238037, "learning_rate": 4.9711909781171676e-05, "loss": 0.971, "step": 910 }, { "epoch": 0.2976577748861418, "grad_norm": 2.1427559852600098, "learning_rate": 4.970798987986805e-05, "loss": 0.9899, "step": 915 }, { "epoch": 0.2992843201040989, "grad_norm": 1.4986960887908936, "learning_rate": 4.970404364703222e-05, "loss": 0.9888, "step": 920 }, { "epoch": 0.30091086532205596, "grad_norm": 1.6600645780563354, "learning_rate": 4.970007108686972e-05, "loss": 0.9712, "step": 925 }, { "epoch": 0.302537410540013, "grad_norm": 1.5820732116699219, "learning_rate": 4.9696072203614134e-05, "loss": 0.9653, "step": 930 }, { "epoch": 0.30416395575797006, "grad_norm": 1.9865201711654663, "learning_rate": 4.969204700152712e-05, "loss": 0.9547, "step": 935 }, { "epoch": 0.30579050097592714, "grad_norm": 1.6267117261886597, "learning_rate": 4.9687995484898365e-05, "loss": 0.9365, "step": 940 }, { "epoch": 0.3074170461938842, "grad_norm": 1.5512055158615112, "learning_rate": 4.9683917658045606e-05, "loss": 0.9674, "step": 945 }, { "epoch": 0.30904359141184123, "grad_norm": 2.146451711654663, "learning_rate": 4.9679813525314635e-05, "loss": 0.9202, "step": 950 }, { "epoch": 0.3106701366297983, "grad_norm": 2.064035177230835, "learning_rate": 4.967568309107925e-05, "loss": 0.9546, "step": 955 }, { "epoch": 0.3122966818477554, "grad_norm": 1.9662508964538574, "learning_rate": 4.967152635974129e-05, "loss": 0.9432, "step": 960 }, { "epoch": 0.3139232270657124, "grad_norm": 1.4268410205841064, "learning_rate": 4.966734333573063e-05, "loss": 0.943, "step": 965 }, { "epoch": 0.3155497722836695, "grad_norm": 1.4327384233474731, "learning_rate": 4.966313402350516e-05, "loss": 0.9588, "step": 970 }, { "epoch": 0.31717631750162656, "grad_norm": 1.9224755764007568, "learning_rate": 4.965889842755077e-05, "loss": 0.9775, "step": 975 }, { "epoch": 0.3188028627195836, "grad_norm": 1.8693453073501587, "learning_rate": 4.965463655238139e-05, "loss": 0.9687, "step": 980 }, { "epoch": 0.32042940793754066, "grad_norm": 1.513520359992981, "learning_rate": 4.965034840253893e-05, "loss": 0.9569, "step": 985 }, { "epoch": 0.32205595315549773, "grad_norm": 1.4836102724075317, "learning_rate": 4.964603398259331e-05, "loss": 0.9335, "step": 990 }, { "epoch": 0.3236824983734548, "grad_norm": 1.8346377611160278, "learning_rate": 4.9641693297142455e-05, "loss": 0.9693, "step": 995 }, { "epoch": 0.32530904359141183, "grad_norm": 1.4152843952178955, "learning_rate": 4.9637326350812266e-05, "loss": 0.9441, "step": 1000 }, { "epoch": 0.3269355888093689, "grad_norm": 1.70713472366333, "learning_rate": 4.963293314825663e-05, "loss": 0.9252, "step": 1005 }, { "epoch": 0.328562134027326, "grad_norm": 1.5594573020935059, "learning_rate": 4.962851369415744e-05, "loss": 0.966, "step": 1010 }, { "epoch": 0.330188679245283, "grad_norm": 1.89411199092865, "learning_rate": 4.962406799322454e-05, "loss": 0.9546, "step": 1015 }, { "epoch": 0.3318152244632401, "grad_norm": 2.621799945831299, "learning_rate": 4.961959605019576e-05, "loss": 0.9728, "step": 1020 }, { "epoch": 0.33344176968119715, "grad_norm": 1.4206135272979736, "learning_rate": 4.961509786983689e-05, "loss": 0.9625, "step": 1025 }, { "epoch": 0.3350683148991542, "grad_norm": 1.4925669431686401, "learning_rate": 4.961057345694167e-05, "loss": 0.9774, "step": 1030 }, { "epoch": 0.33669486011711125, "grad_norm": 1.7291913032531738, "learning_rate": 4.9606022816331824e-05, "loss": 0.9374, "step": 1035 }, { "epoch": 0.3383214053350683, "grad_norm": 2.6825973987579346, "learning_rate": 4.960144595285701e-05, "loss": 0.9705, "step": 1040 }, { "epoch": 0.33994795055302535, "grad_norm": 1.8575574159622192, "learning_rate": 4.959684287139482e-05, "loss": 0.9923, "step": 1045 }, { "epoch": 0.3415744957709824, "grad_norm": 1.8348548412322998, "learning_rate": 4.959221357685081e-05, "loss": 0.9582, "step": 1050 }, { "epoch": 0.3432010409889395, "grad_norm": 1.757918119430542, "learning_rate": 4.9587558074158464e-05, "loss": 0.9639, "step": 1055 }, { "epoch": 0.3448275862068966, "grad_norm": 1.3968441486358643, "learning_rate": 4.958287636827919e-05, "loss": 0.9509, "step": 1060 }, { "epoch": 0.3464541314248536, "grad_norm": 1.9735528230667114, "learning_rate": 4.9578168464202324e-05, "loss": 0.9501, "step": 1065 }, { "epoch": 0.34808067664281067, "grad_norm": 1.5475578308105469, "learning_rate": 4.9573434366945124e-05, "loss": 0.9679, "step": 1070 }, { "epoch": 0.34970722186076775, "grad_norm": 1.778678297996521, "learning_rate": 4.956867408155277e-05, "loss": 0.9688, "step": 1075 }, { "epoch": 0.35133376707872477, "grad_norm": 1.8664950132369995, "learning_rate": 4.956388761309832e-05, "loss": 0.9351, "step": 1080 }, { "epoch": 0.35296031229668184, "grad_norm": 1.6625173091888428, "learning_rate": 4.955907496668279e-05, "loss": 0.9745, "step": 1085 }, { "epoch": 0.3545868575146389, "grad_norm": 1.519795298576355, "learning_rate": 4.955423614743503e-05, "loss": 0.9482, "step": 1090 }, { "epoch": 0.35621340273259594, "grad_norm": 1.3990721702575684, "learning_rate": 4.954937116051183e-05, "loss": 0.9536, "step": 1095 }, { "epoch": 0.357839947950553, "grad_norm": 1.4707249402999878, "learning_rate": 4.954448001109785e-05, "loss": 0.9294, "step": 1100 }, { "epoch": 0.3594664931685101, "grad_norm": 1.9385992288589478, "learning_rate": 4.953956270440563e-05, "loss": 0.948, "step": 1105 }, { "epoch": 0.36109303838646717, "grad_norm": 1.8499996662139893, "learning_rate": 4.953461924567559e-05, "loss": 1.0079, "step": 1110 }, { "epoch": 0.3627195836044242, "grad_norm": 1.5415987968444824, "learning_rate": 4.952964964017602e-05, "loss": 0.9838, "step": 1115 }, { "epoch": 0.36434612882238127, "grad_norm": 1.7730964422225952, "learning_rate": 4.952465389320307e-05, "loss": 0.9613, "step": 1120 }, { "epoch": 0.36597267404033834, "grad_norm": 1.5385533571243286, "learning_rate": 4.951963201008076e-05, "loss": 0.9375, "step": 1125 }, { "epoch": 0.36759921925829536, "grad_norm": 1.9418575763702393, "learning_rate": 4.951458399616096e-05, "loss": 0.9311, "step": 1130 }, { "epoch": 0.36922576447625244, "grad_norm": 1.4374250173568726, "learning_rate": 4.9509509856823376e-05, "loss": 0.9331, "step": 1135 }, { "epoch": 0.3708523096942095, "grad_norm": 1.9249181747436523, "learning_rate": 4.9504409597475565e-05, "loss": 0.9724, "step": 1140 }, { "epoch": 0.37247885491216653, "grad_norm": 1.7576247453689575, "learning_rate": 4.949928322355293e-05, "loss": 0.9056, "step": 1145 }, { "epoch": 0.3741054001301236, "grad_norm": 1.355421543121338, "learning_rate": 4.949413074051868e-05, "loss": 0.9334, "step": 1150 }, { "epoch": 0.3757319453480807, "grad_norm": 1.8686085939407349, "learning_rate": 4.948895215386388e-05, "loss": 0.934, "step": 1155 }, { "epoch": 0.37735849056603776, "grad_norm": 2.2301089763641357, "learning_rate": 4.948374746910739e-05, "loss": 0.972, "step": 1160 }, { "epoch": 0.3789850357839948, "grad_norm": 1.836775541305542, "learning_rate": 4.947851669179589e-05, "loss": 0.9642, "step": 1165 }, { "epoch": 0.38061158100195186, "grad_norm": 1.971665382385254, "learning_rate": 4.9473259827503865e-05, "loss": 0.9633, "step": 1170 }, { "epoch": 0.38223812621990894, "grad_norm": 2.0484726428985596, "learning_rate": 4.946797688183361e-05, "loss": 0.9384, "step": 1175 }, { "epoch": 0.38386467143786596, "grad_norm": 1.9105437994003296, "learning_rate": 4.94626678604152e-05, "loss": 0.9383, "step": 1180 }, { "epoch": 0.38549121665582303, "grad_norm": 1.6543099880218506, "learning_rate": 4.945733276890652e-05, "loss": 0.9458, "step": 1185 }, { "epoch": 0.3871177618737801, "grad_norm": 1.6211663484573364, "learning_rate": 4.945197161299321e-05, "loss": 0.9231, "step": 1190 }, { "epoch": 0.38874430709173713, "grad_norm": 1.5416107177734375, "learning_rate": 4.944658439838872e-05, "loss": 0.9331, "step": 1195 }, { "epoch": 0.3903708523096942, "grad_norm": 1.2914979457855225, "learning_rate": 4.9441171130834245e-05, "loss": 0.913, "step": 1200 }, { "epoch": 0.3919973975276513, "grad_norm": 2.8815155029296875, "learning_rate": 4.943573181609876e-05, "loss": 0.9807, "step": 1205 }, { "epoch": 0.3936239427456083, "grad_norm": 2.1332266330718994, "learning_rate": 4.943026645997898e-05, "loss": 0.9489, "step": 1210 }, { "epoch": 0.3952504879635654, "grad_norm": 1.5281325578689575, "learning_rate": 4.9424775068299404e-05, "loss": 0.9758, "step": 1215 }, { "epoch": 0.39687703318152245, "grad_norm": 1.47801673412323, "learning_rate": 4.941925764691224e-05, "loss": 0.9372, "step": 1220 }, { "epoch": 0.39850357839947953, "grad_norm": 1.387467622756958, "learning_rate": 4.941371420169746e-05, "loss": 0.9521, "step": 1225 }, { "epoch": 0.40013012361743655, "grad_norm": 2.0657622814178467, "learning_rate": 4.940814473856278e-05, "loss": 0.9479, "step": 1230 }, { "epoch": 0.4017566688353936, "grad_norm": 1.4878047704696655, "learning_rate": 4.940254926344361e-05, "loss": 0.9189, "step": 1235 }, { "epoch": 0.4033832140533507, "grad_norm": 1.6807146072387695, "learning_rate": 4.9396927782303105e-05, "loss": 0.9664, "step": 1240 }, { "epoch": 0.4050097592713077, "grad_norm": 2.2471487522125244, "learning_rate": 4.939128030113213e-05, "loss": 0.9335, "step": 1245 }, { "epoch": 0.4066363044892648, "grad_norm": 1.8448090553283691, "learning_rate": 4.938560682594925e-05, "loss": 0.958, "step": 1250 }, { "epoch": 0.4082628497072219, "grad_norm": 1.5685856342315674, "learning_rate": 4.9379907362800756e-05, "loss": 0.9243, "step": 1255 }, { "epoch": 0.4098893949251789, "grad_norm": 1.937020182609558, "learning_rate": 4.937418191776061e-05, "loss": 0.9214, "step": 1260 }, { "epoch": 0.41151594014313597, "grad_norm": 2.0497727394104004, "learning_rate": 4.936843049693046e-05, "loss": 0.9193, "step": 1265 }, { "epoch": 0.41314248536109305, "grad_norm": 4.580631256103516, "learning_rate": 4.936265310643967e-05, "loss": 0.9449, "step": 1270 }, { "epoch": 0.4147690305790501, "grad_norm": 2.201979637145996, "learning_rate": 4.935684975244525e-05, "loss": 0.9561, "step": 1275 }, { "epoch": 0.41639557579700714, "grad_norm": 1.5102165937423706, "learning_rate": 4.9351020441131876e-05, "loss": 0.9337, "step": 1280 }, { "epoch": 0.4180221210149642, "grad_norm": 1.9690766334533691, "learning_rate": 4.9345165178711904e-05, "loss": 0.9316, "step": 1285 }, { "epoch": 0.4196486662329213, "grad_norm": 1.6046276092529297, "learning_rate": 4.933928397142536e-05, "loss": 0.9686, "step": 1290 }, { "epoch": 0.4212752114508783, "grad_norm": 2.0253474712371826, "learning_rate": 4.9333376825539864e-05, "loss": 0.9337, "step": 1295 }, { "epoch": 0.4229017566688354, "grad_norm": 2.475409507751465, "learning_rate": 4.932744374735075e-05, "loss": 0.9345, "step": 1300 }, { "epoch": 0.42452830188679247, "grad_norm": 1.492626667022705, "learning_rate": 4.932148474318094e-05, "loss": 0.9059, "step": 1305 }, { "epoch": 0.4261548471047495, "grad_norm": 1.358420968055725, "learning_rate": 4.9315499819381004e-05, "loss": 0.9468, "step": 1310 }, { "epoch": 0.42778139232270657, "grad_norm": 1.4562768936157227, "learning_rate": 4.930948898232912e-05, "loss": 0.9239, "step": 1315 }, { "epoch": 0.42940793754066364, "grad_norm": 2.1248080730438232, "learning_rate": 4.93034522384311e-05, "loss": 0.9419, "step": 1320 }, { "epoch": 0.43103448275862066, "grad_norm": 1.535090446472168, "learning_rate": 4.929738959412037e-05, "loss": 0.9565, "step": 1325 }, { "epoch": 0.43266102797657774, "grad_norm": 1.8626047372817993, "learning_rate": 4.9291301055857916e-05, "loss": 0.9372, "step": 1330 }, { "epoch": 0.4342875731945348, "grad_norm": 1.582846999168396, "learning_rate": 4.9285186630132376e-05, "loss": 0.9311, "step": 1335 }, { "epoch": 0.4359141184124919, "grad_norm": 1.5691665410995483, "learning_rate": 4.9279046323459934e-05, "loss": 0.9266, "step": 1340 }, { "epoch": 0.4375406636304489, "grad_norm": 1.534501314163208, "learning_rate": 4.927288014238438e-05, "loss": 0.928, "step": 1345 }, { "epoch": 0.439167208848406, "grad_norm": 1.6533100605010986, "learning_rate": 4.9266688093477066e-05, "loss": 0.9258, "step": 1350 }, { "epoch": 0.44079375406636306, "grad_norm": 1.916756272315979, "learning_rate": 4.926047018333691e-05, "loss": 0.9673, "step": 1355 }, { "epoch": 0.4424202992843201, "grad_norm": 1.4287587404251099, "learning_rate": 4.925422641859041e-05, "loss": 0.9536, "step": 1360 }, { "epoch": 0.44404684450227716, "grad_norm": 1.964421272277832, "learning_rate": 4.92479568058916e-05, "loss": 0.9736, "step": 1365 }, { "epoch": 0.44567338972023424, "grad_norm": 1.5249435901641846, "learning_rate": 4.924166135192206e-05, "loss": 0.9511, "step": 1370 }, { "epoch": 0.44729993493819126, "grad_norm": 1.3882592916488647, "learning_rate": 4.923534006339091e-05, "loss": 0.9453, "step": 1375 }, { "epoch": 0.44892648015614833, "grad_norm": 1.637730360031128, "learning_rate": 4.92289929470348e-05, "loss": 0.9126, "step": 1380 }, { "epoch": 0.4505530253741054, "grad_norm": 1.747154712677002, "learning_rate": 4.922262000961793e-05, "loss": 0.9387, "step": 1385 }, { "epoch": 0.4521795705920625, "grad_norm": 1.6546299457550049, "learning_rate": 4.9216221257931984e-05, "loss": 0.9132, "step": 1390 }, { "epoch": 0.4538061158100195, "grad_norm": 1.4564385414123535, "learning_rate": 4.920979669879617e-05, "loss": 0.9313, "step": 1395 }, { "epoch": 0.4554326610279766, "grad_norm": 1.184078574180603, "learning_rate": 4.920334633905721e-05, "loss": 0.9035, "step": 1400 }, { "epoch": 0.45705920624593366, "grad_norm": 1.737348198890686, "learning_rate": 4.9196870185589304e-05, "loss": 0.917, "step": 1405 }, { "epoch": 0.4586857514638907, "grad_norm": 2.8453683853149414, "learning_rate": 4.919036824529415e-05, "loss": 0.9026, "step": 1410 }, { "epoch": 0.46031229668184775, "grad_norm": 1.8641711473464966, "learning_rate": 4.918384052510092e-05, "loss": 0.9521, "step": 1415 }, { "epoch": 0.46193884189980483, "grad_norm": 1.5529496669769287, "learning_rate": 4.917728703196628e-05, "loss": 0.9586, "step": 1420 }, { "epoch": 0.46356538711776185, "grad_norm": 1.513617992401123, "learning_rate": 4.9170707772874324e-05, "loss": 0.9014, "step": 1425 }, { "epoch": 0.4651919323357189, "grad_norm": 1.7065914869308472, "learning_rate": 4.9164102754836655e-05, "loss": 0.9307, "step": 1430 }, { "epoch": 0.466818477553676, "grad_norm": 1.4803603887557983, "learning_rate": 4.915747198489229e-05, "loss": 0.9328, "step": 1435 }, { "epoch": 0.468445022771633, "grad_norm": 1.2487103939056396, "learning_rate": 4.915081547010769e-05, "loss": 0.9037, "step": 1440 }, { "epoch": 0.4700715679895901, "grad_norm": 1.9018504619598389, "learning_rate": 4.914413321757679e-05, "loss": 0.9697, "step": 1445 }, { "epoch": 0.4716981132075472, "grad_norm": 1.394386649131775, "learning_rate": 4.913742523442091e-05, "loss": 0.897, "step": 1450 }, { "epoch": 0.47332465842550425, "grad_norm": 1.7415742874145508, "learning_rate": 4.913069152778881e-05, "loss": 0.9375, "step": 1455 }, { "epoch": 0.4749512036434613, "grad_norm": 1.9433326721191406, "learning_rate": 4.912393210485666e-05, "loss": 0.9799, "step": 1460 }, { "epoch": 0.47657774886141835, "grad_norm": 1.216127872467041, "learning_rate": 4.911714697282806e-05, "loss": 0.9291, "step": 1465 }, { "epoch": 0.4782042940793754, "grad_norm": 1.6121269464492798, "learning_rate": 4.9110336138933964e-05, "loss": 0.9175, "step": 1470 }, { "epoch": 0.47983083929733245, "grad_norm": 1.493024230003357, "learning_rate": 4.9103499610432744e-05, "loss": 0.8798, "step": 1475 }, { "epoch": 0.4814573845152895, "grad_norm": 1.4081448316574097, "learning_rate": 4.909663739461017e-05, "loss": 0.8915, "step": 1480 }, { "epoch": 0.4830839297332466, "grad_norm": 1.7195796966552734, "learning_rate": 4.908974949877935e-05, "loss": 0.9632, "step": 1485 }, { "epoch": 0.4847104749512036, "grad_norm": 2.151754856109619, "learning_rate": 4.908283593028078e-05, "loss": 0.9258, "step": 1490 }, { "epoch": 0.4863370201691607, "grad_norm": 1.5470004081726074, "learning_rate": 4.907589669648232e-05, "loss": 0.898, "step": 1495 }, { "epoch": 0.48796356538711777, "grad_norm": 1.494513750076294, "learning_rate": 4.9068931804779175e-05, "loss": 0.9376, "step": 1500 }, { "epoch": 0.48959011060507485, "grad_norm": 1.5122071504592896, "learning_rate": 4.9061941262593886e-05, "loss": 0.9262, "step": 1505 }, { "epoch": 0.49121665582303187, "grad_norm": 1.4356802701950073, "learning_rate": 4.905492507737634e-05, "loss": 0.9265, "step": 1510 }, { "epoch": 0.49284320104098894, "grad_norm": 1.4464737176895142, "learning_rate": 4.904788325660377e-05, "loss": 0.9076, "step": 1515 }, { "epoch": 0.494469746258946, "grad_norm": 1.7022855281829834, "learning_rate": 4.9040815807780676e-05, "loss": 0.9084, "step": 1520 }, { "epoch": 0.49609629147690304, "grad_norm": 1.3407727479934692, "learning_rate": 4.9033722738438924e-05, "loss": 0.9474, "step": 1525 }, { "epoch": 0.4977228366948601, "grad_norm": 1.3368557691574097, "learning_rate": 4.9026604056137664e-05, "loss": 0.9202, "step": 1530 }, { "epoch": 0.4993493819128172, "grad_norm": 1.5609967708587646, "learning_rate": 4.901945976846334e-05, "loss": 0.9509, "step": 1535 }, { "epoch": 0.5009759271307742, "grad_norm": 1.7592136859893799, "learning_rate": 4.9012289883029674e-05, "loss": 0.9399, "step": 1540 }, { "epoch": 0.5026024723487313, "grad_norm": 1.7365574836730957, "learning_rate": 4.900509440747769e-05, "loss": 0.9166, "step": 1545 }, { "epoch": 0.5042290175666884, "grad_norm": 1.6642284393310547, "learning_rate": 4.8997873349475664e-05, "loss": 0.9264, "step": 1550 }, { "epoch": 0.5058555627846454, "grad_norm": 1.6465163230895996, "learning_rate": 4.8990626716719154e-05, "loss": 0.9346, "step": 1555 }, { "epoch": 0.5074821080026025, "grad_norm": 1.4424694776535034, "learning_rate": 4.898335451693096e-05, "loss": 0.9193, "step": 1560 }, { "epoch": 0.5091086532205595, "grad_norm": 1.583742380142212, "learning_rate": 4.897605675786113e-05, "loss": 0.9057, "step": 1565 }, { "epoch": 0.5107351984385166, "grad_norm": 1.4776053428649902, "learning_rate": 4.896873344728695e-05, "loss": 0.9096, "step": 1570 }, { "epoch": 0.5123617436564737, "grad_norm": 1.4164116382598877, "learning_rate": 4.896138459301295e-05, "loss": 0.9057, "step": 1575 }, { "epoch": 0.5139882888744307, "grad_norm": 1.268789529800415, "learning_rate": 4.895401020287086e-05, "loss": 0.9464, "step": 1580 }, { "epoch": 0.5156148340923877, "grad_norm": 1.872787594795227, "learning_rate": 4.894661028471964e-05, "loss": 0.9279, "step": 1585 }, { "epoch": 0.5172413793103449, "grad_norm": 1.479695439338684, "learning_rate": 4.893918484644545e-05, "loss": 0.9167, "step": 1590 }, { "epoch": 0.5188679245283019, "grad_norm": 1.6517423391342163, "learning_rate": 4.893173389596165e-05, "loss": 0.8991, "step": 1595 }, { "epoch": 0.5204944697462589, "grad_norm": 1.6372499465942383, "learning_rate": 4.892425744120879e-05, "loss": 0.9136, "step": 1600 }, { "epoch": 0.522121014964216, "grad_norm": 1.5752387046813965, "learning_rate": 4.8916755490154584e-05, "loss": 0.9483, "step": 1605 }, { "epoch": 0.523747560182173, "grad_norm": 1.5180872678756714, "learning_rate": 4.890922805079394e-05, "loss": 0.9549, "step": 1610 }, { "epoch": 0.5253741054001301, "grad_norm": 1.3289493322372437, "learning_rate": 4.890167513114893e-05, "loss": 0.9771, "step": 1615 }, { "epoch": 0.5270006506180872, "grad_norm": 1.8332977294921875, "learning_rate": 4.8894096739268746e-05, "loss": 0.9305, "step": 1620 }, { "epoch": 0.5286271958360442, "grad_norm": 1.9242297410964966, "learning_rate": 4.888649288322976e-05, "loss": 0.9308, "step": 1625 }, { "epoch": 0.5302537410540012, "grad_norm": 2.045342445373535, "learning_rate": 4.887886357113548e-05, "loss": 0.9267, "step": 1630 }, { "epoch": 0.5318802862719584, "grad_norm": 2.2191734313964844, "learning_rate": 4.8871208811116523e-05, "loss": 0.9216, "step": 1635 }, { "epoch": 0.5335068314899154, "grad_norm": 2.301372766494751, "learning_rate": 4.886352861133064e-05, "loss": 0.9106, "step": 1640 }, { "epoch": 0.5351333767078725, "grad_norm": 1.5266695022583008, "learning_rate": 4.88558229799627e-05, "loss": 0.9192, "step": 1645 }, { "epoch": 0.5367599219258296, "grad_norm": 1.8200902938842773, "learning_rate": 4.884809192522465e-05, "loss": 0.9399, "step": 1650 }, { "epoch": 0.5383864671437866, "grad_norm": 1.3141257762908936, "learning_rate": 4.884033545535556e-05, "loss": 0.9209, "step": 1655 }, { "epoch": 0.5400130123617437, "grad_norm": 1.6493685245513916, "learning_rate": 4.883255357862156e-05, "loss": 0.9059, "step": 1660 }, { "epoch": 0.5416395575797007, "grad_norm": 1.416832685470581, "learning_rate": 4.882474630331587e-05, "loss": 0.8798, "step": 1665 }, { "epoch": 0.5432661027976577, "grad_norm": 1.3395907878875732, "learning_rate": 4.8816913637758786e-05, "loss": 0.9312, "step": 1670 }, { "epoch": 0.5448926480156149, "grad_norm": 1.4029171466827393, "learning_rate": 4.880905559029764e-05, "loss": 0.9283, "step": 1675 }, { "epoch": 0.5465191932335719, "grad_norm": 1.5277211666107178, "learning_rate": 4.880117216930683e-05, "loss": 0.919, "step": 1680 }, { "epoch": 0.5481457384515289, "grad_norm": 1.679788589477539, "learning_rate": 4.879326338318778e-05, "loss": 0.9004, "step": 1685 }, { "epoch": 0.549772283669486, "grad_norm": 1.3738101720809937, "learning_rate": 4.878532924036898e-05, "loss": 0.9017, "step": 1690 }, { "epoch": 0.5513988288874431, "grad_norm": 1.2911015748977661, "learning_rate": 4.877736974930589e-05, "loss": 0.932, "step": 1695 }, { "epoch": 0.5530253741054001, "grad_norm": 1.2811925411224365, "learning_rate": 4.8769384918481034e-05, "loss": 0.9133, "step": 1700 }, { "epoch": 0.5546519193233572, "grad_norm": 1.3518776893615723, "learning_rate": 4.876137475640392e-05, "loss": 0.9039, "step": 1705 }, { "epoch": 0.5562784645413142, "grad_norm": 1.6754337549209595, "learning_rate": 4.8753339271611035e-05, "loss": 0.9031, "step": 1710 }, { "epoch": 0.5579050097592713, "grad_norm": 1.4153861999511719, "learning_rate": 4.874527847266588e-05, "loss": 0.9312, "step": 1715 }, { "epoch": 0.5595315549772284, "grad_norm": 1.8386647701263428, "learning_rate": 4.873719236815892e-05, "loss": 0.9482, "step": 1720 }, { "epoch": 0.5611581001951854, "grad_norm": 1.4561530351638794, "learning_rate": 4.8729080966707596e-05, "loss": 0.9014, "step": 1725 }, { "epoch": 0.5627846454131424, "grad_norm": 1.8909015655517578, "learning_rate": 4.872094427695629e-05, "loss": 0.8961, "step": 1730 }, { "epoch": 0.5644111906310996, "grad_norm": 1.7473182678222656, "learning_rate": 4.871278230757637e-05, "loss": 0.9129, "step": 1735 }, { "epoch": 0.5660377358490566, "grad_norm": 1.3599871397018433, "learning_rate": 4.87045950672661e-05, "loss": 0.9572, "step": 1740 }, { "epoch": 0.5676642810670136, "grad_norm": 2.034395217895508, "learning_rate": 4.869638256475071e-05, "loss": 0.9426, "step": 1745 }, { "epoch": 0.5692908262849707, "grad_norm": 1.5797314643859863, "learning_rate": 4.8688144808782335e-05, "loss": 0.93, "step": 1750 }, { "epoch": 0.5709173715029278, "grad_norm": 1.9499742984771729, "learning_rate": 4.867988180814003e-05, "loss": 0.9395, "step": 1755 }, { "epoch": 0.5725439167208849, "grad_norm": 1.5070887804031372, "learning_rate": 4.8671593571629736e-05, "loss": 0.8909, "step": 1760 }, { "epoch": 0.5741704619388419, "grad_norm": 1.7918496131896973, "learning_rate": 4.866328010808432e-05, "loss": 0.9068, "step": 1765 }, { "epoch": 0.5757970071567989, "grad_norm": 1.481836199760437, "learning_rate": 4.865494142636352e-05, "loss": 0.8943, "step": 1770 }, { "epoch": 0.5774235523747561, "grad_norm": 1.4225833415985107, "learning_rate": 4.864657753535393e-05, "loss": 0.9425, "step": 1775 }, { "epoch": 0.5790500975927131, "grad_norm": 1.6397838592529297, "learning_rate": 4.863818844396904e-05, "loss": 0.8981, "step": 1780 }, { "epoch": 0.5806766428106701, "grad_norm": 1.5173492431640625, "learning_rate": 4.862977416114917e-05, "loss": 0.9429, "step": 1785 }, { "epoch": 0.5823031880286272, "grad_norm": 1.3602893352508545, "learning_rate": 4.862133469586151e-05, "loss": 0.924, "step": 1790 }, { "epoch": 0.5839297332465843, "grad_norm": 1.4060094356536865, "learning_rate": 4.861287005710007e-05, "loss": 0.9194, "step": 1795 }, { "epoch": 0.5855562784645413, "grad_norm": 1.3095462322235107, "learning_rate": 4.860438025388568e-05, "loss": 0.9459, "step": 1800 }, { "epoch": 0.5871828236824984, "grad_norm": 1.2292702198028564, "learning_rate": 4.859586529526603e-05, "loss": 0.9296, "step": 1805 }, { "epoch": 0.5888093689004554, "grad_norm": 1.5111331939697266, "learning_rate": 4.858732519031557e-05, "loss": 0.8881, "step": 1810 }, { "epoch": 0.5904359141184125, "grad_norm": 1.3684686422348022, "learning_rate": 4.857875994813556e-05, "loss": 0.9194, "step": 1815 }, { "epoch": 0.5920624593363696, "grad_norm": 1.7605769634246826, "learning_rate": 4.857016957785408e-05, "loss": 0.908, "step": 1820 }, { "epoch": 0.5936890045543266, "grad_norm": 1.6246333122253418, "learning_rate": 4.8561554088625966e-05, "loss": 0.8966, "step": 1825 }, { "epoch": 0.5953155497722836, "grad_norm": 1.2724145650863647, "learning_rate": 4.8552913489632805e-05, "loss": 0.9175, "step": 1830 }, { "epoch": 0.5969420949902408, "grad_norm": 1.3451007604599, "learning_rate": 4.8544247790082975e-05, "loss": 0.9193, "step": 1835 }, { "epoch": 0.5985686402081978, "grad_norm": 1.576870322227478, "learning_rate": 4.853555699921159e-05, "loss": 0.9213, "step": 1840 }, { "epoch": 0.6001951854261548, "grad_norm": 1.8668714761734009, "learning_rate": 4.8526841126280506e-05, "loss": 0.9216, "step": 1845 }, { "epoch": 0.6018217306441119, "grad_norm": 1.5587042570114136, "learning_rate": 4.851810018057831e-05, "loss": 0.8879, "step": 1850 }, { "epoch": 0.603448275862069, "grad_norm": 1.5127476453781128, "learning_rate": 4.8509334171420305e-05, "loss": 0.9333, "step": 1855 }, { "epoch": 0.605074821080026, "grad_norm": 1.5934640169143677, "learning_rate": 4.850054310814851e-05, "loss": 0.9284, "step": 1860 }, { "epoch": 0.6067013662979831, "grad_norm": 1.5028070211410522, "learning_rate": 4.849172700013164e-05, "loss": 0.9104, "step": 1865 }, { "epoch": 0.6083279115159401, "grad_norm": 1.3410147428512573, "learning_rate": 4.84828858567651e-05, "loss": 0.9317, "step": 1870 }, { "epoch": 0.6099544567338973, "grad_norm": 1.4812523126602173, "learning_rate": 4.8474019687470984e-05, "loss": 0.9362, "step": 1875 }, { "epoch": 0.6115810019518543, "grad_norm": 1.663413643836975, "learning_rate": 4.8465128501698056e-05, "loss": 0.9174, "step": 1880 }, { "epoch": 0.6132075471698113, "grad_norm": 1.8307366371154785, "learning_rate": 4.845621230892172e-05, "loss": 0.9182, "step": 1885 }, { "epoch": 0.6148340923877684, "grad_norm": 1.945298194885254, "learning_rate": 4.844727111864405e-05, "loss": 0.9198, "step": 1890 }, { "epoch": 0.6164606376057254, "grad_norm": 1.5019532442092896, "learning_rate": 4.8438304940393757e-05, "loss": 0.9031, "step": 1895 }, { "epoch": 0.6180871828236825, "grad_norm": 1.469648838043213, "learning_rate": 4.842931378372618e-05, "loss": 0.9037, "step": 1900 }, { "epoch": 0.6197137280416396, "grad_norm": 1.2155646085739136, "learning_rate": 4.842029765822328e-05, "loss": 0.8903, "step": 1905 }, { "epoch": 0.6213402732595966, "grad_norm": 1.2848434448242188, "learning_rate": 4.841125657349361e-05, "loss": 0.9157, "step": 1910 }, { "epoch": 0.6229668184775536, "grad_norm": 1.593353509902954, "learning_rate": 4.840219053917236e-05, "loss": 0.9092, "step": 1915 }, { "epoch": 0.6245933636955108, "grad_norm": 1.2928284406661987, "learning_rate": 4.8393099564921265e-05, "loss": 0.9053, "step": 1920 }, { "epoch": 0.6262199089134678, "grad_norm": 1.4733920097351074, "learning_rate": 4.838398366042868e-05, "loss": 0.88, "step": 1925 }, { "epoch": 0.6278464541314248, "grad_norm": 1.3781639337539673, "learning_rate": 4.837484283540949e-05, "loss": 0.9278, "step": 1930 }, { "epoch": 0.629472999349382, "grad_norm": 1.6138546466827393, "learning_rate": 4.8365677099605166e-05, "loss": 0.9233, "step": 1935 }, { "epoch": 0.631099544567339, "grad_norm": 1.6688655614852905, "learning_rate": 4.8356486462783713e-05, "loss": 0.8964, "step": 1940 }, { "epoch": 0.632726089785296, "grad_norm": 1.70151948928833, "learning_rate": 4.834727093473968e-05, "loss": 0.9288, "step": 1945 }, { "epoch": 0.6343526350032531, "grad_norm": 1.4687873125076294, "learning_rate": 4.833803052529414e-05, "loss": 0.9222, "step": 1950 }, { "epoch": 0.6359791802212101, "grad_norm": 1.4366106986999512, "learning_rate": 4.8328765244294685e-05, "loss": 0.8803, "step": 1955 }, { "epoch": 0.6376057254391672, "grad_norm": 1.5712631940841675, "learning_rate": 4.8319475101615394e-05, "loss": 0.9273, "step": 1960 }, { "epoch": 0.6392322706571243, "grad_norm": 1.5275715589523315, "learning_rate": 4.8310160107156886e-05, "loss": 0.901, "step": 1965 }, { "epoch": 0.6408588158750813, "grad_norm": 1.4599666595458984, "learning_rate": 4.8300820270846217e-05, "loss": 0.8943, "step": 1970 }, { "epoch": 0.6424853610930383, "grad_norm": 1.344032645225525, "learning_rate": 4.829145560263694e-05, "loss": 0.9264, "step": 1975 }, { "epoch": 0.6441119063109955, "grad_norm": 1.4584059715270996, "learning_rate": 4.828206611250906e-05, "loss": 0.8896, "step": 1980 }, { "epoch": 0.6457384515289525, "grad_norm": 1.6877212524414062, "learning_rate": 4.827265181046906e-05, "loss": 0.9163, "step": 1985 }, { "epoch": 0.6473649967469096, "grad_norm": 4.016079425811768, "learning_rate": 4.8263212706549845e-05, "loss": 0.9007, "step": 1990 }, { "epoch": 0.6489915419648666, "grad_norm": 1.9531245231628418, "learning_rate": 4.825374881081074e-05, "loss": 0.9271, "step": 1995 }, { "epoch": 0.6506180871828237, "grad_norm": 1.5667097568511963, "learning_rate": 4.824426013333751e-05, "loss": 0.9079, "step": 2000 }, { "epoch": 0.6522446324007808, "grad_norm": 1.608904242515564, "learning_rate": 4.823474668424234e-05, "loss": 0.9111, "step": 2005 }, { "epoch": 0.6538711776187378, "grad_norm": 1.2277331352233887, "learning_rate": 4.8225208473663794e-05, "loss": 0.9246, "step": 2010 }, { "epoch": 0.6554977228366948, "grad_norm": 1.6614294052124023, "learning_rate": 4.821564551176683e-05, "loss": 0.9403, "step": 2015 }, { "epoch": 0.657124268054652, "grad_norm": 1.6826685667037964, "learning_rate": 4.820605780874278e-05, "loss": 0.8964, "step": 2020 }, { "epoch": 0.658750813272609, "grad_norm": 1.3787823915481567, "learning_rate": 4.819644537480934e-05, "loss": 0.8739, "step": 2025 }, { "epoch": 0.660377358490566, "grad_norm": 1.6473807096481323, "learning_rate": 4.818680822021059e-05, "loss": 0.9068, "step": 2030 }, { "epoch": 0.6620039037085231, "grad_norm": 1.3588086366653442, "learning_rate": 4.817714635521692e-05, "loss": 0.901, "step": 2035 }, { "epoch": 0.6636304489264802, "grad_norm": 1.1436259746551514, "learning_rate": 4.816745979012508e-05, "loss": 0.8903, "step": 2040 }, { "epoch": 0.6652569941444372, "grad_norm": 1.9965174198150635, "learning_rate": 4.8157748535258116e-05, "loss": 0.9184, "step": 2045 }, { "epoch": 0.6668835393623943, "grad_norm": 1.5490531921386719, "learning_rate": 4.814801260096542e-05, "loss": 0.899, "step": 2050 }, { "epoch": 0.6685100845803513, "grad_norm": 1.5591039657592773, "learning_rate": 4.813825199762264e-05, "loss": 0.9086, "step": 2055 }, { "epoch": 0.6701366297983083, "grad_norm": 1.3072726726531982, "learning_rate": 4.812846673563177e-05, "loss": 0.8888, "step": 2060 }, { "epoch": 0.6717631750162655, "grad_norm": 1.3698911666870117, "learning_rate": 4.811865682542104e-05, "loss": 0.8885, "step": 2065 }, { "epoch": 0.6733897202342225, "grad_norm": 1.4218491315841675, "learning_rate": 4.8108822277444953e-05, "loss": 0.8776, "step": 2070 }, { "epoch": 0.6750162654521795, "grad_norm": 1.3596829175949097, "learning_rate": 4.80989631021843e-05, "loss": 0.9006, "step": 2075 }, { "epoch": 0.6766428106701367, "grad_norm": 1.4312387704849243, "learning_rate": 4.8089079310146077e-05, "loss": 0.919, "step": 2080 }, { "epoch": 0.6782693558880937, "grad_norm": 1.5680636167526245, "learning_rate": 4.807917091186353e-05, "loss": 0.9236, "step": 2085 }, { "epoch": 0.6798959011060507, "grad_norm": 1.3332332372665405, "learning_rate": 4.806923791789614e-05, "loss": 0.9042, "step": 2090 }, { "epoch": 0.6815224463240078, "grad_norm": 1.7250369787216187, "learning_rate": 4.805928033882959e-05, "loss": 0.9099, "step": 2095 }, { "epoch": 0.6831489915419648, "grad_norm": 1.4059739112854004, "learning_rate": 4.804929818527576e-05, "loss": 0.9046, "step": 2100 }, { "epoch": 0.684775536759922, "grad_norm": 1.4263503551483154, "learning_rate": 4.8039291467872724e-05, "loss": 0.9129, "step": 2105 }, { "epoch": 0.686402081977879, "grad_norm": 1.1846219301223755, "learning_rate": 4.8029260197284736e-05, "loss": 0.9454, "step": 2110 }, { "epoch": 0.688028627195836, "grad_norm": 1.9435006380081177, "learning_rate": 4.801920438420221e-05, "loss": 0.8906, "step": 2115 }, { "epoch": 0.6896551724137931, "grad_norm": 1.4786064624786377, "learning_rate": 4.800912403934171e-05, "loss": 0.9017, "step": 2120 }, { "epoch": 0.6912817176317502, "grad_norm": 1.2287206649780273, "learning_rate": 4.799901917344597e-05, "loss": 0.8905, "step": 2125 }, { "epoch": 0.6929082628497072, "grad_norm": 1.3458510637283325, "learning_rate": 4.798888979728382e-05, "loss": 0.9032, "step": 2130 }, { "epoch": 0.6945348080676643, "grad_norm": 1.6034053564071655, "learning_rate": 4.7978735921650246e-05, "loss": 0.9107, "step": 2135 }, { "epoch": 0.6961613532856213, "grad_norm": 1.6460193395614624, "learning_rate": 4.796855755736632e-05, "loss": 0.9429, "step": 2140 }, { "epoch": 0.6977878985035784, "grad_norm": 1.706559181213379, "learning_rate": 4.795835471527922e-05, "loss": 0.9287, "step": 2145 }, { "epoch": 0.6994144437215355, "grad_norm": 2.190673351287842, "learning_rate": 4.79481274062622e-05, "loss": 0.9015, "step": 2150 }, { "epoch": 0.7010409889394925, "grad_norm": 1.5731890201568604, "learning_rate": 4.7937875641214606e-05, "loss": 0.8917, "step": 2155 }, { "epoch": 0.7026675341574495, "grad_norm": 1.4344974756240845, "learning_rate": 4.792759943106183e-05, "loss": 0.9089, "step": 2160 }, { "epoch": 0.7042940793754067, "grad_norm": 1.3271836042404175, "learning_rate": 4.791729878675534e-05, "loss": 0.9279, "step": 2165 }, { "epoch": 0.7059206245933637, "grad_norm": 1.2732399702072144, "learning_rate": 4.7906973719272617e-05, "loss": 0.9115, "step": 2170 }, { "epoch": 0.7075471698113207, "grad_norm": 1.2821569442749023, "learning_rate": 4.7896624239617175e-05, "loss": 0.8858, "step": 2175 }, { "epoch": 0.7091737150292778, "grad_norm": 1.4602237939834595, "learning_rate": 4.7886250358818565e-05, "loss": 0.8837, "step": 2180 }, { "epoch": 0.7108002602472349, "grad_norm": 1.2361509799957275, "learning_rate": 4.787585208793231e-05, "loss": 0.9278, "step": 2185 }, { "epoch": 0.7124268054651919, "grad_norm": 1.44894278049469, "learning_rate": 4.7865429438039955e-05, "loss": 0.9098, "step": 2190 }, { "epoch": 0.714053350683149, "grad_norm": 1.1851742267608643, "learning_rate": 4.7854982420249006e-05, "loss": 0.9004, "step": 2195 }, { "epoch": 0.715679895901106, "grad_norm": 1.3741412162780762, "learning_rate": 4.784451104569295e-05, "loss": 0.8742, "step": 2200 }, { "epoch": 0.7173064411190632, "grad_norm": 1.5128581523895264, "learning_rate": 4.783401532553123e-05, "loss": 0.9259, "step": 2205 }, { "epoch": 0.7189329863370202, "grad_norm": 1.4056744575500488, "learning_rate": 4.7823495270949225e-05, "loss": 0.8851, "step": 2210 }, { "epoch": 0.7205595315549772, "grad_norm": 1.4442654848098755, "learning_rate": 4.781295089315826e-05, "loss": 0.9064, "step": 2215 }, { "epoch": 0.7221860767729343, "grad_norm": 1.3330752849578857, "learning_rate": 4.780238220339558e-05, "loss": 0.9061, "step": 2220 }, { "epoch": 0.7238126219908914, "grad_norm": 1.152594804763794, "learning_rate": 4.779178921292432e-05, "loss": 0.919, "step": 2225 }, { "epoch": 0.7254391672088484, "grad_norm": 1.2645729780197144, "learning_rate": 4.778117193303354e-05, "loss": 0.914, "step": 2230 }, { "epoch": 0.7270657124268055, "grad_norm": 1.4880859851837158, "learning_rate": 4.7770530375038164e-05, "loss": 0.8844, "step": 2235 }, { "epoch": 0.7286922576447625, "grad_norm": 1.4192218780517578, "learning_rate": 4.7759864550279004e-05, "loss": 0.9375, "step": 2240 }, { "epoch": 0.7303188028627196, "grad_norm": 2.017812967300415, "learning_rate": 4.774917447012273e-05, "loss": 0.8891, "step": 2245 }, { "epoch": 0.7319453480806767, "grad_norm": 1.9211117029190063, "learning_rate": 4.773846014596185e-05, "loss": 0.9075, "step": 2250 }, { "epoch": 0.7335718932986337, "grad_norm": 1.5105257034301758, "learning_rate": 4.7727721589214724e-05, "loss": 0.9206, "step": 2255 }, { "epoch": 0.7351984385165907, "grad_norm": 1.4530128240585327, "learning_rate": 4.771695881132553e-05, "loss": 0.8995, "step": 2260 }, { "epoch": 0.7368249837345479, "grad_norm": 1.7887240648269653, "learning_rate": 4.7706171823764256e-05, "loss": 0.9408, "step": 2265 }, { "epoch": 0.7384515289525049, "grad_norm": 1.1628086566925049, "learning_rate": 4.769536063802671e-05, "loss": 0.8832, "step": 2270 }, { "epoch": 0.7400780741704619, "grad_norm": 1.3487486839294434, "learning_rate": 4.768452526563445e-05, "loss": 0.9265, "step": 2275 }, { "epoch": 0.741704619388419, "grad_norm": 1.3200750350952148, "learning_rate": 4.767366571813484e-05, "loss": 0.8875, "step": 2280 }, { "epoch": 0.743331164606376, "grad_norm": 1.6373474597930908, "learning_rate": 4.7662782007101e-05, "loss": 0.9107, "step": 2285 }, { "epoch": 0.7449577098243331, "grad_norm": 1.5338679552078247, "learning_rate": 4.76518741441318e-05, "loss": 0.8662, "step": 2290 }, { "epoch": 0.7465842550422902, "grad_norm": 1.4157332181930542, "learning_rate": 4.7640942140851863e-05, "loss": 0.895, "step": 2295 }, { "epoch": 0.7482108002602472, "grad_norm": 1.620431661605835, "learning_rate": 4.762998600891151e-05, "loss": 0.9, "step": 2300 }, { "epoch": 0.7498373454782042, "grad_norm": 1.3113834857940674, "learning_rate": 4.761900575998679e-05, "loss": 0.9092, "step": 2305 }, { "epoch": 0.7514638906961614, "grad_norm": 1.6634944677352905, "learning_rate": 4.7608001405779466e-05, "loss": 0.9151, "step": 2310 }, { "epoch": 0.7530904359141184, "grad_norm": 1.5336637496948242, "learning_rate": 4.759697295801697e-05, "loss": 0.9186, "step": 2315 }, { "epoch": 0.7547169811320755, "grad_norm": 1.4746085405349731, "learning_rate": 4.7585920428452435e-05, "loss": 0.9105, "step": 2320 }, { "epoch": 0.7563435263500325, "grad_norm": 1.7900280952453613, "learning_rate": 4.757484382886463e-05, "loss": 0.9452, "step": 2325 }, { "epoch": 0.7579700715679896, "grad_norm": 1.8564130067825317, "learning_rate": 4.7563743171057996e-05, "loss": 0.9133, "step": 2330 }, { "epoch": 0.7595966167859467, "grad_norm": 1.4008771181106567, "learning_rate": 4.75526184668626e-05, "loss": 0.908, "step": 2335 }, { "epoch": 0.7612231620039037, "grad_norm": 1.6394418478012085, "learning_rate": 4.7541469728134133e-05, "loss": 0.9185, "step": 2340 }, { "epoch": 0.7628497072218607, "grad_norm": 1.4217300415039062, "learning_rate": 4.753029696675394e-05, "loss": 0.8834, "step": 2345 }, { "epoch": 0.7644762524398179, "grad_norm": 1.330362319946289, "learning_rate": 4.751910019462891e-05, "loss": 0.8776, "step": 2350 }, { "epoch": 0.7661027976577749, "grad_norm": 1.4722425937652588, "learning_rate": 4.750787942369155e-05, "loss": 0.8812, "step": 2355 }, { "epoch": 0.7677293428757319, "grad_norm": 1.409554362297058, "learning_rate": 4.7496634665899934e-05, "loss": 0.882, "step": 2360 }, { "epoch": 0.769355888093689, "grad_norm": 1.1811152696609497, "learning_rate": 4.7485365933237715e-05, "loss": 0.9001, "step": 2365 }, { "epoch": 0.7709824333116461, "grad_norm": 1.621793508529663, "learning_rate": 4.7474073237714084e-05, "loss": 0.8859, "step": 2370 }, { "epoch": 0.7726089785296031, "grad_norm": 1.3503172397613525, "learning_rate": 4.7462756591363766e-05, "loss": 0.8783, "step": 2375 }, { "epoch": 0.7742355237475602, "grad_norm": 1.3635481595993042, "learning_rate": 4.745141600624702e-05, "loss": 0.9324, "step": 2380 }, { "epoch": 0.7758620689655172, "grad_norm": 1.522403597831726, "learning_rate": 4.74400514944496e-05, "loss": 0.8905, "step": 2385 }, { "epoch": 0.7774886141834743, "grad_norm": 1.4917200803756714, "learning_rate": 4.742866306808278e-05, "loss": 0.8944, "step": 2390 }, { "epoch": 0.7791151594014314, "grad_norm": 2.1061832904815674, "learning_rate": 4.7417250739283317e-05, "loss": 0.9652, "step": 2395 }, { "epoch": 0.7807417046193884, "grad_norm": 1.6707963943481445, "learning_rate": 4.740581452021343e-05, "loss": 0.9187, "step": 2400 }, { "epoch": 0.7823682498373454, "grad_norm": 1.6848689317703247, "learning_rate": 4.739435442306079e-05, "loss": 0.9165, "step": 2405 }, { "epoch": 0.7839947950553026, "grad_norm": 1.808882236480713, "learning_rate": 4.738287046003854e-05, "loss": 0.8752, "step": 2410 }, { "epoch": 0.7856213402732596, "grad_norm": 1.2108557224273682, "learning_rate": 4.7371362643385255e-05, "loss": 0.9161, "step": 2415 }, { "epoch": 0.7872478854912166, "grad_norm": 1.0468549728393555, "learning_rate": 4.735983098536491e-05, "loss": 0.9129, "step": 2420 }, { "epoch": 0.7888744307091737, "grad_norm": 1.52243173122406, "learning_rate": 4.734827549826688e-05, "loss": 0.8912, "step": 2425 }, { "epoch": 0.7905009759271308, "grad_norm": 1.3424164056777954, "learning_rate": 4.7336696194405994e-05, "loss": 0.8988, "step": 2430 }, { "epoch": 0.7921275211450879, "grad_norm": 1.2184618711471558, "learning_rate": 4.732509308612239e-05, "loss": 0.9041, "step": 2435 }, { "epoch": 0.7937540663630449, "grad_norm": 1.3695695400238037, "learning_rate": 4.731346618578162e-05, "loss": 0.8638, "step": 2440 }, { "epoch": 0.7953806115810019, "grad_norm": 1.3017903566360474, "learning_rate": 4.7301815505774583e-05, "loss": 0.8437, "step": 2445 }, { "epoch": 0.7970071567989591, "grad_norm": 1.4025452136993408, "learning_rate": 4.7290141058517504e-05, "loss": 0.8986, "step": 2450 }, { "epoch": 0.7986337020169161, "grad_norm": 1.1980440616607666, "learning_rate": 4.727844285645196e-05, "loss": 0.8751, "step": 2455 }, { "epoch": 0.8002602472348731, "grad_norm": 1.3725088834762573, "learning_rate": 4.726672091204483e-05, "loss": 0.8674, "step": 2460 }, { "epoch": 0.8018867924528302, "grad_norm": 1.3737421035766602, "learning_rate": 4.725497523778829e-05, "loss": 0.9022, "step": 2465 }, { "epoch": 0.8035133376707873, "grad_norm": 1.3868235349655151, "learning_rate": 4.724320584619982e-05, "loss": 0.8761, "step": 2470 }, { "epoch": 0.8051398828887443, "grad_norm": 1.4456682205200195, "learning_rate": 4.723141274982218e-05, "loss": 0.8854, "step": 2475 }, { "epoch": 0.8067664281067014, "grad_norm": 1.5126333236694336, "learning_rate": 4.7219595961223354e-05, "loss": 0.9043, "step": 2480 }, { "epoch": 0.8083929733246584, "grad_norm": 1.6392046213150024, "learning_rate": 4.7207755492996624e-05, "loss": 0.8932, "step": 2485 }, { "epoch": 0.8100195185426154, "grad_norm": 1.7545911073684692, "learning_rate": 4.719589135776048e-05, "loss": 0.8558, "step": 2490 }, { "epoch": 0.8116460637605726, "grad_norm": 1.3242253065109253, "learning_rate": 4.718400356815864e-05, "loss": 0.8766, "step": 2495 }, { "epoch": 0.8132726089785296, "grad_norm": 1.7495157718658447, "learning_rate": 4.717209213686003e-05, "loss": 0.8885, "step": 2500 }, { "epoch": 0.8148991541964866, "grad_norm": 1.633396863937378, "learning_rate": 4.716015707655877e-05, "loss": 0.8753, "step": 2505 }, { "epoch": 0.8165256994144438, "grad_norm": 1.9030710458755493, "learning_rate": 4.7148198399974174e-05, "loss": 0.9089, "step": 2510 }, { "epoch": 0.8181522446324008, "grad_norm": 1.7446460723876953, "learning_rate": 4.7136216119850705e-05, "loss": 0.8736, "step": 2515 }, { "epoch": 0.8197787898503578, "grad_norm": 1.4798496961593628, "learning_rate": 4.712421024895799e-05, "loss": 0.879, "step": 2520 }, { "epoch": 0.8214053350683149, "grad_norm": 1.3805863857269287, "learning_rate": 4.7112180800090786e-05, "loss": 0.9053, "step": 2525 }, { "epoch": 0.8230318802862719, "grad_norm": 1.631475567817688, "learning_rate": 4.710012778606901e-05, "loss": 0.8994, "step": 2530 }, { "epoch": 0.824658425504229, "grad_norm": 1.383959412574768, "learning_rate": 4.708805121973765e-05, "loss": 0.889, "step": 2535 }, { "epoch": 0.8262849707221861, "grad_norm": 1.3393383026123047, "learning_rate": 4.7075951113966833e-05, "loss": 0.8745, "step": 2540 }, { "epoch": 0.8279115159401431, "grad_norm": 1.2212260961532593, "learning_rate": 4.706382748165173e-05, "loss": 0.9062, "step": 2545 }, { "epoch": 0.8295380611581002, "grad_norm": 1.4094233512878418, "learning_rate": 4.705168033571262e-05, "loss": 0.8785, "step": 2550 }, { "epoch": 0.8311646063760573, "grad_norm": 1.528883457183838, "learning_rate": 4.703950968909483e-05, "loss": 0.862, "step": 2555 }, { "epoch": 0.8327911515940143, "grad_norm": 1.5031776428222656, "learning_rate": 4.7027315554768725e-05, "loss": 0.9039, "step": 2560 }, { "epoch": 0.8344176968119714, "grad_norm": 1.2601547241210938, "learning_rate": 4.7015097945729716e-05, "loss": 0.8877, "step": 2565 }, { "epoch": 0.8360442420299284, "grad_norm": 1.3196868896484375, "learning_rate": 4.700285687499821e-05, "loss": 0.9237, "step": 2570 }, { "epoch": 0.8376707872478855, "grad_norm": 1.3062645196914673, "learning_rate": 4.6990592355619636e-05, "loss": 0.925, "step": 2575 }, { "epoch": 0.8392973324658426, "grad_norm": 1.3196909427642822, "learning_rate": 4.6978304400664394e-05, "loss": 0.8834, "step": 2580 }, { "epoch": 0.8409238776837996, "grad_norm": 1.734815001487732, "learning_rate": 4.6965993023227884e-05, "loss": 0.8736, "step": 2585 }, { "epoch": 0.8425504229017566, "grad_norm": 1.677929162979126, "learning_rate": 4.6953658236430446e-05, "loss": 0.9021, "step": 2590 }, { "epoch": 0.8441769681197138, "grad_norm": 1.5291372537612915, "learning_rate": 4.694130005341738e-05, "loss": 0.8929, "step": 2595 }, { "epoch": 0.8458035133376708, "grad_norm": 1.5010364055633545, "learning_rate": 4.692891848735892e-05, "loss": 0.8938, "step": 2600 }, { "epoch": 0.8474300585556278, "grad_norm": 1.6797833442687988, "learning_rate": 4.69165135514502e-05, "loss": 0.8978, "step": 2605 }, { "epoch": 0.8490566037735849, "grad_norm": 1.5080785751342773, "learning_rate": 4.690408525891129e-05, "loss": 0.9132, "step": 2610 }, { "epoch": 0.850683148991542, "grad_norm": 1.5223890542984009, "learning_rate": 4.689163362298712e-05, "loss": 0.8613, "step": 2615 }, { "epoch": 0.852309694209499, "grad_norm": 1.2836233377456665, "learning_rate": 4.687915865694753e-05, "loss": 0.9149, "step": 2620 }, { "epoch": 0.8539362394274561, "grad_norm": 1.4054441452026367, "learning_rate": 4.68666603740872e-05, "loss": 0.8972, "step": 2625 }, { "epoch": 0.8555627846454131, "grad_norm": 1.5460009574890137, "learning_rate": 4.685413878772566e-05, "loss": 0.8848, "step": 2630 }, { "epoch": 0.8571893298633702, "grad_norm": 1.4086488485336304, "learning_rate": 4.6841593911207284e-05, "loss": 0.8871, "step": 2635 }, { "epoch": 0.8588158750813273, "grad_norm": 1.3456872701644897, "learning_rate": 4.682902575790126e-05, "loss": 0.9136, "step": 2640 }, { "epoch": 0.8604424202992843, "grad_norm": 1.3031237125396729, "learning_rate": 4.681643434120159e-05, "loss": 0.9064, "step": 2645 }, { "epoch": 0.8620689655172413, "grad_norm": 1.306841492652893, "learning_rate": 4.680381967452706e-05, "loss": 0.921, "step": 2650 }, { "epoch": 0.8636955107351985, "grad_norm": 1.3150807619094849, "learning_rate": 4.679118177132123e-05, "loss": 0.8844, "step": 2655 }, { "epoch": 0.8653220559531555, "grad_norm": 1.3911503553390503, "learning_rate": 4.677852064505243e-05, "loss": 0.8798, "step": 2660 }, { "epoch": 0.8669486011711126, "grad_norm": 1.567347764968872, "learning_rate": 4.676583630921375e-05, "loss": 0.9045, "step": 2665 }, { "epoch": 0.8685751463890696, "grad_norm": 1.4236321449279785, "learning_rate": 4.6753128777323e-05, "loss": 0.9217, "step": 2670 }, { "epoch": 0.8702016916070267, "grad_norm": 1.3262699842453003, "learning_rate": 4.674039806292271e-05, "loss": 0.9104, "step": 2675 }, { "epoch": 0.8718282368249838, "grad_norm": 2.200030565261841, "learning_rate": 4.6727644179580116e-05, "loss": 0.9022, "step": 2680 }, { "epoch": 0.8734547820429408, "grad_norm": 1.4918696880340576, "learning_rate": 4.671486714088715e-05, "loss": 0.9065, "step": 2685 }, { "epoch": 0.8750813272608978, "grad_norm": 1.3897427320480347, "learning_rate": 4.670206696046043e-05, "loss": 0.8672, "step": 2690 }, { "epoch": 0.876707872478855, "grad_norm": 1.2895896434783936, "learning_rate": 4.668924365194122e-05, "loss": 0.9005, "step": 2695 }, { "epoch": 0.878334417696812, "grad_norm": 1.4342011213302612, "learning_rate": 4.6676397228995436e-05, "loss": 0.9064, "step": 2700 }, { "epoch": 0.879960962914769, "grad_norm": 1.3006962537765503, "learning_rate": 4.666352770531363e-05, "loss": 0.8879, "step": 2705 }, { "epoch": 0.8815875081327261, "grad_norm": 1.392305612564087, "learning_rate": 4.665063509461097e-05, "loss": 0.9042, "step": 2710 }, { "epoch": 0.8832140533506831, "grad_norm": 1.2981715202331543, "learning_rate": 4.6637719410627235e-05, "loss": 0.8693, "step": 2715 }, { "epoch": 0.8848405985686402, "grad_norm": 1.6804430484771729, "learning_rate": 4.6624780667126774e-05, "loss": 0.8612, "step": 2720 }, { "epoch": 0.8864671437865973, "grad_norm": 1.8398326635360718, "learning_rate": 4.661181887789855e-05, "loss": 0.9062, "step": 2725 }, { "epoch": 0.8880936890045543, "grad_norm": 1.312214970588684, "learning_rate": 4.6598834056756044e-05, "loss": 0.8967, "step": 2730 }, { "epoch": 0.8897202342225113, "grad_norm": 1.310838222503662, "learning_rate": 4.65858262175373e-05, "loss": 0.8688, "step": 2735 }, { "epoch": 0.8913467794404685, "grad_norm": 1.5863673686981201, "learning_rate": 4.65727953741049e-05, "loss": 0.9344, "step": 2740 }, { "epoch": 0.8929733246584255, "grad_norm": 1.4352715015411377, "learning_rate": 4.655974154034591e-05, "loss": 0.8835, "step": 2745 }, { "epoch": 0.8945998698763825, "grad_norm": 1.1684788465499878, "learning_rate": 4.654666473017196e-05, "loss": 0.8938, "step": 2750 }, { "epoch": 0.8962264150943396, "grad_norm": 2.2960214614868164, "learning_rate": 4.653356495751909e-05, "loss": 0.8866, "step": 2755 }, { "epoch": 0.8978529603122967, "grad_norm": 1.143605351448059, "learning_rate": 4.6520442236347885e-05, "loss": 0.8762, "step": 2760 }, { "epoch": 0.8994795055302537, "grad_norm": 1.2188891172409058, "learning_rate": 4.650729658064331e-05, "loss": 0.883, "step": 2765 }, { "epoch": 0.9011060507482108, "grad_norm": 2.5894458293914795, "learning_rate": 4.649412800441484e-05, "loss": 0.8281, "step": 2770 }, { "epoch": 0.9027325959661678, "grad_norm": 1.4641715288162231, "learning_rate": 4.6480936521696336e-05, "loss": 0.9249, "step": 2775 }, { "epoch": 0.904359141184125, "grad_norm": 1.2389887571334839, "learning_rate": 4.646772214654608e-05, "loss": 0.9061, "step": 2780 }, { "epoch": 0.905985686402082, "grad_norm": 1.3739203214645386, "learning_rate": 4.645448489304677e-05, "loss": 0.9035, "step": 2785 }, { "epoch": 0.907612231620039, "grad_norm": 1.2330043315887451, "learning_rate": 4.644122477530545e-05, "loss": 0.8838, "step": 2790 }, { "epoch": 0.9092387768379961, "grad_norm": 1.3920153379440308, "learning_rate": 4.6427941807453546e-05, "loss": 0.9266, "step": 2795 }, { "epoch": 0.9108653220559532, "grad_norm": 1.2262370586395264, "learning_rate": 4.641463600364686e-05, "loss": 0.8685, "step": 2800 }, { "epoch": 0.9124918672739102, "grad_norm": 1.2604455947875977, "learning_rate": 4.6401307378065504e-05, "loss": 0.8783, "step": 2805 }, { "epoch": 0.9141184124918673, "grad_norm": 1.3860328197479248, "learning_rate": 4.638795594491391e-05, "loss": 0.9285, "step": 2810 }, { "epoch": 0.9157449577098243, "grad_norm": 1.2230820655822754, "learning_rate": 4.6374581718420816e-05, "loss": 0.8713, "step": 2815 }, { "epoch": 0.9173715029277814, "grad_norm": 1.3804121017456055, "learning_rate": 4.636118471283927e-05, "loss": 0.8416, "step": 2820 }, { "epoch": 0.9189980481457385, "grad_norm": 1.975463628768921, "learning_rate": 4.6347764942446586e-05, "loss": 0.9083, "step": 2825 }, { "epoch": 0.9206245933636955, "grad_norm": 1.3841066360473633, "learning_rate": 4.633432242154433e-05, "loss": 0.9191, "step": 2830 }, { "epoch": 0.9222511385816525, "grad_norm": 1.4560565948486328, "learning_rate": 4.632085716445832e-05, "loss": 0.8828, "step": 2835 }, { "epoch": 0.9238776837996097, "grad_norm": 1.326651692390442, "learning_rate": 4.6307369185538615e-05, "loss": 0.8824, "step": 2840 }, { "epoch": 0.9255042290175667, "grad_norm": 1.4383437633514404, "learning_rate": 4.629385849915946e-05, "loss": 0.8822, "step": 2845 }, { "epoch": 0.9271307742355237, "grad_norm": 1.440675139427185, "learning_rate": 4.628032511971934e-05, "loss": 0.8837, "step": 2850 }, { "epoch": 0.9287573194534808, "grad_norm": 1.3136472702026367, "learning_rate": 4.626676906164088e-05, "loss": 0.8702, "step": 2855 }, { "epoch": 0.9303838646714379, "grad_norm": 1.3955410718917847, "learning_rate": 4.625319033937091e-05, "loss": 0.8922, "step": 2860 }, { "epoch": 0.9320104098893949, "grad_norm": 1.4834078550338745, "learning_rate": 4.6239588967380395e-05, "loss": 0.8678, "step": 2865 }, { "epoch": 0.933636955107352, "grad_norm": 1.6584221124649048, "learning_rate": 4.6225964960164446e-05, "loss": 0.8983, "step": 2870 }, { "epoch": 0.935263500325309, "grad_norm": 1.3869572877883911, "learning_rate": 4.621231833224229e-05, "loss": 0.9001, "step": 2875 }, { "epoch": 0.936890045543266, "grad_norm": 1.1432338953018188, "learning_rate": 4.619864909815726e-05, "loss": 0.8908, "step": 2880 }, { "epoch": 0.9385165907612232, "grad_norm": 1.4364080429077148, "learning_rate": 4.618495727247678e-05, "loss": 0.8808, "step": 2885 }, { "epoch": 0.9401431359791802, "grad_norm": 1.2910454273223877, "learning_rate": 4.6171242869792374e-05, "loss": 0.8863, "step": 2890 }, { "epoch": 0.9417696811971373, "grad_norm": 1.3370767831802368, "learning_rate": 4.615750590471959e-05, "loss": 0.8907, "step": 2895 }, { "epoch": 0.9433962264150944, "grad_norm": 1.2998255491256714, "learning_rate": 4.6143746391898044e-05, "loss": 0.8684, "step": 2900 }, { "epoch": 0.9450227716330514, "grad_norm": 1.5886045694351196, "learning_rate": 4.6129964345991385e-05, "loss": 0.8951, "step": 2905 }, { "epoch": 0.9466493168510085, "grad_norm": 1.482511281967163, "learning_rate": 4.611615978168725e-05, "loss": 0.8952, "step": 2910 }, { "epoch": 0.9482758620689655, "grad_norm": 1.1892169713974, "learning_rate": 4.610233271369729e-05, "loss": 0.8742, "step": 2915 }, { "epoch": 0.9499024072869225, "grad_norm": 1.2602440118789673, "learning_rate": 4.608848315675715e-05, "loss": 0.9004, "step": 2920 }, { "epoch": 0.9515289525048797, "grad_norm": 1.2956856489181519, "learning_rate": 4.6074611125626436e-05, "loss": 0.8786, "step": 2925 }, { "epoch": 0.9531554977228367, "grad_norm": 1.416490912437439, "learning_rate": 4.6060716635088676e-05, "loss": 0.9027, "step": 2930 }, { "epoch": 0.9547820429407937, "grad_norm": 1.337730884552002, "learning_rate": 4.604679969995137e-05, "loss": 0.8769, "step": 2935 }, { "epoch": 0.9564085881587508, "grad_norm": 1.163804054260254, "learning_rate": 4.6032860335045924e-05, "loss": 0.8771, "step": 2940 }, { "epoch": 0.9580351333767079, "grad_norm": 1.1030604839324951, "learning_rate": 4.601889855522765e-05, "loss": 0.8812, "step": 2945 }, { "epoch": 0.9596616785946649, "grad_norm": 1.2699700593948364, "learning_rate": 4.600491437537574e-05, "loss": 0.8947, "step": 2950 }, { "epoch": 0.961288223812622, "grad_norm": 1.7778109312057495, "learning_rate": 4.599090781039327e-05, "loss": 0.9163, "step": 2955 }, { "epoch": 0.962914769030579, "grad_norm": 1.5649638175964355, "learning_rate": 4.597687887520715e-05, "loss": 0.8735, "step": 2960 }, { "epoch": 0.9645413142485361, "grad_norm": 1.3073519468307495, "learning_rate": 4.596282758476816e-05, "loss": 0.8858, "step": 2965 }, { "epoch": 0.9661678594664932, "grad_norm": 1.1120704412460327, "learning_rate": 4.594875395405089e-05, "loss": 0.864, "step": 2970 }, { "epoch": 0.9677944046844502, "grad_norm": 1.3906222581863403, "learning_rate": 4.593465799805373e-05, "loss": 0.8967, "step": 2975 }, { "epoch": 0.9694209499024072, "grad_norm": 1.4969758987426758, "learning_rate": 4.592053973179888e-05, "loss": 0.8854, "step": 2980 }, { "epoch": 0.9710474951203644, "grad_norm": 1.6781532764434814, "learning_rate": 4.590639917033229e-05, "loss": 0.9221, "step": 2985 }, { "epoch": 0.9726740403383214, "grad_norm": 1.2319928407669067, "learning_rate": 4.58922363287237e-05, "loss": 0.8902, "step": 2990 }, { "epoch": 0.9743005855562785, "grad_norm": 1.184988021850586, "learning_rate": 4.587805122206658e-05, "loss": 0.9114, "step": 2995 }, { "epoch": 0.9759271307742355, "grad_norm": 1.4273937940597534, "learning_rate": 4.586384386547811e-05, "loss": 0.8506, "step": 3000 }, { "epoch": 0.9775536759921926, "grad_norm": 1.3128321170806885, "learning_rate": 4.5849614274099225e-05, "loss": 0.8894, "step": 3005 }, { "epoch": 0.9791802212101497, "grad_norm": 1.4246121644973755, "learning_rate": 4.583536246309453e-05, "loss": 0.8924, "step": 3010 }, { "epoch": 0.9808067664281067, "grad_norm": 1.5242570638656616, "learning_rate": 4.582108844765229e-05, "loss": 0.8789, "step": 3015 }, { "epoch": 0.9824333116460637, "grad_norm": 1.3728033304214478, "learning_rate": 4.5806792242984474e-05, "loss": 0.8804, "step": 3020 }, { "epoch": 0.9840598568640209, "grad_norm": 1.5065276622772217, "learning_rate": 4.579247386432668e-05, "loss": 0.8645, "step": 3025 }, { "epoch": 0.9856864020819779, "grad_norm": 1.2983787059783936, "learning_rate": 4.577813332693812e-05, "loss": 0.8885, "step": 3030 }, { "epoch": 0.9873129472999349, "grad_norm": 1.2127952575683594, "learning_rate": 4.576377064610164e-05, "loss": 0.8836, "step": 3035 }, { "epoch": 0.988939492517892, "grad_norm": 1.419260859489441, "learning_rate": 4.5749385837123704e-05, "loss": 0.9065, "step": 3040 }, { "epoch": 0.9905660377358491, "grad_norm": 1.3550113439559937, "learning_rate": 4.5734978915334304e-05, "loss": 0.8881, "step": 3045 }, { "epoch": 0.9921925829538061, "grad_norm": 1.350577712059021, "learning_rate": 4.572054989608705e-05, "loss": 0.8883, "step": 3050 }, { "epoch": 0.9938191281717632, "grad_norm": 1.504529595375061, "learning_rate": 4.570609879475907e-05, "loss": 0.9133, "step": 3055 }, { "epoch": 0.9954456733897202, "grad_norm": 1.9724973440170288, "learning_rate": 4.569162562675104e-05, "loss": 0.8879, "step": 3060 }, { "epoch": 0.9970722186076773, "grad_norm": 1.8143373727798462, "learning_rate": 4.5677130407487146e-05, "loss": 0.8855, "step": 3065 }, { "epoch": 0.9986987638256344, "grad_norm": 1.2685192823410034, "learning_rate": 4.566261315241508e-05, "loss": 0.893, "step": 3070 }, { "epoch": 1.0, "eval_f1": 0.8037560325813309, "eval_loss": 0.433349609375, "eval_precision": 0.807616054319172, "eval_recall": 0.8021557797293335, "eval_runtime": 386.1683, "eval_samples_per_second": 1018.817, "eval_steps_per_second": 1.991, "step": 3074 }, { "epoch": 1.0003253090435915, "grad_norm": 1.1765072345733643, "learning_rate": 4.5648073877006015e-05, "loss": 0.9082, "step": 3075 }, { "epoch": 1.0019518542615484, "grad_norm": 1.153677225112915, "learning_rate": 4.563351259675459e-05, "loss": 0.8622, "step": 3080 }, { "epoch": 1.0035783994795056, "grad_norm": 1.3374100923538208, "learning_rate": 4.561892932717889e-05, "loss": 0.8422, "step": 3085 }, { "epoch": 1.0052049446974627, "grad_norm": 1.4088090658187866, "learning_rate": 4.560432408382045e-05, "loss": 0.844, "step": 3090 }, { "epoch": 1.0068314899154196, "grad_norm": 1.291945219039917, "learning_rate": 4.558969688224419e-05, "loss": 0.8199, "step": 3095 }, { "epoch": 1.0084580351333767, "grad_norm": 1.2867183685302734, "learning_rate": 4.557504773803848e-05, "loss": 0.7941, "step": 3100 }, { "epoch": 1.0100845803513339, "grad_norm": 1.8451282978057861, "learning_rate": 4.556037666681504e-05, "loss": 0.8482, "step": 3105 }, { "epoch": 1.0117111255692908, "grad_norm": 1.1598424911499023, "learning_rate": 4.5545683684208956e-05, "loss": 0.8516, "step": 3110 }, { "epoch": 1.013337670787248, "grad_norm": 1.5371285676956177, "learning_rate": 4.553096880587868e-05, "loss": 0.8441, "step": 3115 }, { "epoch": 1.014964216005205, "grad_norm": 1.3084255456924438, "learning_rate": 4.5516232047506e-05, "loss": 0.8646, "step": 3120 }, { "epoch": 1.016590761223162, "grad_norm": 1.26137113571167, "learning_rate": 4.5501473424796006e-05, "loss": 0.8542, "step": 3125 }, { "epoch": 1.018217306441119, "grad_norm": 1.456751823425293, "learning_rate": 4.54866929534771e-05, "loss": 0.8286, "step": 3130 }, { "epoch": 1.0198438516590762, "grad_norm": 1.7253113985061646, "learning_rate": 4.5471890649300966e-05, "loss": 0.8075, "step": 3135 }, { "epoch": 1.0214703968770331, "grad_norm": 1.271817922592163, "learning_rate": 4.545706652804257e-05, "loss": 0.8448, "step": 3140 }, { "epoch": 1.0230969420949902, "grad_norm": 1.3296678066253662, "learning_rate": 4.5442220605500084e-05, "loss": 0.8087, "step": 3145 }, { "epoch": 1.0247234873129474, "grad_norm": 1.412864089012146, "learning_rate": 4.5427352897494976e-05, "loss": 0.8875, "step": 3150 }, { "epoch": 1.0263500325309043, "grad_norm": 1.459274172782898, "learning_rate": 4.541246341987188e-05, "loss": 0.8426, "step": 3155 }, { "epoch": 1.0279765777488614, "grad_norm": 1.540281057357788, "learning_rate": 4.539755218849866e-05, "loss": 0.848, "step": 3160 }, { "epoch": 1.0296031229668186, "grad_norm": 1.1720516681671143, "learning_rate": 4.5382619219266344e-05, "loss": 0.8478, "step": 3165 }, { "epoch": 1.0312296681847755, "grad_norm": 1.8027592897415161, "learning_rate": 4.536766452808914e-05, "loss": 0.8281, "step": 3170 }, { "epoch": 1.0328562134027326, "grad_norm": 1.347786545753479, "learning_rate": 4.535268813090439e-05, "loss": 0.8236, "step": 3175 }, { "epoch": 1.0344827586206897, "grad_norm": 1.5309745073318481, "learning_rate": 4.5337690043672596e-05, "loss": 0.8432, "step": 3180 }, { "epoch": 1.0361093038386466, "grad_norm": 1.2138981819152832, "learning_rate": 4.532267028237734e-05, "loss": 0.8434, "step": 3185 }, { "epoch": 1.0377358490566038, "grad_norm": 1.7488796710968018, "learning_rate": 4.530762886302533e-05, "loss": 0.8674, "step": 3190 }, { "epoch": 1.039362394274561, "grad_norm": 1.6811249256134033, "learning_rate": 4.5292565801646345e-05, "loss": 0.8327, "step": 3195 }, { "epoch": 1.0409889394925178, "grad_norm": 1.3569321632385254, "learning_rate": 4.527748111429323e-05, "loss": 0.8543, "step": 3200 }, { "epoch": 1.042615484710475, "grad_norm": 1.6806261539459229, "learning_rate": 4.5262374817041866e-05, "loss": 0.8375, "step": 3205 }, { "epoch": 1.044242029928432, "grad_norm": 1.279037356376648, "learning_rate": 4.5247246925991185e-05, "loss": 0.8153, "step": 3210 }, { "epoch": 1.045868575146389, "grad_norm": 1.3666880130767822, "learning_rate": 4.52320974572631e-05, "loss": 0.8497, "step": 3215 }, { "epoch": 1.047495120364346, "grad_norm": 1.3386924266815186, "learning_rate": 4.521692642700256e-05, "loss": 0.8067, "step": 3220 }, { "epoch": 1.0491216655823032, "grad_norm": 1.5989760160446167, "learning_rate": 4.520173385137746e-05, "loss": 0.8329, "step": 3225 }, { "epoch": 1.0507482108002602, "grad_norm": 1.301072597503662, "learning_rate": 4.518651974657868e-05, "loss": 0.8358, "step": 3230 }, { "epoch": 1.0523747560182173, "grad_norm": 1.4418226480484009, "learning_rate": 4.517128412882002e-05, "loss": 0.854, "step": 3235 }, { "epoch": 1.0540013012361744, "grad_norm": 1.4297183752059937, "learning_rate": 4.515602701433822e-05, "loss": 0.8391, "step": 3240 }, { "epoch": 1.0556278464541313, "grad_norm": 1.52646005153656, "learning_rate": 4.5140748419392925e-05, "loss": 0.827, "step": 3245 }, { "epoch": 1.0572543916720885, "grad_norm": 1.5673209428787231, "learning_rate": 4.512544836026668e-05, "loss": 0.8623, "step": 3250 }, { "epoch": 1.0588809368900456, "grad_norm": 1.285749912261963, "learning_rate": 4.51101268532649e-05, "loss": 0.8512, "step": 3255 }, { "epoch": 1.0605074821080025, "grad_norm": 1.3538873195648193, "learning_rate": 4.509478391471584e-05, "loss": 0.8562, "step": 3260 }, { "epoch": 1.0621340273259596, "grad_norm": 1.7159193754196167, "learning_rate": 4.5079419560970635e-05, "loss": 0.8611, "step": 3265 }, { "epoch": 1.0637605725439168, "grad_norm": 1.3144075870513916, "learning_rate": 4.506403380840321e-05, "loss": 0.8279, "step": 3270 }, { "epoch": 1.065387117761874, "grad_norm": 1.4127336740493774, "learning_rate": 4.504862667341029e-05, "loss": 0.8397, "step": 3275 }, { "epoch": 1.0670136629798308, "grad_norm": 1.3163704872131348, "learning_rate": 4.503319817241141e-05, "loss": 0.8247, "step": 3280 }, { "epoch": 1.068640208197788, "grad_norm": 1.5540528297424316, "learning_rate": 4.501774832184887e-05, "loss": 0.8524, "step": 3285 }, { "epoch": 1.070266753415745, "grad_norm": 1.4497528076171875, "learning_rate": 4.500227713818771e-05, "loss": 0.8417, "step": 3290 }, { "epoch": 1.071893298633702, "grad_norm": 1.3648549318313599, "learning_rate": 4.4986784637915716e-05, "loss": 0.8481, "step": 3295 }, { "epoch": 1.073519843851659, "grad_norm": 1.4077669382095337, "learning_rate": 4.497127083754339e-05, "loss": 0.8491, "step": 3300 }, { "epoch": 1.0751463890696162, "grad_norm": 1.279617428779602, "learning_rate": 4.4955735753603935e-05, "loss": 0.8468, "step": 3305 }, { "epoch": 1.0767729342875731, "grad_norm": 1.3531652688980103, "learning_rate": 4.494017940265323e-05, "loss": 0.8135, "step": 3310 }, { "epoch": 1.0783994795055303, "grad_norm": 1.3865724802017212, "learning_rate": 4.492460180126981e-05, "loss": 0.877, "step": 3315 }, { "epoch": 1.0800260247234874, "grad_norm": 1.4385253190994263, "learning_rate": 4.490900296605488e-05, "loss": 0.8266, "step": 3320 }, { "epoch": 1.0816525699414443, "grad_norm": 1.432668685913086, "learning_rate": 4.4893382913632265e-05, "loss": 0.8489, "step": 3325 }, { "epoch": 1.0832791151594015, "grad_norm": 1.6042089462280273, "learning_rate": 4.4877741660648386e-05, "loss": 0.8029, "step": 3330 }, { "epoch": 1.0849056603773586, "grad_norm": 1.888527750968933, "learning_rate": 4.486207922377228e-05, "loss": 0.8356, "step": 3335 }, { "epoch": 1.0865322055953155, "grad_norm": 1.3238813877105713, "learning_rate": 4.4846395619695556e-05, "loss": 0.8501, "step": 3340 }, { "epoch": 1.0881587508132726, "grad_norm": 1.2120682001113892, "learning_rate": 4.4830690865132364e-05, "loss": 0.8394, "step": 3345 }, { "epoch": 1.0897852960312298, "grad_norm": 1.4257880449295044, "learning_rate": 4.4814964976819406e-05, "loss": 0.8392, "step": 3350 }, { "epoch": 1.0914118412491867, "grad_norm": 1.3651307821273804, "learning_rate": 4.479921797151591e-05, "loss": 0.8333, "step": 3355 }, { "epoch": 1.0930383864671438, "grad_norm": 1.3549360036849976, "learning_rate": 4.47834498660036e-05, "loss": 0.8421, "step": 3360 }, { "epoch": 1.094664931685101, "grad_norm": 1.3159157037734985, "learning_rate": 4.4767660677086695e-05, "loss": 0.8652, "step": 3365 }, { "epoch": 1.0962914769030578, "grad_norm": 1.9989577531814575, "learning_rate": 4.4751850421591883e-05, "loss": 0.8075, "step": 3370 }, { "epoch": 1.097918022121015, "grad_norm": 1.2322934865951538, "learning_rate": 4.4736019116368285e-05, "loss": 0.8156, "step": 3375 }, { "epoch": 1.099544567338972, "grad_norm": 1.3388209342956543, "learning_rate": 4.472016677828749e-05, "loss": 0.8468, "step": 3380 }, { "epoch": 1.101171112556929, "grad_norm": 1.4790794849395752, "learning_rate": 4.470429342424346e-05, "loss": 0.8243, "step": 3385 }, { "epoch": 1.1027976577748861, "grad_norm": 1.611876368522644, "learning_rate": 4.4688399071152585e-05, "loss": 0.8547, "step": 3390 }, { "epoch": 1.1044242029928433, "grad_norm": 1.9611629247665405, "learning_rate": 4.467248373595362e-05, "loss": 0.8313, "step": 3395 }, { "epoch": 1.1060507482108002, "grad_norm": 1.3478145599365234, "learning_rate": 4.465654743560769e-05, "loss": 0.82, "step": 3400 }, { "epoch": 1.1076772934287573, "grad_norm": 1.2600871324539185, "learning_rate": 4.464059018709824e-05, "loss": 0.8135, "step": 3405 }, { "epoch": 1.1093038386467144, "grad_norm": 1.1783373355865479, "learning_rate": 4.462461200743108e-05, "loss": 0.8768, "step": 3410 }, { "epoch": 1.1109303838646714, "grad_norm": 1.286030888557434, "learning_rate": 4.460861291363429e-05, "loss": 0.8359, "step": 3415 }, { "epoch": 1.1125569290826285, "grad_norm": 1.572346568107605, "learning_rate": 4.459259292275825e-05, "loss": 0.8175, "step": 3420 }, { "epoch": 1.1141834743005856, "grad_norm": 1.7113646268844604, "learning_rate": 4.457655205187562e-05, "loss": 0.8611, "step": 3425 }, { "epoch": 1.1158100195185425, "grad_norm": 1.6623269319534302, "learning_rate": 4.4560490318081296e-05, "loss": 0.8456, "step": 3430 }, { "epoch": 1.1174365647364997, "grad_norm": 1.4825401306152344, "learning_rate": 4.454440773849242e-05, "loss": 0.8673, "step": 3435 }, { "epoch": 1.1190631099544568, "grad_norm": 1.1152056455612183, "learning_rate": 4.4528304330248344e-05, "loss": 0.8692, "step": 3440 }, { "epoch": 1.1206896551724137, "grad_norm": 1.2672181129455566, "learning_rate": 4.4512180110510616e-05, "loss": 0.8341, "step": 3445 }, { "epoch": 1.1223162003903708, "grad_norm": 1.7076444625854492, "learning_rate": 4.449603509646297e-05, "loss": 0.8471, "step": 3450 }, { "epoch": 1.123942745608328, "grad_norm": 1.6158792972564697, "learning_rate": 4.447986930531129e-05, "loss": 0.8468, "step": 3455 }, { "epoch": 1.1255692908262849, "grad_norm": 1.7821043729782104, "learning_rate": 4.446368275428361e-05, "loss": 0.856, "step": 3460 }, { "epoch": 1.127195836044242, "grad_norm": 1.264714002609253, "learning_rate": 4.444747546063009e-05, "loss": 0.818, "step": 3465 }, { "epoch": 1.1288223812621991, "grad_norm": 1.210666537284851, "learning_rate": 4.443124744162299e-05, "loss": 0.8637, "step": 3470 }, { "epoch": 1.130448926480156, "grad_norm": 1.4012978076934814, "learning_rate": 4.4414998714556664e-05, "loss": 0.8551, "step": 3475 }, { "epoch": 1.1320754716981132, "grad_norm": 1.4476510286331177, "learning_rate": 4.439872929674752e-05, "loss": 0.842, "step": 3480 }, { "epoch": 1.1337020169160703, "grad_norm": 1.296002984046936, "learning_rate": 4.438243920553403e-05, "loss": 0.8114, "step": 3485 }, { "epoch": 1.1353285621340272, "grad_norm": 1.5089248418807983, "learning_rate": 4.43661284582767e-05, "loss": 0.8109, "step": 3490 }, { "epoch": 1.1369551073519844, "grad_norm": 1.6034770011901855, "learning_rate": 4.4349797072358046e-05, "loss": 0.8756, "step": 3495 }, { "epoch": 1.1385816525699415, "grad_norm": 1.9600484371185303, "learning_rate": 4.4333445065182554e-05, "loss": 0.8303, "step": 3500 }, { "epoch": 1.1402081977878984, "grad_norm": 1.444976568222046, "learning_rate": 4.4317072454176735e-05, "loss": 0.8213, "step": 3505 }, { "epoch": 1.1418347430058555, "grad_norm": 1.5570260286331177, "learning_rate": 4.430067925678902e-05, "loss": 0.8284, "step": 3510 }, { "epoch": 1.1434612882238127, "grad_norm": 1.4301804304122925, "learning_rate": 4.4284265490489786e-05, "loss": 0.8244, "step": 3515 }, { "epoch": 1.1450878334417696, "grad_norm": 1.420100450515747, "learning_rate": 4.4267831172771345e-05, "loss": 0.8529, "step": 3520 }, { "epoch": 1.1467143786597267, "grad_norm": 1.7922027111053467, "learning_rate": 4.425137632114789e-05, "loss": 0.8326, "step": 3525 }, { "epoch": 1.1483409238776838, "grad_norm": 1.5114059448242188, "learning_rate": 4.423490095315551e-05, "loss": 0.8102, "step": 3530 }, { "epoch": 1.1499674690956407, "grad_norm": 1.2283594608306885, "learning_rate": 4.421840508635217e-05, "loss": 0.8577, "step": 3535 }, { "epoch": 1.1515940143135979, "grad_norm": 1.232806921005249, "learning_rate": 4.4201888738317646e-05, "loss": 0.8248, "step": 3540 }, { "epoch": 1.153220559531555, "grad_norm": 1.244806170463562, "learning_rate": 4.4185351926653575e-05, "loss": 0.8641, "step": 3545 }, { "epoch": 1.1548471047495121, "grad_norm": 1.365405797958374, "learning_rate": 4.416879466898339e-05, "loss": 0.8381, "step": 3550 }, { "epoch": 1.156473649967469, "grad_norm": 1.2330610752105713, "learning_rate": 4.4152216982952304e-05, "loss": 0.8146, "step": 3555 }, { "epoch": 1.1581001951854262, "grad_norm": 1.2920881509780884, "learning_rate": 4.413561888622732e-05, "loss": 0.8224, "step": 3560 }, { "epoch": 1.1597267404033833, "grad_norm": 1.205123782157898, "learning_rate": 4.411900039649718e-05, "loss": 0.8303, "step": 3565 }, { "epoch": 1.1613532856213402, "grad_norm": 1.283276915550232, "learning_rate": 4.4102361531472354e-05, "loss": 0.8507, "step": 3570 }, { "epoch": 1.1629798308392973, "grad_norm": 1.3507086038589478, "learning_rate": 4.4085702308885034e-05, "loss": 0.8421, "step": 3575 }, { "epoch": 1.1646063760572545, "grad_norm": 1.352668285369873, "learning_rate": 4.406902274648912e-05, "loss": 0.8164, "step": 3580 }, { "epoch": 1.1662329212752114, "grad_norm": 1.3182393312454224, "learning_rate": 4.4052322862060164e-05, "loss": 0.8509, "step": 3585 }, { "epoch": 1.1678594664931685, "grad_norm": 1.4305580854415894, "learning_rate": 4.403560267339539e-05, "loss": 0.8246, "step": 3590 }, { "epoch": 1.1694860117111257, "grad_norm": 1.5163909196853638, "learning_rate": 4.401886219831365e-05, "loss": 0.8093, "step": 3595 }, { "epoch": 1.1711125569290826, "grad_norm": 1.462122917175293, "learning_rate": 4.4002101454655444e-05, "loss": 0.8553, "step": 3600 }, { "epoch": 1.1727391021470397, "grad_norm": 1.171677589416504, "learning_rate": 4.398532046028282e-05, "loss": 0.8219, "step": 3605 }, { "epoch": 1.1743656473649968, "grad_norm": 1.333949089050293, "learning_rate": 4.3968519233079455e-05, "loss": 0.831, "step": 3610 }, { "epoch": 1.1759921925829537, "grad_norm": 1.4222731590270996, "learning_rate": 4.395169779095058e-05, "loss": 0.8608, "step": 3615 }, { "epoch": 1.1776187378009109, "grad_norm": 1.1908038854599, "learning_rate": 4.3934856151822936e-05, "loss": 0.8406, "step": 3620 }, { "epoch": 1.179245283018868, "grad_norm": 1.3163139820098877, "learning_rate": 4.391799433364484e-05, "loss": 0.8326, "step": 3625 }, { "epoch": 1.180871828236825, "grad_norm": 1.7144033908843994, "learning_rate": 4.390111235438607e-05, "loss": 0.8451, "step": 3630 }, { "epoch": 1.182498373454782, "grad_norm": 1.4411003589630127, "learning_rate": 4.38842102320379e-05, "loss": 0.8472, "step": 3635 }, { "epoch": 1.1841249186727392, "grad_norm": 1.4063175916671753, "learning_rate": 4.3867287984613104e-05, "loss": 0.8397, "step": 3640 }, { "epoch": 1.185751463890696, "grad_norm": 1.5677992105484009, "learning_rate": 4.385034563014586e-05, "loss": 0.8602, "step": 3645 }, { "epoch": 1.1873780091086532, "grad_norm": 1.3289976119995117, "learning_rate": 4.383338318669179e-05, "loss": 0.8245, "step": 3650 }, { "epoch": 1.1890045543266103, "grad_norm": 1.304602861404419, "learning_rate": 4.381640067232792e-05, "loss": 0.8366, "step": 3655 }, { "epoch": 1.1906310995445673, "grad_norm": 1.2882555723190308, "learning_rate": 4.379939810515268e-05, "loss": 0.8275, "step": 3660 }, { "epoch": 1.1922576447625244, "grad_norm": 1.4000959396362305, "learning_rate": 4.378237550328587e-05, "loss": 0.8867, "step": 3665 }, { "epoch": 1.1938841899804815, "grad_norm": 1.3018320798873901, "learning_rate": 4.376533288486863e-05, "loss": 0.8156, "step": 3670 }, { "epoch": 1.1955107351984384, "grad_norm": 1.4709359407424927, "learning_rate": 4.374827026806342e-05, "loss": 0.8513, "step": 3675 }, { "epoch": 1.1971372804163956, "grad_norm": 1.3351629972457886, "learning_rate": 4.373118767105404e-05, "loss": 0.8592, "step": 3680 }, { "epoch": 1.1987638256343527, "grad_norm": 1.5742251873016357, "learning_rate": 4.371408511204558e-05, "loss": 0.8281, "step": 3685 }, { "epoch": 1.2003903708523098, "grad_norm": 3.134833812713623, "learning_rate": 4.3696962609264376e-05, "loss": 0.8409, "step": 3690 }, { "epoch": 1.2020169160702667, "grad_norm": 1.4564152956008911, "learning_rate": 4.367982018095804e-05, "loss": 0.8282, "step": 3695 }, { "epoch": 1.2036434612882239, "grad_norm": 1.4403876066207886, "learning_rate": 4.366265784539543e-05, "loss": 0.8192, "step": 3700 }, { "epoch": 1.205270006506181, "grad_norm": 1.2065421342849731, "learning_rate": 4.364547562086659e-05, "loss": 0.838, "step": 3705 }, { "epoch": 1.206896551724138, "grad_norm": 1.2946791648864746, "learning_rate": 4.362827352568279e-05, "loss": 0.8424, "step": 3710 }, { "epoch": 1.208523096942095, "grad_norm": 1.4981353282928467, "learning_rate": 4.361105157817644e-05, "loss": 0.8334, "step": 3715 }, { "epoch": 1.2101496421600522, "grad_norm": 1.2502180337905884, "learning_rate": 4.3593809796701146e-05, "loss": 0.8064, "step": 3720 }, { "epoch": 1.211776187378009, "grad_norm": 1.3969752788543701, "learning_rate": 4.357654819963163e-05, "loss": 0.853, "step": 3725 }, { "epoch": 1.2134027325959662, "grad_norm": 1.3182035684585571, "learning_rate": 4.3559266805363726e-05, "loss": 0.8284, "step": 3730 }, { "epoch": 1.2150292778139233, "grad_norm": 1.3733329772949219, "learning_rate": 4.354196563231438e-05, "loss": 0.8411, "step": 3735 }, { "epoch": 1.2166558230318802, "grad_norm": 1.3710018396377563, "learning_rate": 4.3524644698921613e-05, "loss": 0.8413, "step": 3740 }, { "epoch": 1.2182823682498374, "grad_norm": 1.3340781927108765, "learning_rate": 4.3507304023644505e-05, "loss": 0.8259, "step": 3745 }, { "epoch": 1.2199089134677945, "grad_norm": 1.3577436208724976, "learning_rate": 4.3489943624963165e-05, "loss": 0.8301, "step": 3750 }, { "epoch": 1.2215354586857514, "grad_norm": 1.25057053565979, "learning_rate": 4.347256352137874e-05, "loss": 0.8414, "step": 3755 }, { "epoch": 1.2231620039037086, "grad_norm": 1.21907377243042, "learning_rate": 4.345516373141336e-05, "loss": 0.8443, "step": 3760 }, { "epoch": 1.2247885491216657, "grad_norm": 1.3550604581832886, "learning_rate": 4.3437744273610134e-05, "loss": 0.8414, "step": 3765 }, { "epoch": 1.2264150943396226, "grad_norm": 1.596409559249878, "learning_rate": 4.342030516653316e-05, "loss": 0.8242, "step": 3770 }, { "epoch": 1.2280416395575797, "grad_norm": 1.8163790702819824, "learning_rate": 4.340284642876744e-05, "loss": 0.8141, "step": 3775 }, { "epoch": 1.2296681847755369, "grad_norm": 1.5910507440567017, "learning_rate": 4.3385368078918906e-05, "loss": 0.8667, "step": 3780 }, { "epoch": 1.2312947299934938, "grad_norm": 1.4154040813446045, "learning_rate": 4.3367870135614406e-05, "loss": 0.8564, "step": 3785 }, { "epoch": 1.232921275211451, "grad_norm": 1.3193119764328003, "learning_rate": 4.3350352617501655e-05, "loss": 0.846, "step": 3790 }, { "epoch": 1.234547820429408, "grad_norm": 1.25566828250885, "learning_rate": 4.333281554324924e-05, "loss": 0.8672, "step": 3795 }, { "epoch": 1.236174365647365, "grad_norm": 1.360984206199646, "learning_rate": 4.3315258931546575e-05, "loss": 0.8605, "step": 3800 }, { "epoch": 1.237800910865322, "grad_norm": 1.1945350170135498, "learning_rate": 4.329768280110391e-05, "loss": 0.8331, "step": 3805 }, { "epoch": 1.2394274560832792, "grad_norm": 1.581734538078308, "learning_rate": 4.328008717065228e-05, "loss": 0.8156, "step": 3810 }, { "epoch": 1.241054001301236, "grad_norm": 1.2851417064666748, "learning_rate": 4.3262472058943516e-05, "loss": 0.8327, "step": 3815 }, { "epoch": 1.2426805465191932, "grad_norm": 1.3768497705459595, "learning_rate": 4.32448374847502e-05, "loss": 0.8525, "step": 3820 }, { "epoch": 1.2443070917371504, "grad_norm": 1.3952062129974365, "learning_rate": 4.3227183466865674e-05, "loss": 0.8556, "step": 3825 }, { "epoch": 1.2459336369551073, "grad_norm": 1.4856510162353516, "learning_rate": 4.320951002410396e-05, "loss": 0.8322, "step": 3830 }, { "epoch": 1.2475601821730644, "grad_norm": 1.4851360321044922, "learning_rate": 4.319181717529984e-05, "loss": 0.802, "step": 3835 }, { "epoch": 1.2491867273910215, "grad_norm": 1.5054421424865723, "learning_rate": 4.3174104939308725e-05, "loss": 0.7976, "step": 3840 }, { "epoch": 1.2508132726089785, "grad_norm": 1.2087050676345825, "learning_rate": 4.315637333500672e-05, "loss": 0.8506, "step": 3845 }, { "epoch": 1.2524398178269356, "grad_norm": 1.23397696018219, "learning_rate": 4.313862238129055e-05, "loss": 0.8424, "step": 3850 }, { "epoch": 1.2540663630448927, "grad_norm": 1.4934568405151367, "learning_rate": 4.312085209707756e-05, "loss": 0.8617, "step": 3855 }, { "epoch": 1.2556929082628496, "grad_norm": 1.785396933555603, "learning_rate": 4.310306250130573e-05, "loss": 0.8516, "step": 3860 }, { "epoch": 1.2573194534808068, "grad_norm": 1.5227130651474, "learning_rate": 4.308525361293358e-05, "loss": 0.8366, "step": 3865 }, { "epoch": 1.258945998698764, "grad_norm": 1.609745740890503, "learning_rate": 4.306742545094021e-05, "loss": 0.8652, "step": 3870 }, { "epoch": 1.2605725439167208, "grad_norm": 1.6145974397659302, "learning_rate": 4.3049578034325267e-05, "loss": 0.8352, "step": 3875 }, { "epoch": 1.262199089134678, "grad_norm": 1.452619194984436, "learning_rate": 4.303171138210889e-05, "loss": 0.8591, "step": 3880 }, { "epoch": 1.263825634352635, "grad_norm": 1.5672597885131836, "learning_rate": 4.3013825513331734e-05, "loss": 0.8194, "step": 3885 }, { "epoch": 1.265452179570592, "grad_norm": 1.3183356523513794, "learning_rate": 4.299592044705495e-05, "loss": 0.86, "step": 3890 }, { "epoch": 1.267078724788549, "grad_norm": 1.2463092803955078, "learning_rate": 4.297799620236012e-05, "loss": 0.8454, "step": 3895 }, { "epoch": 1.2687052700065062, "grad_norm": 1.545088291168213, "learning_rate": 4.296005279834928e-05, "loss": 0.8409, "step": 3900 }, { "epoch": 1.2703318152244631, "grad_norm": 1.7090797424316406, "learning_rate": 4.294209025414488e-05, "loss": 0.8669, "step": 3905 }, { "epoch": 1.2719583604424203, "grad_norm": 1.3837332725524902, "learning_rate": 4.292410858888977e-05, "loss": 0.8373, "step": 3910 }, { "epoch": 1.2735849056603774, "grad_norm": 1.3135814666748047, "learning_rate": 4.2906107821747176e-05, "loss": 0.8253, "step": 3915 }, { "epoch": 1.2752114508783343, "grad_norm": 1.7542099952697754, "learning_rate": 4.288808797190068e-05, "loss": 0.8576, "step": 3920 }, { "epoch": 1.2768379960962914, "grad_norm": 1.6044366359710693, "learning_rate": 4.2870049058554204e-05, "loss": 0.8531, "step": 3925 }, { "epoch": 1.2784645413142486, "grad_norm": 1.5496442317962646, "learning_rate": 4.2851991100931976e-05, "loss": 0.8392, "step": 3930 }, { "epoch": 1.2800910865322055, "grad_norm": 1.1760096549987793, "learning_rate": 4.283391411827854e-05, "loss": 0.8284, "step": 3935 }, { "epoch": 1.2817176317501626, "grad_norm": 1.7636168003082275, "learning_rate": 4.2815818129858696e-05, "loss": 0.8476, "step": 3940 }, { "epoch": 1.2833441769681198, "grad_norm": 1.467448353767395, "learning_rate": 4.27977031549575e-05, "loss": 0.8397, "step": 3945 }, { "epoch": 1.2849707221860767, "grad_norm": 1.397292137145996, "learning_rate": 4.2779569212880265e-05, "loss": 0.8278, "step": 3950 }, { "epoch": 1.2865972674040338, "grad_norm": 1.4854656457901, "learning_rate": 4.2761416322952484e-05, "loss": 0.8056, "step": 3955 }, { "epoch": 1.288223812621991, "grad_norm": 1.2849969863891602, "learning_rate": 4.274324450451986e-05, "loss": 0.8191, "step": 3960 }, { "epoch": 1.2898503578399478, "grad_norm": 1.3098669052124023, "learning_rate": 4.2725053776948276e-05, "loss": 0.8418, "step": 3965 }, { "epoch": 1.291476903057905, "grad_norm": 1.2671085596084595, "learning_rate": 4.270684415962375e-05, "loss": 0.8589, "step": 3970 }, { "epoch": 1.293103448275862, "grad_norm": 1.3790541887283325, "learning_rate": 4.2688615671952434e-05, "loss": 0.8109, "step": 3975 }, { "epoch": 1.294729993493819, "grad_norm": 1.7077306509017944, "learning_rate": 4.2670368333360614e-05, "loss": 0.8487, "step": 3980 }, { "epoch": 1.2963565387117761, "grad_norm": 1.24965500831604, "learning_rate": 4.2652102163294633e-05, "loss": 0.8361, "step": 3985 }, { "epoch": 1.2979830839297333, "grad_norm": 1.0798790454864502, "learning_rate": 4.2633817181220915e-05, "loss": 0.8193, "step": 3990 }, { "epoch": 1.2996096291476902, "grad_norm": 1.306200623512268, "learning_rate": 4.261551340662594e-05, "loss": 0.863, "step": 3995 }, { "epoch": 1.3012361743656473, "grad_norm": 1.2955039739608765, "learning_rate": 4.25971908590162e-05, "loss": 0.8507, "step": 4000 }, { "epoch": 1.3028627195836044, "grad_norm": 1.3031805753707886, "learning_rate": 4.2578849557918214e-05, "loss": 0.8305, "step": 4005 }, { "epoch": 1.3044892648015614, "grad_norm": 1.2753714323043823, "learning_rate": 4.2560489522878477e-05, "loss": 0.816, "step": 4010 }, { "epoch": 1.3061158100195185, "grad_norm": 1.4747792482376099, "learning_rate": 4.254211077346343e-05, "loss": 0.8263, "step": 4015 }, { "epoch": 1.3077423552374756, "grad_norm": 1.511306881904602, "learning_rate": 4.2523713329259484e-05, "loss": 0.8426, "step": 4020 }, { "epoch": 1.3093689004554325, "grad_norm": 1.3999812602996826, "learning_rate": 4.250529720987297e-05, "loss": 0.8266, "step": 4025 }, { "epoch": 1.3109954456733897, "grad_norm": 1.2741875648498535, "learning_rate": 4.248686243493012e-05, "loss": 0.8154, "step": 4030 }, { "epoch": 1.3126219908913468, "grad_norm": 1.491328239440918, "learning_rate": 4.2468409024077026e-05, "loss": 0.824, "step": 4035 }, { "epoch": 1.3142485361093037, "grad_norm": 1.2104856967926025, "learning_rate": 4.244993699697968e-05, "loss": 0.8123, "step": 4040 }, { "epoch": 1.3158750813272608, "grad_norm": 1.2370222806930542, "learning_rate": 4.243144637332387e-05, "loss": 0.833, "step": 4045 }, { "epoch": 1.317501626545218, "grad_norm": 1.3075882196426392, "learning_rate": 4.241293717281522e-05, "loss": 0.8418, "step": 4050 }, { "epoch": 1.319128171763175, "grad_norm": 1.3516918420791626, "learning_rate": 4.239440941517919e-05, "loss": 0.8196, "step": 4055 }, { "epoch": 1.320754716981132, "grad_norm": 1.6177656650543213, "learning_rate": 4.2375863120160955e-05, "loss": 0.8561, "step": 4060 }, { "epoch": 1.3223812621990891, "grad_norm": 1.4435988664627075, "learning_rate": 4.2357298307525465e-05, "loss": 0.8467, "step": 4065 }, { "epoch": 1.3240078074170463, "grad_norm": 1.2693395614624023, "learning_rate": 4.233871499705743e-05, "loss": 0.8264, "step": 4070 }, { "epoch": 1.3256343526350032, "grad_norm": 1.22328519821167, "learning_rate": 4.2320113208561254e-05, "loss": 0.8373, "step": 4075 }, { "epoch": 1.3272608978529603, "grad_norm": 1.560599684715271, "learning_rate": 4.230149296186102e-05, "loss": 0.8208, "step": 4080 }, { "epoch": 1.3288874430709174, "grad_norm": 1.2332558631896973, "learning_rate": 4.228285427680052e-05, "loss": 0.8438, "step": 4085 }, { "epoch": 1.3305139882888743, "grad_norm": 1.301831841468811, "learning_rate": 4.226419717324315e-05, "loss": 0.8099, "step": 4090 }, { "epoch": 1.3321405335068315, "grad_norm": 1.3738305568695068, "learning_rate": 4.2245521671071954e-05, "loss": 0.8358, "step": 4095 }, { "epoch": 1.3337670787247886, "grad_norm": 1.287594199180603, "learning_rate": 4.2226827790189604e-05, "loss": 0.858, "step": 4100 }, { "epoch": 1.3353936239427457, "grad_norm": 1.2868845462799072, "learning_rate": 4.220811555051834e-05, "loss": 0.8231, "step": 4105 }, { "epoch": 1.3370201691607027, "grad_norm": 1.3951457738876343, "learning_rate": 4.2189384971999956e-05, "loss": 0.8375, "step": 4110 }, { "epoch": 1.3386467143786598, "grad_norm": 1.1373964548110962, "learning_rate": 4.217063607459581e-05, "loss": 0.813, "step": 4115 }, { "epoch": 1.340273259596617, "grad_norm": 1.5586174726486206, "learning_rate": 4.2151868878286774e-05, "loss": 0.8818, "step": 4120 }, { "epoch": 1.3418998048145738, "grad_norm": 1.2126144170761108, "learning_rate": 4.2133083403073217e-05, "loss": 0.823, "step": 4125 }, { "epoch": 1.343526350032531, "grad_norm": 1.4792400598526, "learning_rate": 4.2114279668975e-05, "loss": 0.8427, "step": 4130 }, { "epoch": 1.345152895250488, "grad_norm": 1.5245232582092285, "learning_rate": 4.209545769603143e-05, "loss": 0.8448, "step": 4135 }, { "epoch": 1.346779440468445, "grad_norm": 1.541342854499817, "learning_rate": 4.2076617504301254e-05, "loss": 0.8457, "step": 4140 }, { "epoch": 1.3484059856864021, "grad_norm": 1.507347822189331, "learning_rate": 4.2057759113862645e-05, "loss": 0.8319, "step": 4145 }, { "epoch": 1.3500325309043593, "grad_norm": 1.3123031854629517, "learning_rate": 4.2038882544813156e-05, "loss": 0.8426, "step": 4150 }, { "epoch": 1.3516590761223162, "grad_norm": 1.232587456703186, "learning_rate": 4.2019987817269726e-05, "loss": 0.8275, "step": 4155 }, { "epoch": 1.3532856213402733, "grad_norm": 1.5750877857208252, "learning_rate": 4.2001074951368645e-05, "loss": 0.8375, "step": 4160 }, { "epoch": 1.3549121665582304, "grad_norm": 1.3290623426437378, "learning_rate": 4.198214396726552e-05, "loss": 0.8488, "step": 4165 }, { "epoch": 1.3565387117761873, "grad_norm": 1.305728554725647, "learning_rate": 4.196319488513527e-05, "loss": 0.8538, "step": 4170 }, { "epoch": 1.3581652569941445, "grad_norm": 1.481744408607483, "learning_rate": 4.1944227725172124e-05, "loss": 0.846, "step": 4175 }, { "epoch": 1.3597918022121016, "grad_norm": 1.418739676475525, "learning_rate": 4.192524250758953e-05, "loss": 0.8305, "step": 4180 }, { "epoch": 1.3614183474300585, "grad_norm": 1.3980685472488403, "learning_rate": 4.190623925262025e-05, "loss": 0.8249, "step": 4185 }, { "epoch": 1.3630448926480156, "grad_norm": 1.3720591068267822, "learning_rate": 4.188721798051619e-05, "loss": 0.8291, "step": 4190 }, { "epoch": 1.3646714378659728, "grad_norm": 1.6241954565048218, "learning_rate": 4.186817871154851e-05, "loss": 0.8498, "step": 4195 }, { "epoch": 1.3662979830839297, "grad_norm": 1.3464034795761108, "learning_rate": 4.184912146600754e-05, "loss": 0.8359, "step": 4200 }, { "epoch": 1.3679245283018868, "grad_norm": 1.3985097408294678, "learning_rate": 4.1830046264202746e-05, "loss": 0.8453, "step": 4205 }, { "epoch": 1.369551073519844, "grad_norm": 1.3351011276245117, "learning_rate": 4.181095312646277e-05, "loss": 0.8504, "step": 4210 }, { "epoch": 1.3711776187378009, "grad_norm": 1.2163972854614258, "learning_rate": 4.179184207313532e-05, "loss": 0.8333, "step": 4215 }, { "epoch": 1.372804163955758, "grad_norm": 1.3725260496139526, "learning_rate": 4.177271312458724e-05, "loss": 0.8135, "step": 4220 }, { "epoch": 1.3744307091737151, "grad_norm": 1.365580677986145, "learning_rate": 4.1753566301204414e-05, "loss": 0.795, "step": 4225 }, { "epoch": 1.376057254391672, "grad_norm": 1.175580620765686, "learning_rate": 4.173440162339179e-05, "loss": 0.8431, "step": 4230 }, { "epoch": 1.3776837996096292, "grad_norm": 1.8168003559112549, "learning_rate": 4.1715219111573343e-05, "loss": 0.8279, "step": 4235 }, { "epoch": 1.3793103448275863, "grad_norm": 1.7160747051239014, "learning_rate": 4.169601878619206e-05, "loss": 0.8292, "step": 4240 }, { "epoch": 1.3809368900455432, "grad_norm": 1.8717422485351562, "learning_rate": 4.167680066770989e-05, "loss": 0.8277, "step": 4245 }, { "epoch": 1.3825634352635003, "grad_norm": 1.320741891860962, "learning_rate": 4.165756477660777e-05, "loss": 0.8419, "step": 4250 }, { "epoch": 1.3841899804814575, "grad_norm": 1.2906664609909058, "learning_rate": 4.1638311133385566e-05, "loss": 0.8554, "step": 4255 }, { "epoch": 1.3858165256994144, "grad_norm": 1.23048734664917, "learning_rate": 4.161903975856205e-05, "loss": 0.8213, "step": 4260 }, { "epoch": 1.3874430709173715, "grad_norm": 1.2474876642227173, "learning_rate": 4.1599750672674926e-05, "loss": 0.8223, "step": 4265 }, { "epoch": 1.3890696161353286, "grad_norm": 1.122220516204834, "learning_rate": 4.158044389628073e-05, "loss": 0.8307, "step": 4270 }, { "epoch": 1.3906961613532856, "grad_norm": 1.42267644405365, "learning_rate": 4.1561119449954875e-05, "loss": 0.8614, "step": 4275 }, { "epoch": 1.3923227065712427, "grad_norm": 1.469414472579956, "learning_rate": 4.154177735429161e-05, "loss": 0.8456, "step": 4280 }, { "epoch": 1.3939492517891998, "grad_norm": 1.480615258216858, "learning_rate": 4.152241762990397e-05, "loss": 0.8431, "step": 4285 }, { "epoch": 1.3955757970071567, "grad_norm": 1.188193678855896, "learning_rate": 4.150304029742381e-05, "loss": 0.8272, "step": 4290 }, { "epoch": 1.3972023422251139, "grad_norm": 1.2787021398544312, "learning_rate": 4.148364537750172e-05, "loss": 0.8252, "step": 4295 }, { "epoch": 1.398828887443071, "grad_norm": 1.683848261833191, "learning_rate": 4.146423289080705e-05, "loss": 0.8548, "step": 4300 }, { "epoch": 1.400455432661028, "grad_norm": 1.2856671810150146, "learning_rate": 4.144480285802787e-05, "loss": 0.837, "step": 4305 }, { "epoch": 1.402081977878985, "grad_norm": 1.0996733903884888, "learning_rate": 4.1425355299870915e-05, "loss": 0.8322, "step": 4310 }, { "epoch": 1.4037085230969422, "grad_norm": 1.442826509475708, "learning_rate": 4.140589023706166e-05, "loss": 0.8422, "step": 4315 }, { "epoch": 1.405335068314899, "grad_norm": 1.440993070602417, "learning_rate": 4.138640769034419e-05, "loss": 0.8305, "step": 4320 }, { "epoch": 1.4069616135328562, "grad_norm": 1.373291254043579, "learning_rate": 4.1366907680481236e-05, "loss": 0.8486, "step": 4325 }, { "epoch": 1.4085881587508133, "grad_norm": 1.3873260021209717, "learning_rate": 4.1347390228254145e-05, "loss": 0.842, "step": 4330 }, { "epoch": 1.4102147039687702, "grad_norm": 1.4314653873443604, "learning_rate": 4.132785535446283e-05, "loss": 0.8271, "step": 4335 }, { "epoch": 1.4118412491867274, "grad_norm": 1.5679014921188354, "learning_rate": 4.130830307992579e-05, "loss": 0.8708, "step": 4340 }, { "epoch": 1.4134677944046845, "grad_norm": 1.4917312860488892, "learning_rate": 4.128873342548007e-05, "loss": 0.8243, "step": 4345 }, { "epoch": 1.4150943396226414, "grad_norm": 1.3635531663894653, "learning_rate": 4.126914641198123e-05, "loss": 0.8086, "step": 4350 }, { "epoch": 1.4167208848405985, "grad_norm": 1.4005459547042847, "learning_rate": 4.1249542060303325e-05, "loss": 0.8659, "step": 4355 }, { "epoch": 1.4183474300585557, "grad_norm": 1.474947452545166, "learning_rate": 4.1229920391338896e-05, "loss": 0.8312, "step": 4360 }, { "epoch": 1.4199739752765126, "grad_norm": 1.6620033979415894, "learning_rate": 4.121028142599894e-05, "loss": 0.8135, "step": 4365 }, { "epoch": 1.4216005204944697, "grad_norm": 1.54270339012146, "learning_rate": 4.119062518521288e-05, "loss": 0.8418, "step": 4370 }, { "epoch": 1.4232270657124269, "grad_norm": 1.403079867362976, "learning_rate": 4.117095168992855e-05, "loss": 0.8137, "step": 4375 }, { "epoch": 1.4248536109303838, "grad_norm": 1.7641295194625854, "learning_rate": 4.115126096111218e-05, "loss": 0.8301, "step": 4380 }, { "epoch": 1.426480156148341, "grad_norm": 1.3226902484893799, "learning_rate": 4.113155301974836e-05, "loss": 0.8438, "step": 4385 }, { "epoch": 1.428106701366298, "grad_norm": 1.511962652206421, "learning_rate": 4.1111827886840015e-05, "loss": 0.8363, "step": 4390 }, { "epoch": 1.429733246584255, "grad_norm": 1.3033729791641235, "learning_rate": 4.1092085583408415e-05, "loss": 0.819, "step": 4395 }, { "epoch": 1.431359791802212, "grad_norm": 1.3664463758468628, "learning_rate": 4.1072326130493104e-05, "loss": 0.8678, "step": 4400 }, { "epoch": 1.4329863370201692, "grad_norm": 1.1375826597213745, "learning_rate": 4.105254954915192e-05, "loss": 0.8187, "step": 4405 }, { "epoch": 1.434612882238126, "grad_norm": 1.6312228441238403, "learning_rate": 4.103275586046095e-05, "loss": 0.8563, "step": 4410 }, { "epoch": 1.4362394274560832, "grad_norm": 1.4228655099868774, "learning_rate": 4.10129450855145e-05, "loss": 0.8562, "step": 4415 }, { "epoch": 1.4378659726740404, "grad_norm": 1.4475655555725098, "learning_rate": 4.099311724542509e-05, "loss": 0.8341, "step": 4420 }, { "epoch": 1.4394925178919973, "grad_norm": 1.57426118850708, "learning_rate": 4.097327236132346e-05, "loss": 0.8437, "step": 4425 }, { "epoch": 1.4411190631099544, "grad_norm": 1.3540325164794922, "learning_rate": 4.0953410454358455e-05, "loss": 0.8148, "step": 4430 }, { "epoch": 1.4427456083279115, "grad_norm": 1.5952328443527222, "learning_rate": 4.09335315456971e-05, "loss": 0.8095, "step": 4435 }, { "epoch": 1.4443721535458685, "grad_norm": 1.453477144241333, "learning_rate": 4.091363565652455e-05, "loss": 0.8476, "step": 4440 }, { "epoch": 1.4459986987638256, "grad_norm": 1.2013658285140991, "learning_rate": 4.089372280804401e-05, "loss": 0.8346, "step": 4445 }, { "epoch": 1.4476252439817827, "grad_norm": 1.597764492034912, "learning_rate": 4.08737930214768e-05, "loss": 0.823, "step": 4450 }, { "epoch": 1.4492517891997396, "grad_norm": 1.2798513174057007, "learning_rate": 4.0853846318062285e-05, "loss": 0.8302, "step": 4455 }, { "epoch": 1.4508783344176968, "grad_norm": 1.343942403793335, "learning_rate": 4.083388271905783e-05, "loss": 0.7966, "step": 4460 }, { "epoch": 1.452504879635654, "grad_norm": 1.3336753845214844, "learning_rate": 4.0813902245738845e-05, "loss": 0.8371, "step": 4465 }, { "epoch": 1.4541314248536108, "grad_norm": 1.4769428968429565, "learning_rate": 4.0793904919398685e-05, "loss": 0.8164, "step": 4470 }, { "epoch": 1.455757970071568, "grad_norm": 1.4396363496780396, "learning_rate": 4.077389076134871e-05, "loss": 0.8093, "step": 4475 }, { "epoch": 1.457384515289525, "grad_norm": 1.3327172994613647, "learning_rate": 4.075385979291817e-05, "loss": 0.8464, "step": 4480 }, { "epoch": 1.459011060507482, "grad_norm": 1.42361581325531, "learning_rate": 4.0733812035454264e-05, "loss": 0.8227, "step": 4485 }, { "epoch": 1.460637605725439, "grad_norm": 1.6889543533325195, "learning_rate": 4.071374751032206e-05, "loss": 0.8036, "step": 4490 }, { "epoch": 1.4622641509433962, "grad_norm": 1.4943583011627197, "learning_rate": 4.0693666238904525e-05, "loss": 0.8635, "step": 4495 }, { "epoch": 1.4638906961613534, "grad_norm": 1.2892553806304932, "learning_rate": 4.067356824260244e-05, "loss": 0.8995, "step": 4500 }, { "epoch": 1.4655172413793103, "grad_norm": 1.1585367918014526, "learning_rate": 4.065345354283444e-05, "loss": 0.8217, "step": 4505 }, { "epoch": 1.4671437865972674, "grad_norm": 1.838330626487732, "learning_rate": 4.0633322161036924e-05, "loss": 0.8417, "step": 4510 }, { "epoch": 1.4687703318152245, "grad_norm": 1.6699178218841553, "learning_rate": 4.061317411866411e-05, "loss": 0.8639, "step": 4515 }, { "epoch": 1.4703968770331814, "grad_norm": 1.583653450012207, "learning_rate": 4.059300943718794e-05, "loss": 0.8534, "step": 4520 }, { "epoch": 1.4720234222511386, "grad_norm": 1.8750361204147339, "learning_rate": 4.0572828138098106e-05, "loss": 0.8142, "step": 4525 }, { "epoch": 1.4736499674690957, "grad_norm": 1.3993104696273804, "learning_rate": 4.055263024290201e-05, "loss": 0.7999, "step": 4530 }, { "epoch": 1.4752765126870526, "grad_norm": 1.4795867204666138, "learning_rate": 4.053241577312472e-05, "loss": 0.8381, "step": 4535 }, { "epoch": 1.4769030579050098, "grad_norm": 1.5453708171844482, "learning_rate": 4.051218475030899e-05, "loss": 0.8532, "step": 4540 }, { "epoch": 1.4785296031229669, "grad_norm": 1.2877352237701416, "learning_rate": 4.0491937196015214e-05, "loss": 0.8439, "step": 4545 }, { "epoch": 1.480156148340924, "grad_norm": 1.3333778381347656, "learning_rate": 4.0471673131821386e-05, "loss": 0.8575, "step": 4550 }, { "epoch": 1.481782693558881, "grad_norm": 1.1907378435134888, "learning_rate": 4.045139257932311e-05, "loss": 0.8341, "step": 4555 }, { "epoch": 1.483409238776838, "grad_norm": 1.38154137134552, "learning_rate": 4.043109556013356e-05, "loss": 0.8292, "step": 4560 }, { "epoch": 1.4850357839947952, "grad_norm": 1.0535759925842285, "learning_rate": 4.041078209588346e-05, "loss": 0.8321, "step": 4565 }, { "epoch": 1.486662329212752, "grad_norm": 1.3018041849136353, "learning_rate": 4.039045220822104e-05, "loss": 0.8524, "step": 4570 }, { "epoch": 1.4882888744307092, "grad_norm": 1.3513591289520264, "learning_rate": 4.037010591881206e-05, "loss": 0.8601, "step": 4575 }, { "epoch": 1.4899154196486664, "grad_norm": 1.492363452911377, "learning_rate": 4.0349743249339756e-05, "loss": 0.8493, "step": 4580 }, { "epoch": 1.4915419648666233, "grad_norm": 1.2432150840759277, "learning_rate": 4.0329364221504796e-05, "loss": 0.833, "step": 4585 }, { "epoch": 1.4931685100845804, "grad_norm": 1.3918063640594482, "learning_rate": 4.0308968857025296e-05, "loss": 0.8231, "step": 4590 }, { "epoch": 1.4947950553025375, "grad_norm": 1.4169198274612427, "learning_rate": 4.02885571776368e-05, "loss": 0.8075, "step": 4595 }, { "epoch": 1.4964216005204944, "grad_norm": 1.5862514972686768, "learning_rate": 4.026812920509221e-05, "loss": 0.8306, "step": 4600 }, { "epoch": 1.4980481457384516, "grad_norm": 1.168561577796936, "learning_rate": 4.0247684961161815e-05, "loss": 0.8166, "step": 4605 }, { "epoch": 1.4996746909564087, "grad_norm": 1.3219666481018066, "learning_rate": 4.022722446763322e-05, "loss": 0.824, "step": 4610 }, { "epoch": 1.5013012361743656, "grad_norm": 1.3574262857437134, "learning_rate": 4.0206747746311376e-05, "loss": 0.8256, "step": 4615 }, { "epoch": 1.5029277813923227, "grad_norm": 1.6269795894622803, "learning_rate": 4.0186254819018504e-05, "loss": 0.826, "step": 4620 }, { "epoch": 1.5045543266102799, "grad_norm": 1.2621785402297974, "learning_rate": 4.01657457075941e-05, "loss": 0.8803, "step": 4625 }, { "epoch": 1.5061808718282368, "grad_norm": 1.5004969835281372, "learning_rate": 4.014522043389493e-05, "loss": 0.849, "step": 4630 }, { "epoch": 1.507807417046194, "grad_norm": 1.3491284847259521, "learning_rate": 4.012467901979496e-05, "loss": 0.8125, "step": 4635 }, { "epoch": 1.509433962264151, "grad_norm": 1.6598507165908813, "learning_rate": 4.010412148718535e-05, "loss": 0.8602, "step": 4640 }, { "epoch": 1.511060507482108, "grad_norm": 1.3131393194198608, "learning_rate": 4.008354785797447e-05, "loss": 0.7952, "step": 4645 }, { "epoch": 1.512687052700065, "grad_norm": 1.3953964710235596, "learning_rate": 4.006295815408781e-05, "loss": 0.8184, "step": 4650 }, { "epoch": 1.5143135979180222, "grad_norm": 1.2592307329177856, "learning_rate": 4.004235239746803e-05, "loss": 0.8359, "step": 4655 }, { "epoch": 1.5159401431359791, "grad_norm": 1.329085350036621, "learning_rate": 4.0021730610074856e-05, "loss": 0.8396, "step": 4660 }, { "epoch": 1.5175666883539363, "grad_norm": 1.179267168045044, "learning_rate": 4.0001092813885116e-05, "loss": 0.841, "step": 4665 }, { "epoch": 1.5191932335718934, "grad_norm": 1.4990901947021484, "learning_rate": 3.99804390308927e-05, "loss": 0.8006, "step": 4670 }, { "epoch": 1.5208197787898503, "grad_norm": 1.373935341835022, "learning_rate": 3.995976928310855e-05, "loss": 0.8255, "step": 4675 }, { "epoch": 1.5224463240078074, "grad_norm": 1.231176733970642, "learning_rate": 3.9939083592560586e-05, "loss": 0.8338, "step": 4680 }, { "epoch": 1.5240728692257646, "grad_norm": 1.495413899421692, "learning_rate": 3.991838198129376e-05, "loss": 0.8551, "step": 4685 }, { "epoch": 1.5256994144437215, "grad_norm": 1.2172863483428955, "learning_rate": 3.989766447136995e-05, "loss": 0.8559, "step": 4690 }, { "epoch": 1.5273259596616786, "grad_norm": 1.4367778301239014, "learning_rate": 3.987693108486801e-05, "loss": 0.8168, "step": 4695 }, { "epoch": 1.5289525048796357, "grad_norm": 1.2298533916473389, "learning_rate": 3.9856181843883687e-05, "loss": 0.8144, "step": 4700 }, { "epoch": 1.5305790500975927, "grad_norm": 1.074080467224121, "learning_rate": 3.9835416770529655e-05, "loss": 0.8193, "step": 4705 }, { "epoch": 1.5322055953155498, "grad_norm": 1.3786952495574951, "learning_rate": 3.981463588693543e-05, "loss": 0.8501, "step": 4710 }, { "epoch": 1.533832140533507, "grad_norm": 1.4469772577285767, "learning_rate": 3.9793839215247394e-05, "loss": 0.8235, "step": 4715 }, { "epoch": 1.5354586857514638, "grad_norm": 1.1742690801620483, "learning_rate": 3.977302677762876e-05, "loss": 0.8097, "step": 4720 }, { "epoch": 1.537085230969421, "grad_norm": 1.3256275653839111, "learning_rate": 3.975219859625953e-05, "loss": 0.8283, "step": 4725 }, { "epoch": 1.538711776187378, "grad_norm": 1.3272302150726318, "learning_rate": 3.973135469333647e-05, "loss": 0.8486, "step": 4730 }, { "epoch": 1.540338321405335, "grad_norm": 1.0883424282073975, "learning_rate": 3.971049509107315e-05, "loss": 0.8468, "step": 4735 }, { "epoch": 1.5419648666232921, "grad_norm": 1.385239839553833, "learning_rate": 3.968961981169983e-05, "loss": 0.8537, "step": 4740 }, { "epoch": 1.5435914118412493, "grad_norm": 1.4347864389419556, "learning_rate": 3.9668728877463465e-05, "loss": 0.8353, "step": 4745 }, { "epoch": 1.5452179570592062, "grad_norm": 1.398784875869751, "learning_rate": 3.9647822310627755e-05, "loss": 0.8364, "step": 4750 }, { "epoch": 1.5468445022771633, "grad_norm": 1.2484323978424072, "learning_rate": 3.962690013347299e-05, "loss": 0.8463, "step": 4755 }, { "epoch": 1.5484710474951204, "grad_norm": 1.4271115064620972, "learning_rate": 3.9605962368296135e-05, "loss": 0.8263, "step": 4760 }, { "epoch": 1.5500975927130773, "grad_norm": 1.662020206451416, "learning_rate": 3.958500903741077e-05, "loss": 0.8248, "step": 4765 }, { "epoch": 1.5517241379310345, "grad_norm": 1.8004493713378906, "learning_rate": 3.956404016314703e-05, "loss": 0.8058, "step": 4770 }, { "epoch": 1.5533506831489916, "grad_norm": 1.2193635702133179, "learning_rate": 3.954305576785166e-05, "loss": 0.8422, "step": 4775 }, { "epoch": 1.5549772283669485, "grad_norm": 1.7280292510986328, "learning_rate": 3.9522055873887906e-05, "loss": 0.836, "step": 4780 }, { "epoch": 1.5566037735849056, "grad_norm": 1.300466775894165, "learning_rate": 3.950104050363557e-05, "loss": 0.8202, "step": 4785 }, { "epoch": 1.5582303188028628, "grad_norm": 1.218762755393982, "learning_rate": 3.94800096794909e-05, "loss": 0.8471, "step": 4790 }, { "epoch": 1.5598568640208197, "grad_norm": 1.5563271045684814, "learning_rate": 3.945896342386666e-05, "loss": 0.8435, "step": 4795 }, { "epoch": 1.5614834092387768, "grad_norm": 1.4026027917861938, "learning_rate": 3.943790175919201e-05, "loss": 0.8382, "step": 4800 }, { "epoch": 1.563109954456734, "grad_norm": 1.5058375597000122, "learning_rate": 3.9416824707912594e-05, "loss": 0.8146, "step": 4805 }, { "epoch": 1.5647364996746909, "grad_norm": 1.2811481952667236, "learning_rate": 3.9395732292490404e-05, "loss": 0.8297, "step": 4810 }, { "epoch": 1.566363044892648, "grad_norm": 1.1377458572387695, "learning_rate": 3.937462453540381e-05, "loss": 0.8418, "step": 4815 }, { "epoch": 1.5679895901106051, "grad_norm": 1.2678501605987549, "learning_rate": 3.935350145914757e-05, "loss": 0.8288, "step": 4820 }, { "epoch": 1.569616135328562, "grad_norm": 1.4019118547439575, "learning_rate": 3.933236308623271e-05, "loss": 0.8147, "step": 4825 }, { "epoch": 1.5712426805465192, "grad_norm": 1.3262853622436523, "learning_rate": 3.931120943918661e-05, "loss": 0.8506, "step": 4830 }, { "epoch": 1.5728692257644763, "grad_norm": 1.2418149709701538, "learning_rate": 3.92900405405529e-05, "loss": 0.8228, "step": 4835 }, { "epoch": 1.5744957709824332, "grad_norm": 1.279751181602478, "learning_rate": 3.9268856412891474e-05, "loss": 0.7799, "step": 4840 }, { "epoch": 1.5761223162003903, "grad_norm": 1.5175522565841675, "learning_rate": 3.9247657078778444e-05, "loss": 0.8498, "step": 4845 }, { "epoch": 1.5777488614183475, "grad_norm": 1.3976547718048096, "learning_rate": 3.9226442560806145e-05, "loss": 0.8124, "step": 4850 }, { "epoch": 1.5793754066363044, "grad_norm": 1.7714158296585083, "learning_rate": 3.9205212881583064e-05, "loss": 0.8584, "step": 4855 }, { "epoch": 1.5810019518542615, "grad_norm": 1.3528929948806763, "learning_rate": 3.918396806373389e-05, "loss": 0.8048, "step": 4860 }, { "epoch": 1.5826284970722186, "grad_norm": 1.382393479347229, "learning_rate": 3.9162708129899406e-05, "loss": 0.8301, "step": 4865 }, { "epoch": 1.5842550422901756, "grad_norm": 1.48068368434906, "learning_rate": 3.914143310273653e-05, "loss": 0.8519, "step": 4870 }, { "epoch": 1.5858815875081327, "grad_norm": 1.1674385070800781, "learning_rate": 3.912014300491825e-05, "loss": 0.7924, "step": 4875 }, { "epoch": 1.5875081327260898, "grad_norm": 1.3044108152389526, "learning_rate": 3.9098837859133606e-05, "loss": 0.8556, "step": 4880 }, { "epoch": 1.5891346779440467, "grad_norm": 1.3392024040222168, "learning_rate": 3.907751768808771e-05, "loss": 0.8137, "step": 4885 }, { "epoch": 1.5907612231620039, "grad_norm": 1.2559105157852173, "learning_rate": 3.905618251450165e-05, "loss": 0.8084, "step": 4890 }, { "epoch": 1.592387768379961, "grad_norm": 1.5809398889541626, "learning_rate": 3.9034832361112516e-05, "loss": 0.8482, "step": 4895 }, { "epoch": 1.594014313597918, "grad_norm": 1.3132051229476929, "learning_rate": 3.9013467250673375e-05, "loss": 0.8286, "step": 4900 }, { "epoch": 1.595640858815875, "grad_norm": 1.160130262374878, "learning_rate": 3.8992087205953215e-05, "loss": 0.8276, "step": 4905 }, { "epoch": 1.5972674040338322, "grad_norm": 1.3265928030014038, "learning_rate": 3.897069224973694e-05, "loss": 0.8508, "step": 4910 }, { "epoch": 1.598893949251789, "grad_norm": 1.3934937715530396, "learning_rate": 3.894928240482536e-05, "loss": 0.8243, "step": 4915 }, { "epoch": 1.6005204944697464, "grad_norm": 1.3466092348098755, "learning_rate": 3.892785769403514e-05, "loss": 0.8304, "step": 4920 }, { "epoch": 1.6021470396877033, "grad_norm": 1.6005505323410034, "learning_rate": 3.89064181401988e-05, "loss": 0.8411, "step": 4925 }, { "epoch": 1.6037735849056602, "grad_norm": 1.5974273681640625, "learning_rate": 3.888496376616466e-05, "loss": 0.8445, "step": 4930 }, { "epoch": 1.6054001301236176, "grad_norm": 1.6641621589660645, "learning_rate": 3.886349459479684e-05, "loss": 0.8398, "step": 4935 }, { "epoch": 1.6070266753415745, "grad_norm": 1.3361982107162476, "learning_rate": 3.8842010648975244e-05, "loss": 0.8327, "step": 4940 }, { "epoch": 1.6086532205595314, "grad_norm": 1.2077546119689941, "learning_rate": 3.882051195159551e-05, "loss": 0.8718, "step": 4945 }, { "epoch": 1.6102797657774888, "grad_norm": 1.5431170463562012, "learning_rate": 3.879899852556899e-05, "loss": 0.8271, "step": 4950 }, { "epoch": 1.6119063109954457, "grad_norm": 1.392358660697937, "learning_rate": 3.877747039382275e-05, "loss": 0.8234, "step": 4955 }, { "epoch": 1.6135328562134026, "grad_norm": 1.507156252861023, "learning_rate": 3.87559275792995e-05, "loss": 0.8043, "step": 4960 }, { "epoch": 1.61515940143136, "grad_norm": 1.3174952268600464, "learning_rate": 3.873437010495764e-05, "loss": 0.8235, "step": 4965 }, { "epoch": 1.6167859466493169, "grad_norm": 1.222144365310669, "learning_rate": 3.871279799377116e-05, "loss": 0.833, "step": 4970 }, { "epoch": 1.6184124918672738, "grad_norm": 1.2789946794509888, "learning_rate": 3.869121126872964e-05, "loss": 0.8464, "step": 4975 }, { "epoch": 1.6200390370852311, "grad_norm": 1.63290536403656, "learning_rate": 3.8669609952838284e-05, "loss": 0.8361, "step": 4980 }, { "epoch": 1.621665582303188, "grad_norm": 1.3100494146347046, "learning_rate": 3.864799406911779e-05, "loss": 0.8432, "step": 4985 }, { "epoch": 1.623292127521145, "grad_norm": 1.309423565864563, "learning_rate": 3.86263636406044e-05, "loss": 0.8515, "step": 4990 }, { "epoch": 1.6249186727391023, "grad_norm": 1.2277017831802368, "learning_rate": 3.860471869034987e-05, "loss": 0.8413, "step": 4995 }, { "epoch": 1.6265452179570592, "grad_norm": 1.1988389492034912, "learning_rate": 3.8583059241421433e-05, "loss": 0.8243, "step": 5000 }, { "epoch": 1.628171763175016, "grad_norm": 1.3983453512191772, "learning_rate": 3.856138531690175e-05, "loss": 0.8416, "step": 5005 }, { "epoch": 1.6297983083929735, "grad_norm": 1.086287021636963, "learning_rate": 3.853969693988892e-05, "loss": 0.8441, "step": 5010 }, { "epoch": 1.6314248536109304, "grad_norm": 1.3003473281860352, "learning_rate": 3.851799413349646e-05, "loss": 0.8465, "step": 5015 }, { "epoch": 1.6330513988288873, "grad_norm": 1.4640402793884277, "learning_rate": 3.849627692085324e-05, "loss": 0.8222, "step": 5020 }, { "epoch": 1.6346779440468446, "grad_norm": 1.2459534406661987, "learning_rate": 3.8474545325103485e-05, "loss": 0.8174, "step": 5025 }, { "epoch": 1.6363044892648015, "grad_norm": 1.4799909591674805, "learning_rate": 3.845279936940678e-05, "loss": 0.8465, "step": 5030 }, { "epoch": 1.6379310344827587, "grad_norm": 1.1371746063232422, "learning_rate": 3.8431039076937966e-05, "loss": 0.8287, "step": 5035 }, { "epoch": 1.6395575797007158, "grad_norm": 1.4468460083007812, "learning_rate": 3.8409264470887204e-05, "loss": 0.8107, "step": 5040 }, { "epoch": 1.6411841249186727, "grad_norm": 1.6086653470993042, "learning_rate": 3.838747557445988e-05, "loss": 0.803, "step": 5045 }, { "epoch": 1.6428106701366298, "grad_norm": 1.378934621810913, "learning_rate": 3.836567241087663e-05, "loss": 0.8205, "step": 5050 }, { "epoch": 1.644437215354587, "grad_norm": 1.1641347408294678, "learning_rate": 3.8343855003373286e-05, "loss": 0.8303, "step": 5055 }, { "epoch": 1.6460637605725439, "grad_norm": 1.4353915452957153, "learning_rate": 3.832202337520085e-05, "loss": 0.8452, "step": 5060 }, { "epoch": 1.647690305790501, "grad_norm": 1.4637186527252197, "learning_rate": 3.8300177549625504e-05, "loss": 0.7972, "step": 5065 }, { "epoch": 1.6493168510084582, "grad_norm": 1.2714850902557373, "learning_rate": 3.8278317549928534e-05, "loss": 0.8366, "step": 5070 }, { "epoch": 1.650943396226415, "grad_norm": 1.5720365047454834, "learning_rate": 3.8256443399406344e-05, "loss": 0.8374, "step": 5075 }, { "epoch": 1.6525699414443722, "grad_norm": 1.1693074703216553, "learning_rate": 3.823455512137042e-05, "loss": 0.8434, "step": 5080 }, { "epoch": 1.6541964866623293, "grad_norm": 1.2652405500411987, "learning_rate": 3.8212652739147306e-05, "loss": 0.8264, "step": 5085 }, { "epoch": 1.6558230318802862, "grad_norm": 1.3884204626083374, "learning_rate": 3.819073627607856e-05, "loss": 0.8197, "step": 5090 }, { "epoch": 1.6574495770982434, "grad_norm": 1.0851396322250366, "learning_rate": 3.8168805755520775e-05, "loss": 0.8386, "step": 5095 }, { "epoch": 1.6590761223162005, "grad_norm": 1.5833030939102173, "learning_rate": 3.81468612008455e-05, "loss": 0.831, "step": 5100 }, { "epoch": 1.6607026675341574, "grad_norm": 1.3394887447357178, "learning_rate": 3.8124902635439235e-05, "loss": 0.83, "step": 5105 }, { "epoch": 1.6623292127521145, "grad_norm": 1.2865214347839355, "learning_rate": 3.810293008270345e-05, "loss": 0.7938, "step": 5110 }, { "epoch": 1.6639557579700717, "grad_norm": 1.2463825941085815, "learning_rate": 3.808094356605448e-05, "loss": 0.8134, "step": 5115 }, { "epoch": 1.6655823031880286, "grad_norm": 1.397323489189148, "learning_rate": 3.8058943108923565e-05, "loss": 0.8208, "step": 5120 }, { "epoch": 1.6672088484059857, "grad_norm": 1.3714436292648315, "learning_rate": 3.80369287347568e-05, "loss": 0.805, "step": 5125 }, { "epoch": 1.6688353936239428, "grad_norm": 1.3371800184249878, "learning_rate": 3.801490046701509e-05, "loss": 0.8171, "step": 5130 }, { "epoch": 1.6704619388418998, "grad_norm": 1.5197476148605347, "learning_rate": 3.799285832917417e-05, "loss": 0.8341, "step": 5135 }, { "epoch": 1.6720884840598569, "grad_norm": 1.3327134847640991, "learning_rate": 3.797080234472456e-05, "loss": 0.8106, "step": 5140 }, { "epoch": 1.673715029277814, "grad_norm": 1.2556819915771484, "learning_rate": 3.7948732537171516e-05, "loss": 0.8302, "step": 5145 }, { "epoch": 1.675341574495771, "grad_norm": 1.2372509241104126, "learning_rate": 3.792664893003503e-05, "loss": 0.8387, "step": 5150 }, { "epoch": 1.676968119713728, "grad_norm": 1.2688294649124146, "learning_rate": 3.7904551546849806e-05, "loss": 0.8549, "step": 5155 }, { "epoch": 1.6785946649316852, "grad_norm": 1.386313796043396, "learning_rate": 3.788244041116525e-05, "loss": 0.8219, "step": 5160 }, { "epoch": 1.680221210149642, "grad_norm": 1.4494905471801758, "learning_rate": 3.7860315546545375e-05, "loss": 0.8411, "step": 5165 }, { "epoch": 1.6818477553675992, "grad_norm": 1.1779999732971191, "learning_rate": 3.783817697656887e-05, "loss": 0.8236, "step": 5170 }, { "epoch": 1.6834743005855564, "grad_norm": 1.2401928901672363, "learning_rate": 3.7816024724829e-05, "loss": 0.8215, "step": 5175 }, { "epoch": 1.6851008458035133, "grad_norm": 1.5136502981185913, "learning_rate": 3.779385881493364e-05, "loss": 0.8195, "step": 5180 }, { "epoch": 1.6867273910214704, "grad_norm": 1.2013198137283325, "learning_rate": 3.777167927050519e-05, "loss": 0.8236, "step": 5185 }, { "epoch": 1.6883539362394275, "grad_norm": 1.422413945198059, "learning_rate": 3.77494861151806e-05, "loss": 0.8126, "step": 5190 }, { "epoch": 1.6899804814573844, "grad_norm": 1.2437760829925537, "learning_rate": 3.772727937261132e-05, "loss": 0.8603, "step": 5195 }, { "epoch": 1.6916070266753416, "grad_norm": 1.327820897102356, "learning_rate": 3.7705059066463274e-05, "loss": 0.8296, "step": 5200 }, { "epoch": 1.6932335718932987, "grad_norm": 1.3072866201400757, "learning_rate": 3.7682825220416865e-05, "loss": 0.8538, "step": 5205 }, { "epoch": 1.6948601171112556, "grad_norm": 1.671960473060608, "learning_rate": 3.766057785816688e-05, "loss": 0.844, "step": 5210 }, { "epoch": 1.6964866623292127, "grad_norm": 1.5522574186325073, "learning_rate": 3.7638317003422564e-05, "loss": 0.8346, "step": 5215 }, { "epoch": 1.6981132075471699, "grad_norm": 1.206579327583313, "learning_rate": 3.7616042679907494e-05, "loss": 0.8477, "step": 5220 }, { "epoch": 1.6997397527651268, "grad_norm": 1.1209266185760498, "learning_rate": 3.759375491135964e-05, "loss": 0.8509, "step": 5225 }, { "epoch": 1.701366297983084, "grad_norm": 1.1288695335388184, "learning_rate": 3.7571453721531256e-05, "loss": 0.813, "step": 5230 }, { "epoch": 1.702992843201041, "grad_norm": 1.2574747800827026, "learning_rate": 3.7549139134188954e-05, "loss": 0.825, "step": 5235 }, { "epoch": 1.704619388418998, "grad_norm": 1.1091290712356567, "learning_rate": 3.752681117311358e-05, "loss": 0.816, "step": 5240 }, { "epoch": 1.706245933636955, "grad_norm": 1.2635185718536377, "learning_rate": 3.7504469862100256e-05, "loss": 0.8201, "step": 5245 }, { "epoch": 1.7078724788549122, "grad_norm": 1.2298431396484375, "learning_rate": 3.748211522495831e-05, "loss": 0.8449, "step": 5250 }, { "epoch": 1.7094990240728691, "grad_norm": 1.2688336372375488, "learning_rate": 3.745974728551129e-05, "loss": 0.8411, "step": 5255 }, { "epoch": 1.7111255692908263, "grad_norm": 1.1729408502578735, "learning_rate": 3.7437366067596924e-05, "loss": 0.8263, "step": 5260 }, { "epoch": 1.7127521145087834, "grad_norm": 1.4828890562057495, "learning_rate": 3.7414971595067074e-05, "loss": 0.8024, "step": 5265 }, { "epoch": 1.7143786597267403, "grad_norm": 1.4438930749893188, "learning_rate": 3.7392563891787726e-05, "loss": 0.8605, "step": 5270 }, { "epoch": 1.7160052049446974, "grad_norm": 1.0801341533660889, "learning_rate": 3.7370142981638996e-05, "loss": 0.7957, "step": 5275 }, { "epoch": 1.7176317501626546, "grad_norm": 1.2276865243911743, "learning_rate": 3.734770888851504e-05, "loss": 0.7981, "step": 5280 }, { "epoch": 1.7192582953806115, "grad_norm": 1.225500226020813, "learning_rate": 3.732526163632408e-05, "loss": 0.8377, "step": 5285 }, { "epoch": 1.7208848405985686, "grad_norm": 1.438228964805603, "learning_rate": 3.7302801248988365e-05, "loss": 0.8458, "step": 5290 }, { "epoch": 1.7225113858165257, "grad_norm": 1.3257967233657837, "learning_rate": 3.728032775044413e-05, "loss": 0.8475, "step": 5295 }, { "epoch": 1.7241379310344827, "grad_norm": 1.206342339515686, "learning_rate": 3.7257841164641595e-05, "loss": 0.8224, "step": 5300 }, { "epoch": 1.7257644762524398, "grad_norm": 1.3063772916793823, "learning_rate": 3.723534151554492e-05, "loss": 0.8129, "step": 5305 }, { "epoch": 1.727391021470397, "grad_norm": 1.5015925168991089, "learning_rate": 3.721282882713218e-05, "loss": 0.8269, "step": 5310 }, { "epoch": 1.7290175666883538, "grad_norm": 1.1291321516036987, "learning_rate": 3.719030312339536e-05, "loss": 0.8516, "step": 5315 }, { "epoch": 1.730644111906311, "grad_norm": 1.3298044204711914, "learning_rate": 3.7167764428340314e-05, "loss": 0.8413, "step": 5320 }, { "epoch": 1.732270657124268, "grad_norm": 1.4059034585952759, "learning_rate": 3.7145212765986725e-05, "loss": 0.8521, "step": 5325 }, { "epoch": 1.733897202342225, "grad_norm": 1.3868982791900635, "learning_rate": 3.7122648160368125e-05, "loss": 0.8237, "step": 5330 }, { "epoch": 1.7355237475601821, "grad_norm": 1.3850789070129395, "learning_rate": 3.71000706355318e-05, "loss": 0.8619, "step": 5335 }, { "epoch": 1.7371502927781393, "grad_norm": 1.36600661277771, "learning_rate": 3.7077480215538854e-05, "loss": 0.8125, "step": 5340 }, { "epoch": 1.7387768379960962, "grad_norm": 1.1057202816009521, "learning_rate": 3.7054876924464075e-05, "loss": 0.8369, "step": 5345 }, { "epoch": 1.7404033832140533, "grad_norm": 1.180856704711914, "learning_rate": 3.7032260786396025e-05, "loss": 0.8459, "step": 5350 }, { "epoch": 1.7420299284320104, "grad_norm": 1.2434580326080322, "learning_rate": 3.700963182543691e-05, "loss": 0.8152, "step": 5355 }, { "epoch": 1.7436564736499673, "grad_norm": 1.2772276401519775, "learning_rate": 3.698699006570263e-05, "loss": 0.8174, "step": 5360 }, { "epoch": 1.7452830188679245, "grad_norm": 1.3301308155059814, "learning_rate": 3.696433553132271e-05, "loss": 0.8175, "step": 5365 }, { "epoch": 1.7469095640858816, "grad_norm": 3.1788594722747803, "learning_rate": 3.694166824644032e-05, "loss": 0.8399, "step": 5370 }, { "epoch": 1.7485361093038385, "grad_norm": 1.3008415699005127, "learning_rate": 3.691898823521216e-05, "loss": 0.7988, "step": 5375 }, { "epoch": 1.7501626545217959, "grad_norm": 1.4577161073684692, "learning_rate": 3.6896295521808556e-05, "loss": 0.8546, "step": 5380 }, { "epoch": 1.7517891997397528, "grad_norm": 1.2147691249847412, "learning_rate": 3.6873590130413324e-05, "loss": 0.8733, "step": 5385 }, { "epoch": 1.7534157449577097, "grad_norm": 1.4430413246154785, "learning_rate": 3.685087208522381e-05, "loss": 0.8058, "step": 5390 }, { "epoch": 1.755042290175667, "grad_norm": 1.3676267862319946, "learning_rate": 3.682814141045085e-05, "loss": 0.8336, "step": 5395 }, { "epoch": 1.756668835393624, "grad_norm": 1.2504734992980957, "learning_rate": 3.6805398130318736e-05, "loss": 0.8042, "step": 5400 }, { "epoch": 1.7582953806115809, "grad_norm": 1.1897562742233276, "learning_rate": 3.6782642269065177e-05, "loss": 0.8365, "step": 5405 }, { "epoch": 1.7599219258295382, "grad_norm": 1.3107147216796875, "learning_rate": 3.675987385094131e-05, "loss": 0.8059, "step": 5410 }, { "epoch": 1.7615484710474951, "grad_norm": 1.2308214902877808, "learning_rate": 3.673709290021166e-05, "loss": 0.8314, "step": 5415 }, { "epoch": 1.763175016265452, "grad_norm": 1.130685567855835, "learning_rate": 3.6714299441154084e-05, "loss": 0.8536, "step": 5420 }, { "epoch": 1.7648015614834094, "grad_norm": 1.2818156480789185, "learning_rate": 3.669149349805978e-05, "loss": 0.8334, "step": 5425 }, { "epoch": 1.7664281067013663, "grad_norm": 1.5565329790115356, "learning_rate": 3.666867509523325e-05, "loss": 0.8184, "step": 5430 }, { "epoch": 1.7680546519193232, "grad_norm": 1.2323977947235107, "learning_rate": 3.664584425699229e-05, "loss": 0.8275, "step": 5435 }, { "epoch": 1.7696811971372806, "grad_norm": 1.352149248123169, "learning_rate": 3.6623001007667926e-05, "loss": 0.8286, "step": 5440 }, { "epoch": 1.7713077423552375, "grad_norm": 1.1997655630111694, "learning_rate": 3.660014537160441e-05, "loss": 0.8171, "step": 5445 }, { "epoch": 1.7729342875731944, "grad_norm": 1.2808606624603271, "learning_rate": 3.6577277373159216e-05, "loss": 0.8268, "step": 5450 }, { "epoch": 1.7745608327911517, "grad_norm": 1.3642851114273071, "learning_rate": 3.655897408986487e-05, "loss": 0.8436, "step": 5455 }, { "epoch": 1.7761873780091086, "grad_norm": 1.2705271244049072, "learning_rate": 3.653608390055549e-05, "loss": 0.7944, "step": 5460 }, { "epoch": 1.7778139232270656, "grad_norm": 1.3969600200653076, "learning_rate": 3.651318141713532e-05, "loss": 0.8344, "step": 5465 }, { "epoch": 1.779440468445023, "grad_norm": 1.3231524229049683, "learning_rate": 3.649026666401176e-05, "loss": 0.8169, "step": 5470 }, { "epoch": 1.7810670136629798, "grad_norm": 1.4072846174240112, "learning_rate": 3.646733966560527e-05, "loss": 0.8435, "step": 5475 }, { "epoch": 1.7826935588809367, "grad_norm": 1.319799780845642, "learning_rate": 3.644440044634935e-05, "loss": 0.8216, "step": 5480 }, { "epoch": 1.784320104098894, "grad_norm": 1.261232852935791, "learning_rate": 3.642144903069055e-05, "loss": 0.8409, "step": 5485 }, { "epoch": 1.785946649316851, "grad_norm": 1.3804266452789307, "learning_rate": 3.639848544308841e-05, "loss": 0.8231, "step": 5490 }, { "epoch": 1.7875731945348081, "grad_norm": 1.1093454360961914, "learning_rate": 3.637550970801543e-05, "loss": 0.8118, "step": 5495 }, { "epoch": 1.7891997397527653, "grad_norm": 1.4532004594802856, "learning_rate": 3.635252184995705e-05, "loss": 0.8518, "step": 5500 }, { "epoch": 1.7908262849707222, "grad_norm": 1.0900368690490723, "learning_rate": 3.632952189341166e-05, "loss": 0.8859, "step": 5505 }, { "epoch": 1.7924528301886793, "grad_norm": 1.3891526460647583, "learning_rate": 3.630650986289052e-05, "loss": 0.8328, "step": 5510 }, { "epoch": 1.7940793754066364, "grad_norm": 1.2490516901016235, "learning_rate": 3.628348578291776e-05, "loss": 0.8394, "step": 5515 }, { "epoch": 1.7957059206245933, "grad_norm": 1.255976676940918, "learning_rate": 3.626044967803036e-05, "loss": 0.8476, "step": 5520 }, { "epoch": 1.7973324658425505, "grad_norm": 1.2879066467285156, "learning_rate": 3.623740157277811e-05, "loss": 0.8299, "step": 5525 }, { "epoch": 1.7989590110605076, "grad_norm": 1.371375560760498, "learning_rate": 3.6214341491723567e-05, "loss": 0.8239, "step": 5530 }, { "epoch": 1.8005855562784645, "grad_norm": 1.5211347341537476, "learning_rate": 3.619126945944209e-05, "loss": 0.7663, "step": 5535 }, { "epoch": 1.8022121014964216, "grad_norm": 1.3124513626098633, "learning_rate": 3.616818550052176e-05, "loss": 0.7954, "step": 5540 }, { "epoch": 1.8038386467143788, "grad_norm": 1.2937182188034058, "learning_rate": 3.614508963956335e-05, "loss": 0.8188, "step": 5545 }, { "epoch": 1.8054651919323357, "grad_norm": 1.3660154342651367, "learning_rate": 3.612198190118035e-05, "loss": 0.8134, "step": 5550 }, { "epoch": 1.8070917371502928, "grad_norm": 1.1961839199066162, "learning_rate": 3.609886230999886e-05, "loss": 0.8237, "step": 5555 }, { "epoch": 1.80871828236825, "grad_norm": 1.4504514932632446, "learning_rate": 3.607573089065768e-05, "loss": 0.8442, "step": 5560 }, { "epoch": 1.8103448275862069, "grad_norm": 1.2927675247192383, "learning_rate": 3.605258766780815e-05, "loss": 0.8359, "step": 5565 }, { "epoch": 1.811971372804164, "grad_norm": 1.2561722993850708, "learning_rate": 3.6029432666114216e-05, "loss": 0.8292, "step": 5570 }, { "epoch": 1.8135979180221211, "grad_norm": 1.2742695808410645, "learning_rate": 3.600626591025239e-05, "loss": 0.8352, "step": 5575 }, { "epoch": 1.815224463240078, "grad_norm": 1.2760121822357178, "learning_rate": 3.598308742491168e-05, "loss": 0.8354, "step": 5580 }, { "epoch": 1.8168510084580352, "grad_norm": 1.2482631206512451, "learning_rate": 3.595989723479363e-05, "loss": 0.8358, "step": 5585 }, { "epoch": 1.8184775536759923, "grad_norm": 1.416909098625183, "learning_rate": 3.593669536461223e-05, "loss": 0.8332, "step": 5590 }, { "epoch": 1.8201040988939492, "grad_norm": 1.4360785484313965, "learning_rate": 3.591348183909391e-05, "loss": 0.8133, "step": 5595 }, { "epoch": 1.8217306441119063, "grad_norm": 1.2243309020996094, "learning_rate": 3.589025668297758e-05, "loss": 0.8281, "step": 5600 }, { "epoch": 1.8233571893298635, "grad_norm": 1.158811330795288, "learning_rate": 3.586701992101446e-05, "loss": 0.8332, "step": 5605 }, { "epoch": 1.8249837345478204, "grad_norm": 1.5002223253250122, "learning_rate": 3.58437715779682e-05, "loss": 0.8267, "step": 5610 }, { "epoch": 1.8266102797657775, "grad_norm": 1.2280634641647339, "learning_rate": 3.582051167861477e-05, "loss": 0.8122, "step": 5615 }, { "epoch": 1.8282368249837346, "grad_norm": 1.1525310277938843, "learning_rate": 3.579724024774245e-05, "loss": 0.833, "step": 5620 }, { "epoch": 1.8298633702016915, "grad_norm": 1.2813570499420166, "learning_rate": 3.577395731015184e-05, "loss": 0.8309, "step": 5625 }, { "epoch": 1.8314899154196487, "grad_norm": 1.2277755737304688, "learning_rate": 3.575066289065576e-05, "loss": 0.803, "step": 5630 }, { "epoch": 1.8331164606376058, "grad_norm": 1.40836501121521, "learning_rate": 3.57273570140793e-05, "loss": 0.8069, "step": 5635 }, { "epoch": 1.8347430058555627, "grad_norm": 1.6087926626205444, "learning_rate": 3.570403970525975e-05, "loss": 0.8626, "step": 5640 }, { "epoch": 1.8363695510735198, "grad_norm": 1.2652931213378906, "learning_rate": 3.5680710989046565e-05, "loss": 0.8092, "step": 5645 }, { "epoch": 1.837996096291477, "grad_norm": 1.193189263343811, "learning_rate": 3.565737089030138e-05, "loss": 0.8417, "step": 5650 }, { "epoch": 1.8396226415094339, "grad_norm": 1.1674658060073853, "learning_rate": 3.5634019433897964e-05, "loss": 0.7936, "step": 5655 }, { "epoch": 1.841249186727391, "grad_norm": 1.448638916015625, "learning_rate": 3.561065664472216e-05, "loss": 0.8086, "step": 5660 }, { "epoch": 1.8428757319453482, "grad_norm": 1.3727436065673828, "learning_rate": 3.558728254767192e-05, "loss": 0.8334, "step": 5665 }, { "epoch": 1.844502277163305, "grad_norm": 1.1043076515197754, "learning_rate": 3.556389716765722e-05, "loss": 0.8167, "step": 5670 }, { "epoch": 1.8461288223812622, "grad_norm": 1.3153088092803955, "learning_rate": 3.5540500529600096e-05, "loss": 0.7952, "step": 5675 }, { "epoch": 1.8477553675992193, "grad_norm": 1.3390392065048218, "learning_rate": 3.551709265843455e-05, "loss": 0.8587, "step": 5680 }, { "epoch": 1.8493819128171762, "grad_norm": 1.3338311910629272, "learning_rate": 3.5493673579106555e-05, "loss": 0.8942, "step": 5685 }, { "epoch": 1.8510084580351334, "grad_norm": 1.1834983825683594, "learning_rate": 3.547024331657407e-05, "loss": 0.8172, "step": 5690 }, { "epoch": 1.8526350032530905, "grad_norm": 1.2427376508712769, "learning_rate": 3.5446801895806904e-05, "loss": 0.8363, "step": 5695 }, { "epoch": 1.8542615484710474, "grad_norm": 1.455535888671875, "learning_rate": 3.542334934178682e-05, "loss": 0.8224, "step": 5700 }, { "epoch": 1.8558880936890045, "grad_norm": 1.3923271894454956, "learning_rate": 3.539988567950741e-05, "loss": 0.8281, "step": 5705 }, { "epoch": 1.8575146389069617, "grad_norm": 1.1042308807373047, "learning_rate": 3.537641093397411e-05, "loss": 0.8272, "step": 5710 }, { "epoch": 1.8591411841249186, "grad_norm": 1.1908977031707764, "learning_rate": 3.53529251302042e-05, "loss": 0.8138, "step": 5715 }, { "epoch": 1.8607677293428757, "grad_norm": 1.2812453508377075, "learning_rate": 3.532942829322668e-05, "loss": 0.796, "step": 5720 }, { "epoch": 1.8623942745608328, "grad_norm": 1.413095474243164, "learning_rate": 3.530592044808237e-05, "loss": 0.8647, "step": 5725 }, { "epoch": 1.8640208197787898, "grad_norm": 1.1623964309692383, "learning_rate": 3.528240161982379e-05, "loss": 0.8483, "step": 5730 }, { "epoch": 1.8656473649967469, "grad_norm": 1.2985146045684814, "learning_rate": 3.525887183351517e-05, "loss": 0.8369, "step": 5735 }, { "epoch": 1.867273910214704, "grad_norm": 1.375532627105713, "learning_rate": 3.5235331114232425e-05, "loss": 0.8284, "step": 5740 }, { "epoch": 1.868900455432661, "grad_norm": 1.6439818143844604, "learning_rate": 3.521177948706311e-05, "loss": 0.8372, "step": 5745 }, { "epoch": 1.870527000650618, "grad_norm": 1.505880355834961, "learning_rate": 3.518821697710642e-05, "loss": 0.8421, "step": 5750 }, { "epoch": 1.8721535458685752, "grad_norm": 1.478026032447815, "learning_rate": 3.5164643609473114e-05, "loss": 0.8498, "step": 5755 }, { "epoch": 1.873780091086532, "grad_norm": 1.2411234378814697, "learning_rate": 3.5141059409285584e-05, "loss": 0.8141, "step": 5760 }, { "epoch": 1.8754066363044892, "grad_norm": 1.4108761548995972, "learning_rate": 3.51174644016777e-05, "loss": 0.8244, "step": 5765 }, { "epoch": 1.8770331815224464, "grad_norm": 1.3026715517044067, "learning_rate": 3.509385861179488e-05, "loss": 0.8069, "step": 5770 }, { "epoch": 1.8786597267404033, "grad_norm": 1.1237925291061401, "learning_rate": 3.507024206479406e-05, "loss": 0.8226, "step": 5775 }, { "epoch": 1.8802862719583604, "grad_norm": 1.353853702545166, "learning_rate": 3.504661478584359e-05, "loss": 0.8292, "step": 5780 }, { "epoch": 1.8819128171763175, "grad_norm": 1.1299301385879517, "learning_rate": 3.502297680012327e-05, "loss": 0.8095, "step": 5785 }, { "epoch": 1.8835393623942744, "grad_norm": 1.1047401428222656, "learning_rate": 3.4999328132824326e-05, "loss": 0.8069, "step": 5790 }, { "epoch": 1.8851659076122316, "grad_norm": 1.2694413661956787, "learning_rate": 3.4975668809149375e-05, "loss": 0.8136, "step": 5795 }, { "epoch": 1.8867924528301887, "grad_norm": 1.1931685209274292, "learning_rate": 3.495199885431236e-05, "loss": 0.8016, "step": 5800 }, { "epoch": 1.8884189980481456, "grad_norm": 1.4330689907073975, "learning_rate": 3.492831829353857e-05, "loss": 0.8288, "step": 5805 }, { "epoch": 1.8900455432661027, "grad_norm": 1.2044175863265991, "learning_rate": 3.4904627152064605e-05, "loss": 0.8271, "step": 5810 }, { "epoch": 1.8916720884840599, "grad_norm": 1.3299918174743652, "learning_rate": 3.488092545513833e-05, "loss": 0.812, "step": 5815 }, { "epoch": 1.8932986337020168, "grad_norm": 1.3373841047286987, "learning_rate": 3.485721322801886e-05, "loss": 0.8409, "step": 5820 }, { "epoch": 1.8949251789199741, "grad_norm": 1.1944694519042969, "learning_rate": 3.483349049597653e-05, "loss": 0.833, "step": 5825 }, { "epoch": 1.896551724137931, "grad_norm": 1.173740267753601, "learning_rate": 3.480975728429288e-05, "loss": 0.8203, "step": 5830 }, { "epoch": 1.898178269355888, "grad_norm": 1.3078852891921997, "learning_rate": 3.4786013618260615e-05, "loss": 0.8289, "step": 5835 }, { "epoch": 1.8998048145738453, "grad_norm": 1.3262101411819458, "learning_rate": 3.476225952318356e-05, "loss": 0.8086, "step": 5840 }, { "epoch": 1.9014313597918022, "grad_norm": 1.5123237371444702, "learning_rate": 3.47384950243767e-05, "loss": 0.8296, "step": 5845 }, { "epoch": 1.9030579050097591, "grad_norm": 1.2611478567123413, "learning_rate": 3.471472014716606e-05, "loss": 0.8393, "step": 5850 }, { "epoch": 1.9046844502277165, "grad_norm": 1.2621562480926514, "learning_rate": 3.4690934916888754e-05, "loss": 0.8628, "step": 5855 }, { "epoch": 1.9063109954456734, "grad_norm": 1.4159252643585205, "learning_rate": 3.4667139358892914e-05, "loss": 0.8541, "step": 5860 }, { "epoch": 1.9079375406636303, "grad_norm": 1.3518465757369995, "learning_rate": 3.464333349853769e-05, "loss": 0.832, "step": 5865 }, { "epoch": 1.9095640858815877, "grad_norm": 1.350602388381958, "learning_rate": 3.461951736119321e-05, "loss": 0.7949, "step": 5870 }, { "epoch": 1.9111906310995446, "grad_norm": 1.7342414855957031, "learning_rate": 3.459569097224054e-05, "loss": 0.8273, "step": 5875 }, { "epoch": 1.9128171763175015, "grad_norm": 1.444703221321106, "learning_rate": 3.45718543570717e-05, "loss": 0.8165, "step": 5880 }, { "epoch": 1.9144437215354588, "grad_norm": 1.1751919984817505, "learning_rate": 3.454800754108957e-05, "loss": 0.8269, "step": 5885 }, { "epoch": 1.9160702667534157, "grad_norm": 1.3509085178375244, "learning_rate": 3.452415054970793e-05, "loss": 0.8033, "step": 5890 }, { "epoch": 1.9176968119713727, "grad_norm": 1.4450467824935913, "learning_rate": 3.45002834083514e-05, "loss": 0.8295, "step": 5895 }, { "epoch": 1.91932335718933, "grad_norm": 1.3500406742095947, "learning_rate": 3.4476406142455394e-05, "loss": 0.8615, "step": 5900 }, { "epoch": 1.920949902407287, "grad_norm": 1.3259705305099487, "learning_rate": 3.445251877746616e-05, "loss": 0.8255, "step": 5905 }, { "epoch": 1.9225764476252438, "grad_norm": 1.1814990043640137, "learning_rate": 3.442862133884067e-05, "loss": 0.8348, "step": 5910 }, { "epoch": 1.9242029928432012, "grad_norm": 1.2775522470474243, "learning_rate": 3.440471385204664e-05, "loss": 0.8048, "step": 5915 }, { "epoch": 1.925829538061158, "grad_norm": 1.3201384544372559, "learning_rate": 3.43807963425625e-05, "loss": 0.8544, "step": 5920 }, { "epoch": 1.927456083279115, "grad_norm": 1.4966453313827515, "learning_rate": 3.4356868835877376e-05, "loss": 0.8033, "step": 5925 }, { "epoch": 1.9290826284970723, "grad_norm": 1.5843162536621094, "learning_rate": 3.433293135749101e-05, "loss": 0.8039, "step": 5930 }, { "epoch": 1.9307091737150293, "grad_norm": 1.3175036907196045, "learning_rate": 3.430898393291381e-05, "loss": 0.8092, "step": 5935 }, { "epoch": 1.9323357189329864, "grad_norm": 1.2300385236740112, "learning_rate": 3.4285026587666755e-05, "loss": 0.8708, "step": 5940 }, { "epoch": 1.9339622641509435, "grad_norm": 1.522381067276001, "learning_rate": 3.426105934728141e-05, "loss": 0.8077, "step": 5945 }, { "epoch": 1.9355888093689004, "grad_norm": 1.3268636465072632, "learning_rate": 3.423708223729988e-05, "loss": 0.8823, "step": 5950 }, { "epoch": 1.9372153545868576, "grad_norm": 1.4455766677856445, "learning_rate": 3.4213095283274807e-05, "loss": 0.8131, "step": 5955 }, { "epoch": 1.9388418998048147, "grad_norm": 1.2035167217254639, "learning_rate": 3.41890985107693e-05, "loss": 0.8193, "step": 5960 }, { "epoch": 1.9404684450227716, "grad_norm": 1.1606972217559814, "learning_rate": 3.416509194535693e-05, "loss": 0.7976, "step": 5965 }, { "epoch": 1.9420949902407287, "grad_norm": 1.282204031944275, "learning_rate": 3.414107561262173e-05, "loss": 0.8039, "step": 5970 }, { "epoch": 1.9437215354586859, "grad_norm": 1.5579981803894043, "learning_rate": 3.411704953815813e-05, "loss": 0.8416, "step": 5975 }, { "epoch": 1.9453480806766428, "grad_norm": 1.169567584991455, "learning_rate": 3.409301374757092e-05, "loss": 0.8252, "step": 5980 }, { "epoch": 1.9469746258946, "grad_norm": 1.220754623413086, "learning_rate": 3.406896826647528e-05, "loss": 0.8361, "step": 5985 }, { "epoch": 1.948601171112557, "grad_norm": 1.150728464126587, "learning_rate": 3.404491312049669e-05, "loss": 0.8214, "step": 5990 }, { "epoch": 1.950227716330514, "grad_norm": 1.2067211866378784, "learning_rate": 3.4020848335270944e-05, "loss": 0.8192, "step": 5995 }, { "epoch": 1.951854261548471, "grad_norm": 1.3753764629364014, "learning_rate": 3.3996773936444104e-05, "loss": 0.8187, "step": 6000 }, { "epoch": 1.9534808067664282, "grad_norm": 1.8808735609054565, "learning_rate": 3.397268994967248e-05, "loss": 0.8038, "step": 6005 }, { "epoch": 1.9551073519843851, "grad_norm": 1.257872462272644, "learning_rate": 3.39485964006226e-05, "loss": 0.8278, "step": 6010 }, { "epoch": 1.9567338972023423, "grad_norm": 1.3824732303619385, "learning_rate": 3.392449331497117e-05, "loss": 0.8338, "step": 6015 }, { "epoch": 1.9583604424202994, "grad_norm": 1.2543541193008423, "learning_rate": 3.3900380718405096e-05, "loss": 0.8264, "step": 6020 }, { "epoch": 1.9599869876382563, "grad_norm": 1.5073655843734741, "learning_rate": 3.387625863662137e-05, "loss": 0.7887, "step": 6025 }, { "epoch": 1.9616135328562134, "grad_norm": 1.3938024044036865, "learning_rate": 3.3852127095327115e-05, "loss": 0.826, "step": 6030 }, { "epoch": 1.9632400780741706, "grad_norm": 1.2897905111312866, "learning_rate": 3.3827986120239556e-05, "loss": 0.8352, "step": 6035 }, { "epoch": 1.9648666232921275, "grad_norm": 1.134865641593933, "learning_rate": 3.380383573708594e-05, "loss": 0.8083, "step": 6040 }, { "epoch": 1.9664931685100846, "grad_norm": 1.2754161357879639, "learning_rate": 3.377967597160355e-05, "loss": 0.8304, "step": 6045 }, { "epoch": 1.9681197137280417, "grad_norm": 1.239772081375122, "learning_rate": 3.375550684953968e-05, "loss": 0.8116, "step": 6050 }, { "epoch": 1.9697462589459986, "grad_norm": 1.3194164037704468, "learning_rate": 3.373132839665159e-05, "loss": 0.832, "step": 6055 }, { "epoch": 1.9713728041639558, "grad_norm": 1.3960579633712769, "learning_rate": 3.3707140638706445e-05, "loss": 0.823, "step": 6060 }, { "epoch": 1.972999349381913, "grad_norm": 1.2290226221084595, "learning_rate": 3.368294360148141e-05, "loss": 0.7995, "step": 6065 }, { "epoch": 1.9746258945998698, "grad_norm": 1.3376431465148926, "learning_rate": 3.365873731076346e-05, "loss": 0.8063, "step": 6070 }, { "epoch": 1.976252439817827, "grad_norm": 1.4273117780685425, "learning_rate": 3.363452179234946e-05, "loss": 0.8239, "step": 6075 }, { "epoch": 1.977878985035784, "grad_norm": 1.5619248151779175, "learning_rate": 3.3610297072046126e-05, "loss": 0.8197, "step": 6080 }, { "epoch": 1.979505530253741, "grad_norm": 1.6111010313034058, "learning_rate": 3.3586063175669957e-05, "loss": 0.8146, "step": 6085 }, { "epoch": 1.9811320754716981, "grad_norm": 1.3023724555969238, "learning_rate": 3.356182012904725e-05, "loss": 0.8218, "step": 6090 }, { "epoch": 1.9827586206896552, "grad_norm": 1.0660532712936401, "learning_rate": 3.353756795801402e-05, "loss": 0.8369, "step": 6095 }, { "epoch": 1.9843851659076122, "grad_norm": 1.283933401107788, "learning_rate": 3.351330668841605e-05, "loss": 0.7965, "step": 6100 }, { "epoch": 1.9860117111255693, "grad_norm": 1.3030773401260376, "learning_rate": 3.348903634610879e-05, "loss": 0.8273, "step": 6105 }, { "epoch": 1.9876382563435264, "grad_norm": 1.2594255208969116, "learning_rate": 3.346475695695737e-05, "loss": 0.8562, "step": 6110 }, { "epoch": 1.9892648015614833, "grad_norm": 1.2553935050964355, "learning_rate": 3.344046854683656e-05, "loss": 0.7909, "step": 6115 }, { "epoch": 1.9908913467794405, "grad_norm": 1.3310219049453735, "learning_rate": 3.341617114163074e-05, "loss": 0.8524, "step": 6120 }, { "epoch": 1.9925178919973976, "grad_norm": 1.367447018623352, "learning_rate": 3.3391864767233874e-05, "loss": 0.8187, "step": 6125 }, { "epoch": 1.9941444372153545, "grad_norm": 1.3054120540618896, "learning_rate": 3.33675494495495e-05, "loss": 0.7966, "step": 6130 }, { "epoch": 1.9957709824333116, "grad_norm": 1.613921046257019, "learning_rate": 3.334322521449066e-05, "loss": 0.8218, "step": 6135 }, { "epoch": 1.9973975276512688, "grad_norm": 1.3561147451400757, "learning_rate": 3.331889208797992e-05, "loss": 0.8058, "step": 6140 }, { "epoch": 1.9990240728692257, "grad_norm": 1.310907006263733, "learning_rate": 3.3294550095949325e-05, "loss": 0.8335, "step": 6145 }, { "epoch": 2.0, "eval_f1": 0.8150318611919625, "eval_loss": 0.41796875, "eval_precision": 0.815227780708104, "eval_recall": 0.8148537635264215, "eval_runtime": 386.7746, "eval_samples_per_second": 1017.22, "eval_steps_per_second": 1.988, "step": 6148 }, { "epoch": 2.000650618087183, "grad_norm": 1.0507025718688965, "learning_rate": 3.327019926434036e-05, "loss": 0.8032, "step": 6150 }, { "epoch": 2.00227716330514, "grad_norm": 1.1561626195907593, "learning_rate": 3.3245839619103916e-05, "loss": 0.7635, "step": 6155 }, { "epoch": 2.003903708523097, "grad_norm": 1.442806601524353, "learning_rate": 3.3221471186200306e-05, "loss": 0.7603, "step": 6160 }, { "epoch": 2.005530253741054, "grad_norm": 1.5425055027008057, "learning_rate": 3.319709399159919e-05, "loss": 0.7817, "step": 6165 }, { "epoch": 2.007156798959011, "grad_norm": 1.192089557647705, "learning_rate": 3.3172708061279564e-05, "loss": 0.7574, "step": 6170 }, { "epoch": 2.008783344176968, "grad_norm": 1.2929043769836426, "learning_rate": 3.314831342122974e-05, "loss": 0.7572, "step": 6175 }, { "epoch": 2.0104098893949254, "grad_norm": 1.5083773136138916, "learning_rate": 3.312391009744732e-05, "loss": 0.777, "step": 6180 }, { "epoch": 2.0120364346128823, "grad_norm": 1.3583420515060425, "learning_rate": 3.309949811593914e-05, "loss": 0.7717, "step": 6185 }, { "epoch": 2.013662979830839, "grad_norm": 1.2936967611312866, "learning_rate": 3.3075077502721266e-05, "loss": 0.7687, "step": 6190 }, { "epoch": 2.0152895250487965, "grad_norm": 1.5030853748321533, "learning_rate": 3.3050648283818985e-05, "loss": 0.7666, "step": 6195 }, { "epoch": 2.0169160702667535, "grad_norm": 1.540038824081421, "learning_rate": 3.3026210485266734e-05, "loss": 0.7806, "step": 6200 }, { "epoch": 2.0185426154847104, "grad_norm": 1.4160559177398682, "learning_rate": 3.30017641331081e-05, "loss": 0.7709, "step": 6205 }, { "epoch": 2.0201691607026677, "grad_norm": 1.4549380540847778, "learning_rate": 3.2977309253395786e-05, "loss": 0.7748, "step": 6210 }, { "epoch": 2.0217957059206246, "grad_norm": 1.3925306797027588, "learning_rate": 3.295284587219159e-05, "loss": 0.7947, "step": 6215 }, { "epoch": 2.0234222511385815, "grad_norm": 1.363878846168518, "learning_rate": 3.292837401556635e-05, "loss": 0.799, "step": 6220 }, { "epoch": 2.025048796356539, "grad_norm": 1.2926141023635864, "learning_rate": 3.290389370959995e-05, "loss": 0.7655, "step": 6225 }, { "epoch": 2.026675341574496, "grad_norm": 1.4668222665786743, "learning_rate": 3.287940498038129e-05, "loss": 0.77, "step": 6230 }, { "epoch": 2.0283018867924527, "grad_norm": 1.2748764753341675, "learning_rate": 3.285490785400822e-05, "loss": 0.7589, "step": 6235 }, { "epoch": 2.02992843201041, "grad_norm": 1.2977699041366577, "learning_rate": 3.283040235658756e-05, "loss": 0.7383, "step": 6240 }, { "epoch": 2.031554977228367, "grad_norm": 1.3510515689849854, "learning_rate": 3.280588851423504e-05, "loss": 0.7697, "step": 6245 }, { "epoch": 2.033181522446324, "grad_norm": 1.4643625020980835, "learning_rate": 3.27813663530753e-05, "loss": 0.7608, "step": 6250 }, { "epoch": 2.0348080676642812, "grad_norm": 1.4025086164474487, "learning_rate": 3.275683589924181e-05, "loss": 0.7664, "step": 6255 }, { "epoch": 2.036434612882238, "grad_norm": 1.2898201942443848, "learning_rate": 3.273229717887692e-05, "loss": 0.7712, "step": 6260 }, { "epoch": 2.038061158100195, "grad_norm": 1.6806743144989014, "learning_rate": 3.270775021813177e-05, "loss": 0.779, "step": 6265 }, { "epoch": 2.0396877033181524, "grad_norm": 1.2774218320846558, "learning_rate": 3.268319504316627e-05, "loss": 0.7388, "step": 6270 }, { "epoch": 2.0413142485361093, "grad_norm": 1.47493577003479, "learning_rate": 3.26586316801491e-05, "loss": 0.7799, "step": 6275 }, { "epoch": 2.0429407937540662, "grad_norm": 1.2892869710922241, "learning_rate": 3.263406015525767e-05, "loss": 0.7577, "step": 6280 }, { "epoch": 2.0445673389720236, "grad_norm": 1.3456687927246094, "learning_rate": 3.2609480494678055e-05, "loss": 0.7595, "step": 6285 }, { "epoch": 2.0461938841899805, "grad_norm": 1.352818250656128, "learning_rate": 3.258489272460507e-05, "loss": 0.7582, "step": 6290 }, { "epoch": 2.0478204294079374, "grad_norm": 1.470285177230835, "learning_rate": 3.256029687124209e-05, "loss": 0.7792, "step": 6295 }, { "epoch": 2.0494469746258948, "grad_norm": 1.4765840768814087, "learning_rate": 3.2535692960801147e-05, "loss": 0.7917, "step": 6300 }, { "epoch": 2.0510735198438517, "grad_norm": 1.3707880973815918, "learning_rate": 3.2511081019502875e-05, "loss": 0.7572, "step": 6305 }, { "epoch": 2.0527000650618086, "grad_norm": 1.4382959604263306, "learning_rate": 3.248646107357643e-05, "loss": 0.7795, "step": 6310 }, { "epoch": 2.054326610279766, "grad_norm": 1.493988037109375, "learning_rate": 3.2461833149259516e-05, "loss": 0.7838, "step": 6315 }, { "epoch": 2.055953155497723, "grad_norm": 1.3789554834365845, "learning_rate": 3.243719727279834e-05, "loss": 0.7721, "step": 6320 }, { "epoch": 2.0575797007156797, "grad_norm": 1.6243317127227783, "learning_rate": 3.241255347044759e-05, "loss": 0.7719, "step": 6325 }, { "epoch": 2.059206245933637, "grad_norm": 1.3848278522491455, "learning_rate": 3.2387901768470375e-05, "loss": 0.7765, "step": 6330 }, { "epoch": 2.060832791151594, "grad_norm": 1.5097062587738037, "learning_rate": 3.236324219313826e-05, "loss": 0.746, "step": 6335 }, { "epoch": 2.062459336369551, "grad_norm": 1.6452648639678955, "learning_rate": 3.2338574770731174e-05, "loss": 0.7738, "step": 6340 }, { "epoch": 2.0640858815875083, "grad_norm": 1.4471609592437744, "learning_rate": 3.231389952753742e-05, "loss": 0.754, "step": 6345 }, { "epoch": 2.065712426805465, "grad_norm": 1.3167904615402222, "learning_rate": 3.2289216489853613e-05, "loss": 0.7546, "step": 6350 }, { "epoch": 2.067338972023422, "grad_norm": 1.3347926139831543, "learning_rate": 3.226452568398471e-05, "loss": 0.781, "step": 6355 }, { "epoch": 2.0689655172413794, "grad_norm": 1.395614743232727, "learning_rate": 3.223982713624394e-05, "loss": 0.783, "step": 6360 }, { "epoch": 2.0705920624593364, "grad_norm": 1.252864122390747, "learning_rate": 3.221512087295275e-05, "loss": 0.7713, "step": 6365 }, { "epoch": 2.0722186076772933, "grad_norm": 1.346101999282837, "learning_rate": 3.2190406920440847e-05, "loss": 0.7626, "step": 6370 }, { "epoch": 2.0738451528952506, "grad_norm": 1.4899871349334717, "learning_rate": 3.216568530504611e-05, "loss": 0.7903, "step": 6375 }, { "epoch": 2.0754716981132075, "grad_norm": 1.4338340759277344, "learning_rate": 3.21409560531146e-05, "loss": 0.7563, "step": 6380 }, { "epoch": 2.0770982433311644, "grad_norm": 1.506343126296997, "learning_rate": 3.21162191910005e-05, "loss": 0.7708, "step": 6385 }, { "epoch": 2.078724788549122, "grad_norm": 1.2952907085418701, "learning_rate": 3.2091474745066116e-05, "loss": 0.8002, "step": 6390 }, { "epoch": 2.0803513337670787, "grad_norm": 1.403756022453308, "learning_rate": 3.2066722741681845e-05, "loss": 0.7683, "step": 6395 }, { "epoch": 2.0819778789850356, "grad_norm": 1.3915510177612305, "learning_rate": 3.204196320722609e-05, "loss": 0.7678, "step": 6400 }, { "epoch": 2.083604424202993, "grad_norm": 1.4182417392730713, "learning_rate": 3.2017196168085345e-05, "loss": 0.7699, "step": 6405 }, { "epoch": 2.08523096942095, "grad_norm": 1.3907455205917358, "learning_rate": 3.199242165065406e-05, "loss": 0.7807, "step": 6410 }, { "epoch": 2.086857514638907, "grad_norm": 1.4530235528945923, "learning_rate": 3.196763968133466e-05, "loss": 0.7667, "step": 6415 }, { "epoch": 2.088484059856864, "grad_norm": 1.5396872758865356, "learning_rate": 3.194285028653754e-05, "loss": 0.7564, "step": 6420 }, { "epoch": 2.090110605074821, "grad_norm": 1.75229811668396, "learning_rate": 3.191805349268097e-05, "loss": 0.7695, "step": 6425 }, { "epoch": 2.091737150292778, "grad_norm": 1.3022738695144653, "learning_rate": 3.189324932619112e-05, "loss": 0.7969, "step": 6430 }, { "epoch": 2.0933636955107353, "grad_norm": 1.0917425155639648, "learning_rate": 3.1868437813502026e-05, "loss": 0.8016, "step": 6435 }, { "epoch": 2.094990240728692, "grad_norm": 1.2338515520095825, "learning_rate": 3.184361898105554e-05, "loss": 0.7925, "step": 6440 }, { "epoch": 2.096616785946649, "grad_norm": 1.453049659729004, "learning_rate": 3.1818792855301316e-05, "loss": 0.7658, "step": 6445 }, { "epoch": 2.0982433311646065, "grad_norm": 1.3018817901611328, "learning_rate": 3.17939594626968e-05, "loss": 0.7598, "step": 6450 }, { "epoch": 2.0998698763825634, "grad_norm": 1.6936924457550049, "learning_rate": 3.177408753426531e-05, "loss": 0.786, "step": 6455 }, { "epoch": 2.1014964216005203, "grad_norm": 1.354274868965149, "learning_rate": 3.174924112802767e-05, "loss": 0.7706, "step": 6460 }, { "epoch": 2.1031229668184777, "grad_norm": 1.38499116897583, "learning_rate": 3.172438752906165e-05, "loss": 0.7846, "step": 6465 }, { "epoch": 2.1047495120364346, "grad_norm": 1.4282853603363037, "learning_rate": 3.169952676385395e-05, "loss": 0.7851, "step": 6470 }, { "epoch": 2.1063760572543915, "grad_norm": 1.2595257759094238, "learning_rate": 3.167465885889892e-05, "loss": 0.7339, "step": 6475 }, { "epoch": 2.108002602472349, "grad_norm": 1.4170726537704468, "learning_rate": 3.164978384069852e-05, "loss": 0.7561, "step": 6480 }, { "epoch": 2.1096291476903057, "grad_norm": 1.4412657022476196, "learning_rate": 3.162490173576227e-05, "loss": 0.7628, "step": 6485 }, { "epoch": 2.1112556929082626, "grad_norm": 1.4905624389648438, "learning_rate": 3.1600012570607284e-05, "loss": 0.7793, "step": 6490 }, { "epoch": 2.11288223812622, "grad_norm": 1.426479697227478, "learning_rate": 3.157511637175815e-05, "loss": 0.7489, "step": 6495 }, { "epoch": 2.114508783344177, "grad_norm": 1.3552989959716797, "learning_rate": 3.155021316574699e-05, "loss": 0.7654, "step": 6500 }, { "epoch": 2.116135328562134, "grad_norm": 1.1724817752838135, "learning_rate": 3.152530297911337e-05, "loss": 0.7457, "step": 6505 }, { "epoch": 2.117761873780091, "grad_norm": 1.3293735980987549, "learning_rate": 3.150038583840431e-05, "loss": 0.7654, "step": 6510 }, { "epoch": 2.119388418998048, "grad_norm": 1.239430546760559, "learning_rate": 3.147546177017425e-05, "loss": 0.7476, "step": 6515 }, { "epoch": 2.121014964216005, "grad_norm": 1.5355035066604614, "learning_rate": 3.1450530800984965e-05, "loss": 0.7801, "step": 6520 }, { "epoch": 2.1226415094339623, "grad_norm": 1.4376792907714844, "learning_rate": 3.142559295740566e-05, "loss": 0.7525, "step": 6525 }, { "epoch": 2.1242680546519193, "grad_norm": 1.6137937307357788, "learning_rate": 3.1400648266012794e-05, "loss": 0.7876, "step": 6530 }, { "epoch": 2.125894599869876, "grad_norm": 1.3480271100997925, "learning_rate": 3.137569675339016e-05, "loss": 0.7759, "step": 6535 }, { "epoch": 2.1275211450878335, "grad_norm": 1.1955807209014893, "learning_rate": 3.1350738446128826e-05, "loss": 0.7565, "step": 6540 }, { "epoch": 2.1291476903057904, "grad_norm": 1.3620483875274658, "learning_rate": 3.132577337082708e-05, "loss": 0.7779, "step": 6545 }, { "epoch": 2.130774235523748, "grad_norm": 1.617068886756897, "learning_rate": 3.130080155409043e-05, "loss": 0.7738, "step": 6550 }, { "epoch": 2.1324007807417047, "grad_norm": 2.0417439937591553, "learning_rate": 3.1275823022531584e-05, "loss": 0.7813, "step": 6555 }, { "epoch": 2.1340273259596616, "grad_norm": 1.7474989891052246, "learning_rate": 3.125083780277038e-05, "loss": 0.7759, "step": 6560 }, { "epoch": 2.1356538711776185, "grad_norm": 1.4056528806686401, "learning_rate": 3.12258459214338e-05, "loss": 0.7511, "step": 6565 }, { "epoch": 2.137280416395576, "grad_norm": 1.528346300125122, "learning_rate": 3.120084740515591e-05, "loss": 0.7497, "step": 6570 }, { "epoch": 2.1389069616135328, "grad_norm": 1.6084930896759033, "learning_rate": 3.117584228057789e-05, "loss": 0.7763, "step": 6575 }, { "epoch": 2.14053350683149, "grad_norm": 1.431966781616211, "learning_rate": 3.115083057434791e-05, "loss": 0.781, "step": 6580 }, { "epoch": 2.142160052049447, "grad_norm": 1.5099892616271973, "learning_rate": 3.112581231312118e-05, "loss": 0.7534, "step": 6585 }, { "epoch": 2.143786597267404, "grad_norm": 1.4613206386566162, "learning_rate": 3.1100787523559884e-05, "loss": 0.7775, "step": 6590 }, { "epoch": 2.145413142485361, "grad_norm": 1.5528578758239746, "learning_rate": 3.107575623233318e-05, "loss": 0.7872, "step": 6595 }, { "epoch": 2.147039687703318, "grad_norm": 1.3113067150115967, "learning_rate": 3.105071846611714e-05, "loss": 0.7916, "step": 6600 }, { "epoch": 2.148666232921275, "grad_norm": 1.4118595123291016, "learning_rate": 3.102567425159475e-05, "loss": 0.7353, "step": 6605 }, { "epoch": 2.1502927781392325, "grad_norm": 1.1989558935165405, "learning_rate": 3.1000623615455846e-05, "loss": 0.7815, "step": 6610 }, { "epoch": 2.1519193233571894, "grad_norm": 1.4240597486495972, "learning_rate": 3.097556658439713e-05, "loss": 0.7496, "step": 6615 }, { "epoch": 2.1535458685751463, "grad_norm": 1.3959871530532837, "learning_rate": 3.0950503185122116e-05, "loss": 0.771, "step": 6620 }, { "epoch": 2.1551724137931036, "grad_norm": 1.3204529285430908, "learning_rate": 3.092543344434109e-05, "loss": 0.7679, "step": 6625 }, { "epoch": 2.1567989590110606, "grad_norm": 1.4510406255722046, "learning_rate": 3.09003573887711e-05, "loss": 0.7781, "step": 6630 }, { "epoch": 2.1584255042290175, "grad_norm": 1.2739536762237549, "learning_rate": 3.087527504513595e-05, "loss": 0.7543, "step": 6635 }, { "epoch": 2.160052049446975, "grad_norm": 1.5925034284591675, "learning_rate": 3.085018644016611e-05, "loss": 0.7949, "step": 6640 }, { "epoch": 2.1616785946649317, "grad_norm": 1.2139370441436768, "learning_rate": 3.0825091600598746e-05, "loss": 0.7673, "step": 6645 }, { "epoch": 2.1633051398828886, "grad_norm": 1.3349965810775757, "learning_rate": 3.079999055317764e-05, "loss": 0.761, "step": 6650 }, { "epoch": 2.164931685100846, "grad_norm": 1.5957131385803223, "learning_rate": 3.077488332465322e-05, "loss": 0.7865, "step": 6655 }, { "epoch": 2.166558230318803, "grad_norm": 1.5138154029846191, "learning_rate": 3.074976994178251e-05, "loss": 0.8093, "step": 6660 }, { "epoch": 2.16818477553676, "grad_norm": 1.36934494972229, "learning_rate": 3.0724650431329035e-05, "loss": 0.7971, "step": 6665 }, { "epoch": 2.169811320754717, "grad_norm": 1.3482638597488403, "learning_rate": 3.069952482006293e-05, "loss": 0.7776, "step": 6670 }, { "epoch": 2.171437865972674, "grad_norm": 1.3168878555297852, "learning_rate": 3.067439313476076e-05, "loss": 0.7649, "step": 6675 }, { "epoch": 2.173064411190631, "grad_norm": 1.4563496112823486, "learning_rate": 3.06492554022056e-05, "loss": 0.8004, "step": 6680 }, { "epoch": 2.1746909564085883, "grad_norm": 2.1240453720092773, "learning_rate": 3.062411164918697e-05, "loss": 0.7997, "step": 6685 }, { "epoch": 2.1763175016265452, "grad_norm": 1.4619108438491821, "learning_rate": 3.05989619025008e-05, "loss": 0.764, "step": 6690 }, { "epoch": 2.177944046844502, "grad_norm": 1.4473713636398315, "learning_rate": 3.0573806188949386e-05, "loss": 0.7471, "step": 6695 }, { "epoch": 2.1795705920624595, "grad_norm": 1.5856972932815552, "learning_rate": 3.054864453534144e-05, "loss": 0.7979, "step": 6700 }, { "epoch": 2.1811971372804164, "grad_norm": 1.1720261573791504, "learning_rate": 3.052347696849193e-05, "loss": 0.7794, "step": 6705 }, { "epoch": 2.1828236824983733, "grad_norm": 1.514341950416565, "learning_rate": 3.0498303515222187e-05, "loss": 0.7635, "step": 6710 }, { "epoch": 2.1844502277163307, "grad_norm": 1.4805790185928345, "learning_rate": 3.0473124202359783e-05, "loss": 0.7591, "step": 6715 }, { "epoch": 2.1860767729342876, "grad_norm": 1.2881990671157837, "learning_rate": 3.044793905673855e-05, "loss": 0.7392, "step": 6720 }, { "epoch": 2.1877033181522445, "grad_norm": 1.7704826593399048, "learning_rate": 3.0422748105198525e-05, "loss": 0.8033, "step": 6725 }, { "epoch": 2.189329863370202, "grad_norm": 1.3933912515640259, "learning_rate": 3.0397551374585926e-05, "loss": 0.7605, "step": 6730 }, { "epoch": 2.1909564085881588, "grad_norm": 1.2783641815185547, "learning_rate": 3.037234889175316e-05, "loss": 0.7604, "step": 6735 }, { "epoch": 2.1925829538061157, "grad_norm": 1.2490899562835693, "learning_rate": 3.034714068355874e-05, "loss": 0.7784, "step": 6740 }, { "epoch": 2.194209499024073, "grad_norm": 1.5016183853149414, "learning_rate": 3.0321926776867275e-05, "loss": 0.7802, "step": 6745 }, { "epoch": 2.19583604424203, "grad_norm": 1.7094157934188843, "learning_rate": 3.0296707198549466e-05, "loss": 0.7984, "step": 6750 }, { "epoch": 2.197462589459987, "grad_norm": 1.2087597846984863, "learning_rate": 3.0271481975482053e-05, "loss": 0.7751, "step": 6755 }, { "epoch": 2.199089134677944, "grad_norm": 1.282736897468567, "learning_rate": 3.0246251134547777e-05, "loss": 0.7698, "step": 6760 }, { "epoch": 2.200715679895901, "grad_norm": 2.0176427364349365, "learning_rate": 3.0221014702635388e-05, "loss": 0.7591, "step": 6765 }, { "epoch": 2.202342225113858, "grad_norm": 1.4624202251434326, "learning_rate": 3.0195772706639574e-05, "loss": 0.7659, "step": 6770 }, { "epoch": 2.2039687703318154, "grad_norm": 1.5746955871582031, "learning_rate": 3.0170525173460977e-05, "loss": 0.7806, "step": 6775 }, { "epoch": 2.2055953155497723, "grad_norm": 1.5319770574569702, "learning_rate": 3.0145272130006107e-05, "loss": 0.7684, "step": 6780 }, { "epoch": 2.207221860767729, "grad_norm": 1.3586204051971436, "learning_rate": 3.0120013603187386e-05, "loss": 0.7382, "step": 6785 }, { "epoch": 2.2088484059856865, "grad_norm": 1.3229924440383911, "learning_rate": 3.009474961992304e-05, "loss": 0.7634, "step": 6790 }, { "epoch": 2.2104749512036435, "grad_norm": 1.5725353956222534, "learning_rate": 3.0069480207137136e-05, "loss": 0.7564, "step": 6795 }, { "epoch": 2.2121014964216004, "grad_norm": 1.5107423067092896, "learning_rate": 3.0044205391759518e-05, "loss": 0.7927, "step": 6800 }, { "epoch": 2.2137280416395577, "grad_norm": 1.59178626537323, "learning_rate": 3.0018925200725795e-05, "loss": 0.7839, "step": 6805 }, { "epoch": 2.2153545868575146, "grad_norm": 1.341594934463501, "learning_rate": 2.9993639660977285e-05, "loss": 0.7484, "step": 6810 }, { "epoch": 2.2169811320754715, "grad_norm": 1.0305352210998535, "learning_rate": 2.9968348799461043e-05, "loss": 0.7498, "step": 6815 }, { "epoch": 2.218607677293429, "grad_norm": 1.607041597366333, "learning_rate": 2.9943052643129755e-05, "loss": 0.7485, "step": 6820 }, { "epoch": 2.220234222511386, "grad_norm": 1.4778155088424683, "learning_rate": 2.991775121894177e-05, "loss": 0.7926, "step": 6825 }, { "epoch": 2.2218607677293427, "grad_norm": 1.3483030796051025, "learning_rate": 2.9892444553861053e-05, "loss": 0.8156, "step": 6830 }, { "epoch": 2.2234873129473, "grad_norm": 1.4081867933273315, "learning_rate": 2.986713267485715e-05, "loss": 0.7772, "step": 6835 }, { "epoch": 2.225113858165257, "grad_norm": 1.4287421703338623, "learning_rate": 2.9841815608905156e-05, "loss": 0.7292, "step": 6840 }, { "epoch": 2.226740403383214, "grad_norm": 1.4521280527114868, "learning_rate": 2.9816493382985712e-05, "loss": 0.7887, "step": 6845 }, { "epoch": 2.2283669486011712, "grad_norm": 1.4874500036239624, "learning_rate": 2.9791166024084942e-05, "loss": 0.7502, "step": 6850 }, { "epoch": 2.229993493819128, "grad_norm": 1.5421522855758667, "learning_rate": 2.976583355919444e-05, "loss": 0.7875, "step": 6855 }, { "epoch": 2.231620039037085, "grad_norm": 1.4409303665161133, "learning_rate": 2.974049601531126e-05, "loss": 0.7916, "step": 6860 }, { "epoch": 2.2332465842550424, "grad_norm": 1.1605232954025269, "learning_rate": 2.9715153419437845e-05, "loss": 0.7719, "step": 6865 }, { "epoch": 2.2348731294729993, "grad_norm": 1.3349542617797852, "learning_rate": 2.968980579858203e-05, "loss": 0.7768, "step": 6870 }, { "epoch": 2.2364996746909562, "grad_norm": 1.8573849201202393, "learning_rate": 2.9664453179757022e-05, "loss": 0.7762, "step": 6875 }, { "epoch": 2.2381262199089136, "grad_norm": 1.3962373733520508, "learning_rate": 2.963909558998133e-05, "loss": 0.7292, "step": 6880 }, { "epoch": 2.2397527651268705, "grad_norm": 1.2101552486419678, "learning_rate": 2.961373305627877e-05, "loss": 0.7455, "step": 6885 }, { "epoch": 2.2413793103448274, "grad_norm": 1.4896292686462402, "learning_rate": 2.9588365605678438e-05, "loss": 0.736, "step": 6890 }, { "epoch": 2.2430058555627848, "grad_norm": 1.4202208518981934, "learning_rate": 2.9562993265214644e-05, "loss": 0.7652, "step": 6895 }, { "epoch": 2.2446324007807417, "grad_norm": 1.3560926914215088, "learning_rate": 2.9537616061926938e-05, "loss": 0.7562, "step": 6900 }, { "epoch": 2.2462589459986986, "grad_norm": 1.6637440919876099, "learning_rate": 2.9512234022860018e-05, "loss": 0.7622, "step": 6905 }, { "epoch": 2.247885491216656, "grad_norm": 1.53335440158844, "learning_rate": 2.9486847175063786e-05, "loss": 0.7551, "step": 6910 }, { "epoch": 2.249512036434613, "grad_norm": 1.5960514545440674, "learning_rate": 2.9461455545593226e-05, "loss": 0.7767, "step": 6915 }, { "epoch": 2.2511385816525697, "grad_norm": 1.8823623657226562, "learning_rate": 2.9436059161508423e-05, "loss": 0.801, "step": 6920 }, { "epoch": 2.252765126870527, "grad_norm": 1.4821723699569702, "learning_rate": 2.9410658049874558e-05, "loss": 0.7682, "step": 6925 }, { "epoch": 2.254391672088484, "grad_norm": 1.3478459119796753, "learning_rate": 2.9385252237761806e-05, "loss": 0.8058, "step": 6930 }, { "epoch": 2.2560182173064414, "grad_norm": 1.7460572719573975, "learning_rate": 2.935984175224539e-05, "loss": 0.7403, "step": 6935 }, { "epoch": 2.2576447625243983, "grad_norm": 1.3906254768371582, "learning_rate": 2.933442662040549e-05, "loss": 0.785, "step": 6940 }, { "epoch": 2.259271307742355, "grad_norm": 1.7499678134918213, "learning_rate": 2.9309006869327254e-05, "loss": 0.7722, "step": 6945 }, { "epoch": 2.260897852960312, "grad_norm": 1.3640332221984863, "learning_rate": 2.9283582526100738e-05, "loss": 0.7594, "step": 6950 }, { "epoch": 2.2625243981782694, "grad_norm": 1.395585060119629, "learning_rate": 2.92581536178209e-05, "loss": 0.7559, "step": 6955 }, { "epoch": 2.2641509433962264, "grad_norm": 1.4170963764190674, "learning_rate": 2.9232720171587564e-05, "loss": 0.7881, "step": 6960 }, { "epoch": 2.2657774886141837, "grad_norm": 1.257938027381897, "learning_rate": 2.9207282214505383e-05, "loss": 0.7859, "step": 6965 }, { "epoch": 2.2674040338321406, "grad_norm": 1.4764996767044067, "learning_rate": 2.9181839773683827e-05, "loss": 0.7568, "step": 6970 }, { "epoch": 2.2690305790500975, "grad_norm": 1.4710463285446167, "learning_rate": 2.9156392876237138e-05, "loss": 0.7927, "step": 6975 }, { "epoch": 2.2706571242680544, "grad_norm": 1.6916797161102295, "learning_rate": 2.9130941549284307e-05, "loss": 0.7615, "step": 6980 }, { "epoch": 2.272283669486012, "grad_norm": 1.5723096132278442, "learning_rate": 2.9105485819949045e-05, "loss": 0.7752, "step": 6985 }, { "epoch": 2.2739102147039687, "grad_norm": 1.2084461450576782, "learning_rate": 2.9080025715359773e-05, "loss": 0.7542, "step": 6990 }, { "epoch": 2.275536759921926, "grad_norm": 1.5358495712280273, "learning_rate": 2.905456126264954e-05, "loss": 0.7587, "step": 6995 }, { "epoch": 2.277163305139883, "grad_norm": 1.9912947416305542, "learning_rate": 2.9029092488956045e-05, "loss": 0.7612, "step": 7000 }, { "epoch": 2.27878985035784, "grad_norm": 1.6025663614273071, "learning_rate": 2.9003619421421612e-05, "loss": 0.7867, "step": 7005 }, { "epoch": 2.280416395575797, "grad_norm": 1.4013586044311523, "learning_rate": 2.8978142087193112e-05, "loss": 0.7663, "step": 7010 }, { "epoch": 2.282042940793754, "grad_norm": 1.4046440124511719, "learning_rate": 2.8952660513421976e-05, "loss": 0.7369, "step": 7015 }, { "epoch": 2.283669486011711, "grad_norm": 1.669668197631836, "learning_rate": 2.8927174727264154e-05, "loss": 0.7819, "step": 7020 }, { "epoch": 2.2852960312296684, "grad_norm": 1.27577543258667, "learning_rate": 2.8901684755880087e-05, "loss": 0.7878, "step": 7025 }, { "epoch": 2.2869225764476253, "grad_norm": 1.555192232131958, "learning_rate": 2.8876190626434664e-05, "loss": 0.7575, "step": 7030 }, { "epoch": 2.288549121665582, "grad_norm": 1.3229814767837524, "learning_rate": 2.885069236609722e-05, "loss": 0.7826, "step": 7035 }, { "epoch": 2.290175666883539, "grad_norm": 1.2587215900421143, "learning_rate": 2.8825190002041474e-05, "loss": 0.7802, "step": 7040 }, { "epoch": 2.2918022121014965, "grad_norm": 1.6922944784164429, "learning_rate": 2.8799683561445545e-05, "loss": 0.8017, "step": 7045 }, { "epoch": 2.2934287573194534, "grad_norm": 1.4293512105941772, "learning_rate": 2.8774173071491874e-05, "loss": 0.7646, "step": 7050 }, { "epoch": 2.2950553025374107, "grad_norm": 1.297987937927246, "learning_rate": 2.874865855936722e-05, "loss": 0.7853, "step": 7055 }, { "epoch": 2.2966818477553677, "grad_norm": 1.4629950523376465, "learning_rate": 2.8723140052262647e-05, "loss": 0.7531, "step": 7060 }, { "epoch": 2.2983083929733246, "grad_norm": 1.5489518642425537, "learning_rate": 2.8697617577373447e-05, "loss": 0.7609, "step": 7065 }, { "epoch": 2.2999349381912815, "grad_norm": 1.6277120113372803, "learning_rate": 2.8672091161899172e-05, "loss": 0.7746, "step": 7070 }, { "epoch": 2.301561483409239, "grad_norm": 1.2687867879867554, "learning_rate": 2.864656083304355e-05, "loss": 0.7519, "step": 7075 }, { "epoch": 2.3031880286271957, "grad_norm": 1.3497977256774902, "learning_rate": 2.8621026618014483e-05, "loss": 0.8137, "step": 7080 }, { "epoch": 2.304814573845153, "grad_norm": 1.524418592453003, "learning_rate": 2.859548854402403e-05, "loss": 0.7841, "step": 7085 }, { "epoch": 2.30644111906311, "grad_norm": 1.4669740200042725, "learning_rate": 2.8569946638288343e-05, "loss": 0.7809, "step": 7090 }, { "epoch": 2.308067664281067, "grad_norm": 1.314644455909729, "learning_rate": 2.8544400928027665e-05, "loss": 0.7995, "step": 7095 }, { "epoch": 2.3096942094990243, "grad_norm": 1.278306484222412, "learning_rate": 2.85188514404663e-05, "loss": 0.7882, "step": 7100 }, { "epoch": 2.311320754716981, "grad_norm": 1.3126131296157837, "learning_rate": 2.8493298202832568e-05, "loss": 0.7562, "step": 7105 }, { "epoch": 2.312947299934938, "grad_norm": 1.2307605743408203, "learning_rate": 2.8467741242358794e-05, "loss": 0.7637, "step": 7110 }, { "epoch": 2.3145738451528954, "grad_norm": 1.2585320472717285, "learning_rate": 2.844218058628126e-05, "loss": 0.7543, "step": 7115 }, { "epoch": 2.3162003903708523, "grad_norm": 1.6120080947875977, "learning_rate": 2.84166162618402e-05, "loss": 0.7649, "step": 7120 }, { "epoch": 2.3178269355888093, "grad_norm": 1.5300936698913574, "learning_rate": 2.8391048296279742e-05, "loss": 0.7754, "step": 7125 }, { "epoch": 2.3194534808067666, "grad_norm": 1.4339016675949097, "learning_rate": 2.8365476716847906e-05, "loss": 0.7855, "step": 7130 }, { "epoch": 2.3210800260247235, "grad_norm": 1.3723671436309814, "learning_rate": 2.833990155079656e-05, "loss": 0.7737, "step": 7135 }, { "epoch": 2.3227065712426804, "grad_norm": 1.5506298542022705, "learning_rate": 2.8314322825381394e-05, "loss": 0.7729, "step": 7140 }, { "epoch": 2.324333116460638, "grad_norm": 1.3810886144638062, "learning_rate": 2.8288740567861888e-05, "loss": 0.7659, "step": 7145 }, { "epoch": 2.3259596616785947, "grad_norm": 1.3095033168792725, "learning_rate": 2.8263154805501297e-05, "loss": 0.821, "step": 7150 }, { "epoch": 2.3275862068965516, "grad_norm": 1.2107062339782715, "learning_rate": 2.8237565565566592e-05, "loss": 0.7833, "step": 7155 }, { "epoch": 2.329212752114509, "grad_norm": 1.4244047403335571, "learning_rate": 2.821197287532847e-05, "loss": 0.7688, "step": 7160 }, { "epoch": 2.330839297332466, "grad_norm": 1.425652265548706, "learning_rate": 2.8186376762061288e-05, "loss": 0.7428, "step": 7165 }, { "epoch": 2.3324658425504228, "grad_norm": 1.4377485513687134, "learning_rate": 2.8160777253043074e-05, "loss": 0.7832, "step": 7170 }, { "epoch": 2.33409238776838, "grad_norm": 1.428046464920044, "learning_rate": 2.8135174375555446e-05, "loss": 0.7573, "step": 7175 }, { "epoch": 2.335718932986337, "grad_norm": 1.6208500862121582, "learning_rate": 2.8109568156883633e-05, "loss": 0.7441, "step": 7180 }, { "epoch": 2.337345478204294, "grad_norm": 1.5621751546859741, "learning_rate": 2.808395862431642e-05, "loss": 0.7542, "step": 7185 }, { "epoch": 2.3389720234222513, "grad_norm": 1.5660641193389893, "learning_rate": 2.8058345805146113e-05, "loss": 0.7478, "step": 7190 }, { "epoch": 2.340598568640208, "grad_norm": 1.4224023818969727, "learning_rate": 2.8032729726668538e-05, "loss": 0.7431, "step": 7195 }, { "epoch": 2.342225113858165, "grad_norm": 1.304904580116272, "learning_rate": 2.800711041618298e-05, "loss": 0.7733, "step": 7200 }, { "epoch": 2.3438516590761225, "grad_norm": 1.5656379461288452, "learning_rate": 2.7981487900992182e-05, "loss": 0.7431, "step": 7205 }, { "epoch": 2.3454782042940794, "grad_norm": 1.5410288572311401, "learning_rate": 2.7955862208402283e-05, "loss": 0.7836, "step": 7210 }, { "epoch": 2.3471047495120363, "grad_norm": 1.4904292821884155, "learning_rate": 2.7930233365722825e-05, "loss": 0.7934, "step": 7215 }, { "epoch": 2.3487312947299936, "grad_norm": 1.3335866928100586, "learning_rate": 2.7904601400266707e-05, "loss": 0.7646, "step": 7220 }, { "epoch": 2.3503578399479506, "grad_norm": 1.290299654006958, "learning_rate": 2.7878966339350132e-05, "loss": 0.7587, "step": 7225 }, { "epoch": 2.3519843851659075, "grad_norm": 1.2934328317642212, "learning_rate": 2.7853328210292646e-05, "loss": 0.7742, "step": 7230 }, { "epoch": 2.353610930383865, "grad_norm": 1.319710373878479, "learning_rate": 2.7827687040417023e-05, "loss": 0.7394, "step": 7235 }, { "epoch": 2.3552374756018217, "grad_norm": 1.3711256980895996, "learning_rate": 2.7802042857049292e-05, "loss": 0.749, "step": 7240 }, { "epoch": 2.3568640208197786, "grad_norm": 1.6776881217956543, "learning_rate": 2.7776395687518703e-05, "loss": 0.7825, "step": 7245 }, { "epoch": 2.358490566037736, "grad_norm": 1.2112609148025513, "learning_rate": 2.7750745559157682e-05, "loss": 0.7671, "step": 7250 }, { "epoch": 2.360117111255693, "grad_norm": 1.3415905237197876, "learning_rate": 2.7725092499301797e-05, "loss": 0.745, "step": 7255 }, { "epoch": 2.36174365647365, "grad_norm": 1.3541368246078491, "learning_rate": 2.769943653528976e-05, "loss": 0.7645, "step": 7260 }, { "epoch": 2.363370201691607, "grad_norm": 1.4715090990066528, "learning_rate": 2.767377769446336e-05, "loss": 0.7702, "step": 7265 }, { "epoch": 2.364996746909564, "grad_norm": 1.4047423601150513, "learning_rate": 2.7648116004167457e-05, "loss": 0.7793, "step": 7270 }, { "epoch": 2.366623292127521, "grad_norm": 1.4258458614349365, "learning_rate": 2.7622451491749966e-05, "loss": 0.7631, "step": 7275 }, { "epoch": 2.3682498373454783, "grad_norm": 1.459695816040039, "learning_rate": 2.7596784184561787e-05, "loss": 0.8368, "step": 7280 }, { "epoch": 2.3698763825634352, "grad_norm": 1.2327107191085815, "learning_rate": 2.75711141099568e-05, "loss": 0.7713, "step": 7285 }, { "epoch": 2.371502927781392, "grad_norm": 1.6188944578170776, "learning_rate": 2.754544129529184e-05, "loss": 0.768, "step": 7290 }, { "epoch": 2.3731294729993495, "grad_norm": 1.8441368341445923, "learning_rate": 2.7519765767926668e-05, "loss": 0.7937, "step": 7295 }, { "epoch": 2.3747560182173064, "grad_norm": 1.3260788917541504, "learning_rate": 2.749408755522393e-05, "loss": 0.7925, "step": 7300 }, { "epoch": 2.3763825634352633, "grad_norm": 1.3866006135940552, "learning_rate": 2.7468406684549123e-05, "loss": 0.7974, "step": 7305 }, { "epoch": 2.3780091086532207, "grad_norm": 1.4514261484146118, "learning_rate": 2.7442723183270598e-05, "loss": 0.7857, "step": 7310 }, { "epoch": 2.3796356538711776, "grad_norm": 1.429544448852539, "learning_rate": 2.7417037078759495e-05, "loss": 0.7466, "step": 7315 }, { "epoch": 2.3812621990891345, "grad_norm": 1.7041850090026855, "learning_rate": 2.7391348398389734e-05, "loss": 0.769, "step": 7320 }, { "epoch": 2.382888744307092, "grad_norm": 1.4491127729415894, "learning_rate": 2.736565716953797e-05, "loss": 0.7513, "step": 7325 }, { "epoch": 2.3845152895250488, "grad_norm": 1.514446496963501, "learning_rate": 2.7339963419583603e-05, "loss": 0.7548, "step": 7330 }, { "epoch": 2.3861418347430057, "grad_norm": 1.5486931800842285, "learning_rate": 2.7314267175908675e-05, "loss": 0.7489, "step": 7335 }, { "epoch": 2.387768379960963, "grad_norm": 1.508542537689209, "learning_rate": 2.7288568465897917e-05, "loss": 0.7535, "step": 7340 }, { "epoch": 2.38939492517892, "grad_norm": 1.4203613996505737, "learning_rate": 2.7262867316938688e-05, "loss": 0.7729, "step": 7345 }, { "epoch": 2.391021470396877, "grad_norm": 1.3317420482635498, "learning_rate": 2.723716375642093e-05, "loss": 0.7551, "step": 7350 }, { "epoch": 2.392648015614834, "grad_norm": 1.3810874223709106, "learning_rate": 2.7211457811737167e-05, "loss": 0.772, "step": 7355 }, { "epoch": 2.394274560832791, "grad_norm": 1.2844486236572266, "learning_rate": 2.7185749510282467e-05, "loss": 0.7833, "step": 7360 }, { "epoch": 2.395901106050748, "grad_norm": 1.278760552406311, "learning_rate": 2.7160038879454392e-05, "loss": 0.7412, "step": 7365 }, { "epoch": 2.3975276512687054, "grad_norm": 1.363017201423645, "learning_rate": 2.7134325946653e-05, "loss": 0.7804, "step": 7370 }, { "epoch": 2.3991541964866623, "grad_norm": 1.4252583980560303, "learning_rate": 2.710861073928081e-05, "loss": 0.7453, "step": 7375 }, { "epoch": 2.4007807417046196, "grad_norm": 1.3902082443237305, "learning_rate": 2.7082893284742748e-05, "loss": 0.7699, "step": 7380 }, { "epoch": 2.4024072869225765, "grad_norm": 1.393570065498352, "learning_rate": 2.705717361044614e-05, "loss": 0.7462, "step": 7385 }, { "epoch": 2.4040338321405335, "grad_norm": 1.2459912300109863, "learning_rate": 2.7031451743800684e-05, "loss": 0.77, "step": 7390 }, { "epoch": 2.4056603773584904, "grad_norm": 1.5705370903015137, "learning_rate": 2.7005727712218416e-05, "loss": 0.7365, "step": 7395 }, { "epoch": 2.4072869225764477, "grad_norm": 1.396109938621521, "learning_rate": 2.6980001543113652e-05, "loss": 0.7711, "step": 7400 }, { "epoch": 2.4089134677944046, "grad_norm": 1.2370628118515015, "learning_rate": 2.6954273263903028e-05, "loss": 0.7885, "step": 7405 }, { "epoch": 2.410540013012362, "grad_norm": 1.4108110666275024, "learning_rate": 2.6928542902005406e-05, "loss": 0.7355, "step": 7410 }, { "epoch": 2.412166558230319, "grad_norm": 1.753672480583191, "learning_rate": 2.6902810484841856e-05, "loss": 0.7526, "step": 7415 }, { "epoch": 2.413793103448276, "grad_norm": 1.4011647701263428, "learning_rate": 2.6877076039835663e-05, "loss": 0.7833, "step": 7420 }, { "epoch": 2.4154196486662327, "grad_norm": 1.3792692422866821, "learning_rate": 2.685133959441226e-05, "loss": 0.7939, "step": 7425 }, { "epoch": 2.41704619388419, "grad_norm": 1.2746686935424805, "learning_rate": 2.682560117599921e-05, "loss": 0.7635, "step": 7430 }, { "epoch": 2.418672739102147, "grad_norm": 1.3409035205841064, "learning_rate": 2.6799860812026188e-05, "loss": 0.77, "step": 7435 }, { "epoch": 2.4202992843201043, "grad_norm": 1.5884721279144287, "learning_rate": 2.6774118529924934e-05, "loss": 0.795, "step": 7440 }, { "epoch": 2.4219258295380612, "grad_norm": 1.3800640106201172, "learning_rate": 2.674837435712923e-05, "loss": 0.75, "step": 7445 }, { "epoch": 2.423552374756018, "grad_norm": 1.2879539728164673, "learning_rate": 2.6722628321074883e-05, "loss": 0.7692, "step": 7450 }, { "epoch": 2.425178919973975, "grad_norm": 1.2591112852096558, "learning_rate": 2.6696880449199685e-05, "loss": 0.7782, "step": 7455 }, { "epoch": 2.4268054651919324, "grad_norm": 1.300410270690918, "learning_rate": 2.667628084834795e-05, "loss": 0.7712, "step": 7460 }, { "epoch": 2.4284320104098893, "grad_norm": 1.472419023513794, "learning_rate": 2.6650529741144665e-05, "loss": 0.7465, "step": 7465 }, { "epoch": 2.4300585556278467, "grad_norm": 1.346839427947998, "learning_rate": 2.6624776874956643e-05, "loss": 0.7474, "step": 7470 }, { "epoch": 2.4316851008458036, "grad_norm": 1.5383296012878418, "learning_rate": 2.6599022277228948e-05, "loss": 0.7672, "step": 7475 }, { "epoch": 2.4333116460637605, "grad_norm": 1.3011785745620728, "learning_rate": 2.65732659754085e-05, "loss": 0.7446, "step": 7480 }, { "epoch": 2.4349381912817174, "grad_norm": 2.0143253803253174, "learning_rate": 2.6547507996944022e-05, "loss": 0.7649, "step": 7485 }, { "epoch": 2.4365647364996748, "grad_norm": 1.4160457849502563, "learning_rate": 2.6521748369286025e-05, "loss": 0.7783, "step": 7490 }, { "epoch": 2.4381912817176317, "grad_norm": 1.4898123741149902, "learning_rate": 2.649598711988679e-05, "loss": 0.7844, "step": 7495 }, { "epoch": 2.439817826935589, "grad_norm": 1.5223138332366943, "learning_rate": 2.6470224276200312e-05, "loss": 0.7661, "step": 7500 }, { "epoch": 2.441444372153546, "grad_norm": 1.4823445081710815, "learning_rate": 2.6444459865682297e-05, "loss": 0.7677, "step": 7505 }, { "epoch": 2.443070917371503, "grad_norm": 1.3023098707199097, "learning_rate": 2.64186939157901e-05, "loss": 0.8, "step": 7510 }, { "epoch": 2.4446974625894597, "grad_norm": 1.33349609375, "learning_rate": 2.6392926453982748e-05, "loss": 0.7991, "step": 7515 }, { "epoch": 2.446324007807417, "grad_norm": 1.385600209236145, "learning_rate": 2.6367157507720852e-05, "loss": 0.7689, "step": 7520 }, { "epoch": 2.447950553025374, "grad_norm": 1.4523452520370483, "learning_rate": 2.6341387104466612e-05, "loss": 0.7579, "step": 7525 }, { "epoch": 2.4495770982433314, "grad_norm": 1.5526230335235596, "learning_rate": 2.6315615271683795e-05, "loss": 0.7688, "step": 7530 }, { "epoch": 2.4512036434612883, "grad_norm": 1.4582562446594238, "learning_rate": 2.6289842036837675e-05, "loss": 0.7777, "step": 7535 }, { "epoch": 2.452830188679245, "grad_norm": 1.5580483675003052, "learning_rate": 2.6264067427395022e-05, "loss": 0.7676, "step": 7540 }, { "epoch": 2.4544567338972025, "grad_norm": 1.6537017822265625, "learning_rate": 2.6238291470824085e-05, "loss": 0.7472, "step": 7545 }, { "epoch": 2.4560832791151594, "grad_norm": 1.5239107608795166, "learning_rate": 2.621251419459453e-05, "loss": 0.7761, "step": 7550 }, { "epoch": 2.4577098243331164, "grad_norm": 1.4437003135681152, "learning_rate": 2.6186735626177428e-05, "loss": 0.7655, "step": 7555 }, { "epoch": 2.4593363695510737, "grad_norm": 1.358473300933838, "learning_rate": 2.6160955793045255e-05, "loss": 0.7556, "step": 7560 }, { "epoch": 2.4609629147690306, "grad_norm": 1.3156447410583496, "learning_rate": 2.6135174722671813e-05, "loss": 0.7812, "step": 7565 }, { "epoch": 2.4625894599869875, "grad_norm": 1.613652229309082, "learning_rate": 2.6109392442532217e-05, "loss": 0.7508, "step": 7570 }, { "epoch": 2.464216005204945, "grad_norm": 1.4314448833465576, "learning_rate": 2.608360898010288e-05, "loss": 0.7945, "step": 7575 }, { "epoch": 2.465842550422902, "grad_norm": 1.4274673461914062, "learning_rate": 2.6057824362861487e-05, "loss": 0.8013, "step": 7580 }, { "epoch": 2.4674690956408587, "grad_norm": 1.1439406871795654, "learning_rate": 2.603203861828693e-05, "loss": 0.7798, "step": 7585 }, { "epoch": 2.469095640858816, "grad_norm": 1.268113136291504, "learning_rate": 2.600625177385932e-05, "loss": 0.7622, "step": 7590 }, { "epoch": 2.470722186076773, "grad_norm": 1.3690615892410278, "learning_rate": 2.598046385705994e-05, "loss": 0.7634, "step": 7595 }, { "epoch": 2.47234873129473, "grad_norm": 1.5218664407730103, "learning_rate": 2.59546748953712e-05, "loss": 0.7638, "step": 7600 }, { "epoch": 2.4739752765126872, "grad_norm": 1.5143173933029175, "learning_rate": 2.5928884916276635e-05, "loss": 0.7685, "step": 7605 }, { "epoch": 2.475601821730644, "grad_norm": 1.334416389465332, "learning_rate": 2.590309394726087e-05, "loss": 0.7814, "step": 7610 }, { "epoch": 2.477228366948601, "grad_norm": 1.2340680360794067, "learning_rate": 2.5877302015809574e-05, "loss": 0.7443, "step": 7615 }, { "epoch": 2.4788549121665584, "grad_norm": 1.5717403888702393, "learning_rate": 2.5851509149409442e-05, "loss": 0.7799, "step": 7620 }, { "epoch": 2.4804814573845153, "grad_norm": 1.407362699508667, "learning_rate": 2.5825715375548175e-05, "loss": 0.7421, "step": 7625 }, { "epoch": 2.482108002602472, "grad_norm": 1.3035858869552612, "learning_rate": 2.5799920721714438e-05, "loss": 0.7651, "step": 7630 }, { "epoch": 2.4837345478204296, "grad_norm": 1.4190073013305664, "learning_rate": 2.5774125215397815e-05, "loss": 0.7733, "step": 7635 }, { "epoch": 2.4853610930383865, "grad_norm": 1.4242690801620483, "learning_rate": 2.5748328884088835e-05, "loss": 0.7647, "step": 7640 }, { "epoch": 2.4869876382563434, "grad_norm": 1.2722551822662354, "learning_rate": 2.5722531755278874e-05, "loss": 0.7795, "step": 7645 }, { "epoch": 2.4886141834743007, "grad_norm": 1.233273983001709, "learning_rate": 2.5696733856460164e-05, "loss": 0.7894, "step": 7650 }, { "epoch": 2.4902407286922577, "grad_norm": 1.3661941289901733, "learning_rate": 2.567093521512578e-05, "loss": 0.7737, "step": 7655 }, { "epoch": 2.4918672739102146, "grad_norm": 1.4394514560699463, "learning_rate": 2.5645135858769563e-05, "loss": 0.7424, "step": 7660 }, { "epoch": 2.493493819128172, "grad_norm": 1.3650847673416138, "learning_rate": 2.561933581488612e-05, "loss": 0.8017, "step": 7665 }, { "epoch": 2.495120364346129, "grad_norm": 1.2953821420669556, "learning_rate": 2.55935351109708e-05, "loss": 0.752, "step": 7670 }, { "epoch": 2.4967469095640857, "grad_norm": 1.3239907026290894, "learning_rate": 2.556773377451965e-05, "loss": 0.769, "step": 7675 }, { "epoch": 2.498373454782043, "grad_norm": 1.3111671209335327, "learning_rate": 2.5541931833029394e-05, "loss": 0.7559, "step": 7680 }, { "epoch": 2.5, "grad_norm": 1.4433430433273315, "learning_rate": 2.5516129313997388e-05, "loss": 0.7535, "step": 7685 }, { "epoch": 2.501626545217957, "grad_norm": 1.419683575630188, "learning_rate": 2.549032624492163e-05, "loss": 0.7661, "step": 7690 }, { "epoch": 2.5032530904359143, "grad_norm": 1.373889684677124, "learning_rate": 2.5464522653300676e-05, "loss": 0.7753, "step": 7695 }, { "epoch": 2.504879635653871, "grad_norm": 1.471888542175293, "learning_rate": 2.543871856663365e-05, "loss": 0.7895, "step": 7700 }, { "epoch": 2.506506180871828, "grad_norm": 1.3124557733535767, "learning_rate": 2.541291401242022e-05, "loss": 0.7659, "step": 7705 }, { "epoch": 2.5081327260897854, "grad_norm": 1.3251322507858276, "learning_rate": 2.5387109018160526e-05, "loss": 0.7684, "step": 7710 }, { "epoch": 2.5097592713077423, "grad_norm": 1.6245193481445312, "learning_rate": 2.536130361135518e-05, "loss": 0.7685, "step": 7715 }, { "epoch": 2.5113858165256993, "grad_norm": 1.4695806503295898, "learning_rate": 2.5335497819505254e-05, "loss": 0.7897, "step": 7720 }, { "epoch": 2.5130123617436566, "grad_norm": 1.3101261854171753, "learning_rate": 2.5309691670112218e-05, "loss": 0.7836, "step": 7725 }, { "epoch": 2.5146389069616135, "grad_norm": 1.4538689851760864, "learning_rate": 2.528388519067792e-05, "loss": 0.7976, "step": 7730 }, { "epoch": 2.5162654521795704, "grad_norm": 1.2086530923843384, "learning_rate": 2.525807840870455e-05, "loss": 0.7706, "step": 7735 }, { "epoch": 2.517891997397528, "grad_norm": 1.2726532220840454, "learning_rate": 2.523227135169466e-05, "loss": 0.7347, "step": 7740 }, { "epoch": 2.5195185426154847, "grad_norm": 1.6441864967346191, "learning_rate": 2.5206464047151046e-05, "loss": 0.7759, "step": 7745 }, { "epoch": 2.5211450878334416, "grad_norm": 1.5144109725952148, "learning_rate": 2.518065652257679e-05, "loss": 0.7885, "step": 7750 }, { "epoch": 2.522771633051399, "grad_norm": 1.5923584699630737, "learning_rate": 2.5154848805475224e-05, "loss": 0.7534, "step": 7755 }, { "epoch": 2.524398178269356, "grad_norm": 1.3655023574829102, "learning_rate": 2.5129040923349863e-05, "loss": 0.7608, "step": 7760 }, { "epoch": 2.526024723487313, "grad_norm": 1.5574524402618408, "learning_rate": 2.5103232903704393e-05, "loss": 0.7752, "step": 7765 }, { "epoch": 2.52765126870527, "grad_norm": 1.448049783706665, "learning_rate": 2.5077424774042675e-05, "loss": 0.7522, "step": 7770 }, { "epoch": 2.529277813923227, "grad_norm": 1.5945137739181519, "learning_rate": 2.5051616561868663e-05, "loss": 0.7427, "step": 7775 }, { "epoch": 2.530904359141184, "grad_norm": 1.6864335536956787, "learning_rate": 2.5025808294686397e-05, "loss": 0.7802, "step": 7780 }, { "epoch": 2.5325309043591413, "grad_norm": 1.4522473812103271, "learning_rate": 2.5e-05, "loss": 0.7853, "step": 7785 }, { "epoch": 2.534157449577098, "grad_norm": 1.6900161504745483, "learning_rate": 2.4974191705313602e-05, "loss": 0.765, "step": 7790 }, { "epoch": 2.5357839947950556, "grad_norm": 1.483323097229004, "learning_rate": 2.4948383438131346e-05, "loss": 0.7491, "step": 7795 }, { "epoch": 2.5374105400130125, "grad_norm": 1.4713019132614136, "learning_rate": 2.4922575225957327e-05, "loss": 0.7354, "step": 7800 }, { "epoch": 2.5390370852309694, "grad_norm": 1.8524311780929565, "learning_rate": 2.4896767096295613e-05, "loss": 0.7501, "step": 7805 }, { "epoch": 2.5406636304489263, "grad_norm": 1.3181524276733398, "learning_rate": 2.4870959076650143e-05, "loss": 0.7622, "step": 7810 }, { "epoch": 2.5422901756668836, "grad_norm": 1.4714128971099854, "learning_rate": 2.484515119452478e-05, "loss": 0.7605, "step": 7815 }, { "epoch": 2.5439167208848406, "grad_norm": 1.4760098457336426, "learning_rate": 2.481934347742321e-05, "loss": 0.7521, "step": 7820 }, { "epoch": 2.545543266102798, "grad_norm": 1.3128886222839355, "learning_rate": 2.4793535952848963e-05, "loss": 0.7979, "step": 7825 }, { "epoch": 2.547169811320755, "grad_norm": 1.5653152465820312, "learning_rate": 2.4767728648305343e-05, "loss": 0.8145, "step": 7830 }, { "epoch": 2.5487963565387117, "grad_norm": 1.3934942483901978, "learning_rate": 2.4741921591295454e-05, "loss": 0.747, "step": 7835 }, { "epoch": 2.5504229017566686, "grad_norm": 1.6231136322021484, "learning_rate": 2.471611480932209e-05, "loss": 0.7378, "step": 7840 }, { "epoch": 2.552049446974626, "grad_norm": 1.6661062240600586, "learning_rate": 2.4690308329887788e-05, "loss": 0.7869, "step": 7845 }, { "epoch": 2.553675992192583, "grad_norm": 1.803740382194519, "learning_rate": 2.466450218049475e-05, "loss": 0.7532, "step": 7850 }, { "epoch": 2.5553025374105403, "grad_norm": 1.3901104927062988, "learning_rate": 2.463869638864483e-05, "loss": 0.7529, "step": 7855 }, { "epoch": 2.556929082628497, "grad_norm": 1.384700059890747, "learning_rate": 2.461289098183948e-05, "loss": 0.7795, "step": 7860 }, { "epoch": 2.558555627846454, "grad_norm": 1.9271655082702637, "learning_rate": 2.458708598757979e-05, "loss": 0.7333, "step": 7865 }, { "epoch": 2.560182173064411, "grad_norm": 1.3065274953842163, "learning_rate": 2.456128143336635e-05, "loss": 0.736, "step": 7870 }, { "epoch": 2.5618087182823683, "grad_norm": 1.3320229053497314, "learning_rate": 2.4535477346699333e-05, "loss": 0.7566, "step": 7875 }, { "epoch": 2.5634352635003252, "grad_norm": 1.4683573246002197, "learning_rate": 2.4509673755078376e-05, "loss": 0.7406, "step": 7880 }, { "epoch": 2.5650618087182826, "grad_norm": 1.4413936138153076, "learning_rate": 2.4483870686002625e-05, "loss": 0.7783, "step": 7885 }, { "epoch": 2.5666883539362395, "grad_norm": 1.4812002182006836, "learning_rate": 2.4458068166970612e-05, "loss": 0.7631, "step": 7890 }, { "epoch": 2.5683148991541964, "grad_norm": 1.316504955291748, "learning_rate": 2.443226622548036e-05, "loss": 0.7338, "step": 7895 }, { "epoch": 2.5699414443721533, "grad_norm": 1.2580163478851318, "learning_rate": 2.4406464889029206e-05, "loss": 0.7821, "step": 7900 }, { "epoch": 2.5715679895901107, "grad_norm": 1.9188246726989746, "learning_rate": 2.4380664185113887e-05, "loss": 0.7772, "step": 7905 }, { "epoch": 2.5731945348080676, "grad_norm": 1.4835723638534546, "learning_rate": 2.4354864141230443e-05, "loss": 0.7652, "step": 7910 }, { "epoch": 2.574821080026025, "grad_norm": 1.5174182653427124, "learning_rate": 2.432906478487423e-05, "loss": 0.7826, "step": 7915 }, { "epoch": 2.576447625243982, "grad_norm": 1.5450952053070068, "learning_rate": 2.4303266143539838e-05, "loss": 0.7445, "step": 7920 }, { "epoch": 2.5780741704619388, "grad_norm": 1.4598166942596436, "learning_rate": 2.427746824472113e-05, "loss": 0.7802, "step": 7925 }, { "epoch": 2.5797007156798957, "grad_norm": 1.7827513217926025, "learning_rate": 2.425167111591117e-05, "loss": 0.7678, "step": 7930 }, { "epoch": 2.581327260897853, "grad_norm": 1.2154653072357178, "learning_rate": 2.4225874784602184e-05, "loss": 0.7609, "step": 7935 }, { "epoch": 2.58295380611581, "grad_norm": 1.5180944204330444, "learning_rate": 2.4200079278285568e-05, "loss": 0.7816, "step": 7940 }, { "epoch": 2.5845803513337673, "grad_norm": 1.4127391576766968, "learning_rate": 2.4174284624451824e-05, "loss": 0.7899, "step": 7945 }, { "epoch": 2.586206896551724, "grad_norm": 1.3065491914749146, "learning_rate": 2.4148490850590564e-05, "loss": 0.7686, "step": 7950 }, { "epoch": 2.587833441769681, "grad_norm": 1.413087010383606, "learning_rate": 2.4122697984190428e-05, "loss": 0.7529, "step": 7955 }, { "epoch": 2.589459986987638, "grad_norm": 1.1977612972259521, "learning_rate": 2.4096906052739134e-05, "loss": 0.7459, "step": 7960 }, { "epoch": 2.5910865322055954, "grad_norm": 1.7029626369476318, "learning_rate": 2.4071115083723364e-05, "loss": 0.747, "step": 7965 }, { "epoch": 2.5927130774235523, "grad_norm": 1.494903564453125, "learning_rate": 2.4045325104628806e-05, "loss": 0.7828, "step": 7970 }, { "epoch": 2.5943396226415096, "grad_norm": 1.6146756410598755, "learning_rate": 2.401953614294006e-05, "loss": 0.7777, "step": 7975 }, { "epoch": 2.5959661678594665, "grad_norm": 1.2888963222503662, "learning_rate": 2.3993748226140682e-05, "loss": 0.7426, "step": 7980 }, { "epoch": 2.5975927130774235, "grad_norm": 1.2949042320251465, "learning_rate": 2.396796138171307e-05, "loss": 0.7288, "step": 7985 }, { "epoch": 2.5992192582953804, "grad_norm": 1.5769168138504028, "learning_rate": 2.394217563713852e-05, "loss": 0.7633, "step": 7990 }, { "epoch": 2.6008458035133377, "grad_norm": 1.499580979347229, "learning_rate": 2.391639101989712e-05, "loss": 0.7849, "step": 7995 }, { "epoch": 2.6024723487312946, "grad_norm": 1.2143267393112183, "learning_rate": 2.3890607557467792e-05, "loss": 0.7521, "step": 8000 }, { "epoch": 2.604098893949252, "grad_norm": 1.268778920173645, "learning_rate": 2.3864825277328193e-05, "loss": 0.7546, "step": 8005 }, { "epoch": 2.605725439167209, "grad_norm": 1.3528192043304443, "learning_rate": 2.3839044206954754e-05, "loss": 0.7497, "step": 8010 }, { "epoch": 2.607351984385166, "grad_norm": 1.3386938571929932, "learning_rate": 2.3813264373822578e-05, "loss": 0.7678, "step": 8015 }, { "epoch": 2.6089785296031227, "grad_norm": 1.2361555099487305, "learning_rate": 2.3787485805405484e-05, "loss": 0.7508, "step": 8020 }, { "epoch": 2.61060507482108, "grad_norm": 1.3544996976852417, "learning_rate": 2.376170852917592e-05, "loss": 0.778, "step": 8025 }, { "epoch": 2.612231620039037, "grad_norm": 1.272444486618042, "learning_rate": 2.3735932572604987e-05, "loss": 0.7659, "step": 8030 }, { "epoch": 2.6138581652569943, "grad_norm": 1.423722505569458, "learning_rate": 2.3710157963162328e-05, "loss": 0.7837, "step": 8035 }, { "epoch": 2.6154847104749512, "grad_norm": 1.2738415002822876, "learning_rate": 2.3684384728316214e-05, "loss": 0.7669, "step": 8040 }, { "epoch": 2.617111255692908, "grad_norm": 1.481910228729248, "learning_rate": 2.3658612895533393e-05, "loss": 0.7956, "step": 8045 }, { "epoch": 2.618737800910865, "grad_norm": 1.786193609237671, "learning_rate": 2.3632842492279157e-05, "loss": 0.7683, "step": 8050 }, { "epoch": 2.6203643461288224, "grad_norm": 1.60805082321167, "learning_rate": 2.3607073546017258e-05, "loss": 0.7467, "step": 8055 }, { "epoch": 2.6219908913467793, "grad_norm": 1.3662629127502441, "learning_rate": 2.3581306084209902e-05, "loss": 0.766, "step": 8060 }, { "epoch": 2.6236174365647367, "grad_norm": 1.6549227237701416, "learning_rate": 2.3555540134317712e-05, "loss": 0.7579, "step": 8065 }, { "epoch": 2.6252439817826936, "grad_norm": 1.5518466234207153, "learning_rate": 2.3529775723799684e-05, "loss": 0.7411, "step": 8070 }, { "epoch": 2.6268705270006505, "grad_norm": 1.3866219520568848, "learning_rate": 2.3504012880113216e-05, "loss": 0.7604, "step": 8075 }, { "epoch": 2.6284970722186074, "grad_norm": 1.5808013677597046, "learning_rate": 2.3478251630713978e-05, "loss": 0.7726, "step": 8080 }, { "epoch": 2.6301236174365648, "grad_norm": 1.2971431016921997, "learning_rate": 2.3452492003055984e-05, "loss": 0.7689, "step": 8085 }, { "epoch": 2.6317501626545217, "grad_norm": 1.2560617923736572, "learning_rate": 2.34267340245915e-05, "loss": 0.7723, "step": 8090 }, { "epoch": 2.633376707872479, "grad_norm": 1.3392728567123413, "learning_rate": 2.3400977722771058e-05, "loss": 0.7357, "step": 8095 }, { "epoch": 2.635003253090436, "grad_norm": 1.3546581268310547, "learning_rate": 2.3375223125043356e-05, "loss": 0.7305, "step": 8100 }, { "epoch": 2.636629798308393, "grad_norm": 1.4520702362060547, "learning_rate": 2.3349470258855337e-05, "loss": 0.8231, "step": 8105 }, { "epoch": 2.63825634352635, "grad_norm": 1.3426767587661743, "learning_rate": 2.3323719151652047e-05, "loss": 0.7961, "step": 8110 }, { "epoch": 2.639882888744307, "grad_norm": 1.2050358057022095, "learning_rate": 2.3297969830876686e-05, "loss": 0.7718, "step": 8115 }, { "epoch": 2.641509433962264, "grad_norm": 1.2531814575195312, "learning_rate": 2.327222232397054e-05, "loss": 0.7688, "step": 8120 }, { "epoch": 2.6431359791802214, "grad_norm": 1.245906114578247, "learning_rate": 2.3246476658372973e-05, "loss": 0.7586, "step": 8125 }, { "epoch": 2.6447625243981783, "grad_norm": 1.2808623313903809, "learning_rate": 2.3220732861521358e-05, "loss": 0.7898, "step": 8130 }, { "epoch": 2.646389069616135, "grad_norm": 1.421043872833252, "learning_rate": 2.3194990960851112e-05, "loss": 0.748, "step": 8135 }, { "epoch": 2.6480156148340925, "grad_norm": 1.5289032459259033, "learning_rate": 2.3169250983795607e-05, "loss": 0.7389, "step": 8140 }, { "epoch": 2.6496421600520494, "grad_norm": 1.485734462738037, "learning_rate": 2.3143512957786184e-05, "loss": 0.7681, "step": 8145 }, { "epoch": 2.6512687052700064, "grad_norm": 1.4009584188461304, "learning_rate": 2.311777691025208e-05, "loss": 0.7727, "step": 8150 }, { "epoch": 2.6528952504879637, "grad_norm": 1.286514401435852, "learning_rate": 2.309204286862046e-05, "loss": 0.7835, "step": 8155 }, { "epoch": 2.6545217957059206, "grad_norm": 1.298724889755249, "learning_rate": 2.3066310860316298e-05, "loss": 0.7811, "step": 8160 }, { "epoch": 2.6561483409238775, "grad_norm": 1.3715639114379883, "learning_rate": 2.3040580912762456e-05, "loss": 0.7562, "step": 8165 }, { "epoch": 2.657774886141835, "grad_norm": 1.5396109819412231, "learning_rate": 2.3014853053379566e-05, "loss": 0.7532, "step": 8170 }, { "epoch": 2.659401431359792, "grad_norm": 2.010249137878418, "learning_rate": 2.298912730958605e-05, "loss": 0.777, "step": 8175 }, { "epoch": 2.6610279765777487, "grad_norm": 1.6755378246307373, "learning_rate": 2.296340370879806e-05, "loss": 0.7552, "step": 8180 }, { "epoch": 2.662654521795706, "grad_norm": 1.4169247150421143, "learning_rate": 2.2937682278429494e-05, "loss": 0.7787, "step": 8185 }, { "epoch": 2.664281067013663, "grad_norm": 1.5459901094436646, "learning_rate": 2.291196304589189e-05, "loss": 0.7815, "step": 8190 }, { "epoch": 2.66590761223162, "grad_norm": 1.2591333389282227, "learning_rate": 2.288624603859449e-05, "loss": 0.7444, "step": 8195 }, { "epoch": 2.6675341574495772, "grad_norm": 1.4737584590911865, "learning_rate": 2.2860531283944147e-05, "loss": 0.7675, "step": 8200 }, { "epoch": 2.669160702667534, "grad_norm": 1.2480300664901733, "learning_rate": 2.2834818809345297e-05, "loss": 0.7294, "step": 8205 }, { "epoch": 2.6707872478854915, "grad_norm": 1.3134061098098755, "learning_rate": 2.280910864219997e-05, "loss": 0.7405, "step": 8210 }, { "epoch": 2.6724137931034484, "grad_norm": 1.3522766828536987, "learning_rate": 2.2783400809907726e-05, "loss": 0.7459, "step": 8215 }, { "epoch": 2.6740403383214053, "grad_norm": 1.2937122583389282, "learning_rate": 2.2757695339865648e-05, "loss": 0.7697, "step": 8220 }, { "epoch": 2.675666883539362, "grad_norm": 1.3313663005828857, "learning_rate": 2.2731992259468272e-05, "loss": 0.7702, "step": 8225 }, { "epoch": 2.6772934287573196, "grad_norm": 1.4701040983200073, "learning_rate": 2.2706291596107624e-05, "loss": 0.7953, "step": 8230 }, { "epoch": 2.6789199739752765, "grad_norm": 1.3019791841506958, "learning_rate": 2.2680593377173124e-05, "loss": 0.7579, "step": 8235 }, { "epoch": 2.680546519193234, "grad_norm": 1.646182894706726, "learning_rate": 2.2654897630051625e-05, "loss": 0.7537, "step": 8240 }, { "epoch": 2.6821730644111907, "grad_norm": 1.5963387489318848, "learning_rate": 2.2629204382127284e-05, "loss": 0.7662, "step": 8245 }, { "epoch": 2.6837996096291477, "grad_norm": 1.209472894668579, "learning_rate": 2.2603513660781668e-05, "loss": 0.76, "step": 8250 }, { "epoch": 2.6854261548471046, "grad_norm": 1.2272487878799438, "learning_rate": 2.257782549339359e-05, "loss": 0.7866, "step": 8255 }, { "epoch": 2.687052700065062, "grad_norm": 1.4570890665054321, "learning_rate": 2.255213990733918e-05, "loss": 0.7529, "step": 8260 }, { "epoch": 2.688679245283019, "grad_norm": 1.3137797117233276, "learning_rate": 2.2526456929991793e-05, "loss": 0.7849, "step": 8265 }, { "epoch": 2.690305790500976, "grad_norm": 1.4221347570419312, "learning_rate": 2.2500776588722035e-05, "loss": 0.7689, "step": 8270 }, { "epoch": 2.691932335718933, "grad_norm": 1.3585658073425293, "learning_rate": 2.2475098910897645e-05, "loss": 0.78, "step": 8275 }, { "epoch": 2.69355888093689, "grad_norm": 1.5322914123535156, "learning_rate": 2.2449423923883605e-05, "loss": 0.7779, "step": 8280 }, { "epoch": 2.695185426154847, "grad_norm": 1.4589310884475708, "learning_rate": 2.2423751655041952e-05, "loss": 0.7418, "step": 8285 }, { "epoch": 2.6968119713728043, "grad_norm": 1.4333163499832153, "learning_rate": 2.2398082131731874e-05, "loss": 0.7747, "step": 8290 }, { "epoch": 2.698438516590761, "grad_norm": 1.560063362121582, "learning_rate": 2.237241538130961e-05, "loss": 0.7528, "step": 8295 }, { "epoch": 2.7000650618087185, "grad_norm": 1.2527198791503906, "learning_rate": 2.2346751431128464e-05, "loss": 0.7412, "step": 8300 }, { "epoch": 2.7016916070266754, "grad_norm": 1.281125545501709, "learning_rate": 2.2321090308538732e-05, "loss": 0.7544, "step": 8305 }, { "epoch": 2.7033181522446323, "grad_norm": 1.4430122375488281, "learning_rate": 2.2295432040887714e-05, "loss": 0.7684, "step": 8310 }, { "epoch": 2.7049446974625893, "grad_norm": 1.3339476585388184, "learning_rate": 2.2269776655519658e-05, "loss": 0.7312, "step": 8315 }, { "epoch": 2.7065712426805466, "grad_norm": 1.4589042663574219, "learning_rate": 2.2244124179775748e-05, "loss": 0.7947, "step": 8320 }, { "epoch": 2.7081977878985035, "grad_norm": 1.3441073894500732, "learning_rate": 2.2218474640994063e-05, "loss": 0.7571, "step": 8325 }, { "epoch": 2.709824333116461, "grad_norm": 1.3681058883666992, "learning_rate": 2.219282806650955e-05, "loss": 0.753, "step": 8330 }, { "epoch": 2.711450878334418, "grad_norm": 1.255679726600647, "learning_rate": 2.2167184483654013e-05, "loss": 0.7609, "step": 8335 }, { "epoch": 2.7130774235523747, "grad_norm": 1.347902536392212, "learning_rate": 2.2141543919756034e-05, "loss": 0.7614, "step": 8340 }, { "epoch": 2.7147039687703316, "grad_norm": 1.5413652658462524, "learning_rate": 2.211590640214101e-05, "loss": 0.7611, "step": 8345 }, { "epoch": 2.716330513988289, "grad_norm": 1.3992787599563599, "learning_rate": 2.2090271958131074e-05, "loss": 0.7903, "step": 8350 }, { "epoch": 2.717957059206246, "grad_norm": 1.5938060283660889, "learning_rate": 2.2064640615045092e-05, "loss": 0.7862, "step": 8355 }, { "epoch": 2.719583604424203, "grad_norm": 1.4218285083770752, "learning_rate": 2.203901240019862e-05, "loss": 0.7671, "step": 8360 }, { "epoch": 2.72121014964216, "grad_norm": 1.5432084798812866, "learning_rate": 2.2013387340903893e-05, "loss": 0.769, "step": 8365 }, { "epoch": 2.722836694860117, "grad_norm": 1.7188189029693604, "learning_rate": 2.1987765464469755e-05, "loss": 0.7762, "step": 8370 }, { "epoch": 2.724463240078074, "grad_norm": 1.5086896419525146, "learning_rate": 2.1962146798201684e-05, "loss": 0.7839, "step": 8375 }, { "epoch": 2.7260897852960313, "grad_norm": 1.3536514043807983, "learning_rate": 2.193653136940173e-05, "loss": 0.7741, "step": 8380 }, { "epoch": 2.727716330513988, "grad_norm": 1.3328180313110352, "learning_rate": 2.191091920536849e-05, "loss": 0.7602, "step": 8385 }, { "epoch": 2.7293428757319456, "grad_norm": 1.3692196607589722, "learning_rate": 2.188531033339708e-05, "loss": 0.7626, "step": 8390 }, { "epoch": 2.7309694209499025, "grad_norm": 1.3529107570648193, "learning_rate": 2.1859704780779126e-05, "loss": 0.7916, "step": 8395 }, { "epoch": 2.7325959661678594, "grad_norm": 1.5621742010116577, "learning_rate": 2.1834102574802674e-05, "loss": 0.7719, "step": 8400 }, { "epoch": 2.7342225113858163, "grad_norm": 1.5188515186309814, "learning_rate": 2.1808503742752252e-05, "loss": 0.7452, "step": 8405 }, { "epoch": 2.7358490566037736, "grad_norm": 1.5914379358291626, "learning_rate": 2.1782908311908756e-05, "loss": 0.7744, "step": 8410 }, { "epoch": 2.7374756018217306, "grad_norm": 1.5430190563201904, "learning_rate": 2.175731630954949e-05, "loss": 0.7657, "step": 8415 }, { "epoch": 2.739102147039688, "grad_norm": 1.25099778175354, "learning_rate": 2.1731727762948066e-05, "loss": 0.7593, "step": 8420 }, { "epoch": 2.740728692257645, "grad_norm": 1.5794453620910645, "learning_rate": 2.1706142699374454e-05, "loss": 0.7718, "step": 8425 }, { "epoch": 2.7423552374756017, "grad_norm": 1.5661520957946777, "learning_rate": 2.1680561146094874e-05, "loss": 0.7373, "step": 8430 }, { "epoch": 2.7439817826935586, "grad_norm": 1.5071723461151123, "learning_rate": 2.1654983130371837e-05, "loss": 0.7736, "step": 8435 }, { "epoch": 2.745608327911516, "grad_norm": 1.2616939544677734, "learning_rate": 2.1629408679464055e-05, "loss": 0.7975, "step": 8440 }, { "epoch": 2.747234873129473, "grad_norm": 1.380009412765503, "learning_rate": 2.1603837820626478e-05, "loss": 0.7463, "step": 8445 }, { "epoch": 2.7488614183474303, "grad_norm": 1.5415937900543213, "learning_rate": 2.1578270581110173e-05, "loss": 0.7789, "step": 8450 }, { "epoch": 2.750487963565387, "grad_norm": 1.7853513956069946, "learning_rate": 2.1552706988162417e-05, "loss": 0.7756, "step": 8455 }, { "epoch": 2.752114508783344, "grad_norm": 1.3353517055511475, "learning_rate": 2.1532258757641216e-05, "loss": 0.7632, "step": 8460 }, { "epoch": 2.753741054001301, "grad_norm": 1.6096117496490479, "learning_rate": 2.1506701797167435e-05, "loss": 0.7876, "step": 8465 }, { "epoch": 2.7553675992192583, "grad_norm": 1.2259865999221802, "learning_rate": 2.14811485595337e-05, "loss": 0.7729, "step": 8470 }, { "epoch": 2.7569941444372152, "grad_norm": 1.2252434492111206, "learning_rate": 2.1455599071972344e-05, "loss": 0.763, "step": 8475 }, { "epoch": 2.7586206896551726, "grad_norm": 1.3717985153198242, "learning_rate": 2.143005336171166e-05, "loss": 0.7556, "step": 8480 }, { "epoch": 2.7602472348731295, "grad_norm": 1.3713133335113525, "learning_rate": 2.1404511455975977e-05, "loss": 0.7908, "step": 8485 }, { "epoch": 2.7618737800910864, "grad_norm": 1.3020001649856567, "learning_rate": 2.137897338198552e-05, "loss": 0.7276, "step": 8490 }, { "epoch": 2.7635003253090433, "grad_norm": 1.6197290420532227, "learning_rate": 2.1353439166956453e-05, "loss": 0.7772, "step": 8495 }, { "epoch": 2.7651268705270007, "grad_norm": 1.7933818101882935, "learning_rate": 2.1327908838100824e-05, "loss": 0.7844, "step": 8500 }, { "epoch": 2.7667534157449576, "grad_norm": 1.2953613996505737, "learning_rate": 2.1302382422626556e-05, "loss": 0.7799, "step": 8505 }, { "epoch": 2.768379960962915, "grad_norm": 1.1471056938171387, "learning_rate": 2.1276859947737356e-05, "loss": 0.7749, "step": 8510 }, { "epoch": 2.770006506180872, "grad_norm": 1.3836361169815063, "learning_rate": 2.1251341440632783e-05, "loss": 0.7581, "step": 8515 }, { "epoch": 2.7716330513988288, "grad_norm": 1.1558619737625122, "learning_rate": 2.122582692850813e-05, "loss": 0.7905, "step": 8520 }, { "epoch": 2.7732595966167857, "grad_norm": 1.3663548231124878, "learning_rate": 2.120031643855446e-05, "loss": 0.769, "step": 8525 }, { "epoch": 2.774886141834743, "grad_norm": 1.510703206062317, "learning_rate": 2.117480999795853e-05, "loss": 0.7909, "step": 8530 }, { "epoch": 2.7765126870527, "grad_norm": 1.3669439554214478, "learning_rate": 2.114930763390279e-05, "loss": 0.765, "step": 8535 }, { "epoch": 2.7781392322706573, "grad_norm": 1.2012897729873657, "learning_rate": 2.1123809373565342e-05, "loss": 0.7501, "step": 8540 }, { "epoch": 2.779765777488614, "grad_norm": 1.4639004468917847, "learning_rate": 2.1098315244119922e-05, "loss": 0.7532, "step": 8545 }, { "epoch": 2.781392322706571, "grad_norm": 1.2535903453826904, "learning_rate": 2.1072825272735848e-05, "loss": 0.7697, "step": 8550 }, { "epoch": 2.7830188679245285, "grad_norm": 1.5976831912994385, "learning_rate": 2.104733948657803e-05, "loss": 0.7315, "step": 8555 }, { "epoch": 2.7846454131424854, "grad_norm": 1.3188773393630981, "learning_rate": 2.102185791280689e-05, "loss": 0.766, "step": 8560 }, { "epoch": 2.7862719583604423, "grad_norm": 1.5026614665985107, "learning_rate": 2.09963805785784e-05, "loss": 0.76, "step": 8565 }, { "epoch": 2.7878985035783996, "grad_norm": 1.4453078508377075, "learning_rate": 2.0970907511043964e-05, "loss": 0.7868, "step": 8570 }, { "epoch": 2.7895250487963565, "grad_norm": 1.3378890752792358, "learning_rate": 2.0945438737350476e-05, "loss": 0.777, "step": 8575 }, { "epoch": 2.7911515940143135, "grad_norm": 1.2877166271209717, "learning_rate": 2.0919974284640237e-05, "loss": 0.7984, "step": 8580 }, { "epoch": 2.792778139232271, "grad_norm": 1.593629002571106, "learning_rate": 2.0894514180050964e-05, "loss": 0.7713, "step": 8585 }, { "epoch": 2.7944046844502277, "grad_norm": 1.3870834112167358, "learning_rate": 2.0869058450715696e-05, "loss": 0.7704, "step": 8590 }, { "epoch": 2.7960312296681846, "grad_norm": 1.3313112258911133, "learning_rate": 2.0843607123762875e-05, "loss": 0.8132, "step": 8595 }, { "epoch": 2.797657774886142, "grad_norm": 1.2662553787231445, "learning_rate": 2.081816022631618e-05, "loss": 0.7827, "step": 8600 }, { "epoch": 2.799284320104099, "grad_norm": 1.1383579969406128, "learning_rate": 2.079271778549462e-05, "loss": 0.7622, "step": 8605 }, { "epoch": 2.800910865322056, "grad_norm": 1.539667010307312, "learning_rate": 2.0767279828412442e-05, "loss": 0.7934, "step": 8610 }, { "epoch": 2.802537410540013, "grad_norm": 1.4475916624069214, "learning_rate": 2.0741846382179102e-05, "loss": 0.7712, "step": 8615 }, { "epoch": 2.80416395575797, "grad_norm": 1.2582719326019287, "learning_rate": 2.0716417473899268e-05, "loss": 0.8015, "step": 8620 }, { "epoch": 2.805790500975927, "grad_norm": 1.3313987255096436, "learning_rate": 2.069099313067275e-05, "loss": 0.72, "step": 8625 }, { "epoch": 2.8074170461938843, "grad_norm": 1.4567219018936157, "learning_rate": 2.0665573379594516e-05, "loss": 0.7528, "step": 8630 }, { "epoch": 2.8090435914118412, "grad_norm": 1.664899468421936, "learning_rate": 2.0640158247754614e-05, "loss": 0.7591, "step": 8635 }, { "epoch": 2.810670136629798, "grad_norm": 1.3050580024719238, "learning_rate": 2.06147477622382e-05, "loss": 0.8043, "step": 8640 }, { "epoch": 2.8122966818477555, "grad_norm": 1.359656810760498, "learning_rate": 2.0589341950125444e-05, "loss": 0.7582, "step": 8645 }, { "epoch": 2.8139232270657124, "grad_norm": 1.4428551197052002, "learning_rate": 2.056394083849158e-05, "loss": 0.7355, "step": 8650 }, { "epoch": 2.8155497722836698, "grad_norm": 1.5091948509216309, "learning_rate": 2.0538544454406776e-05, "loss": 0.791, "step": 8655 }, { "epoch": 2.8171763175016267, "grad_norm": 1.3959429264068604, "learning_rate": 2.051315282493622e-05, "loss": 0.7926, "step": 8660 }, { "epoch": 2.8188028627195836, "grad_norm": 1.2459297180175781, "learning_rate": 2.048776597713998e-05, "loss": 0.768, "step": 8665 }, { "epoch": 2.8204294079375405, "grad_norm": 1.3548305034637451, "learning_rate": 2.0462383938073078e-05, "loss": 0.7521, "step": 8670 }, { "epoch": 2.822055953155498, "grad_norm": 1.2497638463974, "learning_rate": 2.0437006734785365e-05, "loss": 0.7648, "step": 8675 }, { "epoch": 2.8236824983734548, "grad_norm": 1.5383737087249756, "learning_rate": 2.0411634394321578e-05, "loss": 0.7749, "step": 8680 }, { "epoch": 2.825309043591412, "grad_norm": 1.318953514099121, "learning_rate": 2.0386266943721235e-05, "loss": 0.7791, "step": 8685 }, { "epoch": 2.826935588809369, "grad_norm": 1.3510106801986694, "learning_rate": 2.0360904410018676e-05, "loss": 0.7343, "step": 8690 }, { "epoch": 2.828562134027326, "grad_norm": 1.596946120262146, "learning_rate": 2.033554682024298e-05, "loss": 0.7882, "step": 8695 }, { "epoch": 2.830188679245283, "grad_norm": 1.7994003295898438, "learning_rate": 2.031019420141797e-05, "loss": 0.7581, "step": 8700 }, { "epoch": 2.83181522446324, "grad_norm": 1.6367297172546387, "learning_rate": 2.028484658056216e-05, "loss": 0.7625, "step": 8705 }, { "epoch": 2.833441769681197, "grad_norm": 1.5863484144210815, "learning_rate": 2.025950398468875e-05, "loss": 0.7447, "step": 8710 }, { "epoch": 2.8350683148991545, "grad_norm": 1.367453932762146, "learning_rate": 2.0234166440805562e-05, "loss": 0.7786, "step": 8715 }, { "epoch": 2.8366948601171114, "grad_norm": 1.350319266319275, "learning_rate": 2.020883397591507e-05, "loss": 0.7624, "step": 8720 }, { "epoch": 2.8383214053350683, "grad_norm": 1.537142276763916, "learning_rate": 2.0183506617014293e-05, "loss": 0.761, "step": 8725 }, { "epoch": 2.839947950553025, "grad_norm": 1.3316154479980469, "learning_rate": 2.015818439109485e-05, "loss": 0.7411, "step": 8730 }, { "epoch": 2.8415744957709825, "grad_norm": 1.3300597667694092, "learning_rate": 2.0132867325142858e-05, "loss": 0.7357, "step": 8735 }, { "epoch": 2.8432010409889394, "grad_norm": 1.4253225326538086, "learning_rate": 2.010755544613895e-05, "loss": 0.767, "step": 8740 }, { "epoch": 2.844827586206897, "grad_norm": 1.2955018281936646, "learning_rate": 2.008224878105824e-05, "loss": 0.7555, "step": 8745 }, { "epoch": 2.8464541314248537, "grad_norm": 1.4968903064727783, "learning_rate": 2.005694735687025e-05, "loss": 0.7482, "step": 8750 }, { "epoch": 2.8480806766428106, "grad_norm": 1.2922298908233643, "learning_rate": 2.0031651200538963e-05, "loss": 0.7245, "step": 8755 }, { "epoch": 2.8497072218607675, "grad_norm": 1.6212042570114136, "learning_rate": 2.000636033902271e-05, "loss": 0.7908, "step": 8760 }, { "epoch": 2.851333767078725, "grad_norm": 1.5850560665130615, "learning_rate": 1.998107479927421e-05, "loss": 0.7749, "step": 8765 }, { "epoch": 2.852960312296682, "grad_norm": 2.016087532043457, "learning_rate": 1.995579460824048e-05, "loss": 0.7599, "step": 8770 }, { "epoch": 2.854586857514639, "grad_norm": 1.4416437149047852, "learning_rate": 1.9930519792862873e-05, "loss": 0.746, "step": 8775 }, { "epoch": 2.856213402732596, "grad_norm": 1.287035346031189, "learning_rate": 1.9905250380076965e-05, "loss": 0.7556, "step": 8780 }, { "epoch": 2.857839947950553, "grad_norm": 1.4517265558242798, "learning_rate": 1.9879986396812623e-05, "loss": 0.7689, "step": 8785 }, { "epoch": 2.85946649316851, "grad_norm": 1.3363392353057861, "learning_rate": 1.985472786999389e-05, "loss": 0.7605, "step": 8790 }, { "epoch": 2.8610930383864672, "grad_norm": 1.3558491468429565, "learning_rate": 1.982947482653903e-05, "loss": 0.7418, "step": 8795 }, { "epoch": 2.862719583604424, "grad_norm": 1.4412764310836792, "learning_rate": 1.980422729336042e-05, "loss": 0.753, "step": 8800 }, { "epoch": 2.8643461288223815, "grad_norm": 1.3449541330337524, "learning_rate": 1.977898529736462e-05, "loss": 0.7771, "step": 8805 }, { "epoch": 2.8659726740403384, "grad_norm": 1.290846586227417, "learning_rate": 1.9753748865452226e-05, "loss": 0.7534, "step": 8810 }, { "epoch": 2.8675992192582953, "grad_norm": 1.3596643209457397, "learning_rate": 1.9728518024517957e-05, "loss": 0.7367, "step": 8815 }, { "epoch": 2.869225764476252, "grad_norm": 1.3451288938522339, "learning_rate": 1.9703292801450536e-05, "loss": 0.7523, "step": 8820 }, { "epoch": 2.8708523096942096, "grad_norm": 1.3395687341690063, "learning_rate": 1.9678073223132737e-05, "loss": 0.7794, "step": 8825 }, { "epoch": 2.8724788549121665, "grad_norm": 1.6842072010040283, "learning_rate": 1.9652859316441267e-05, "loss": 0.7393, "step": 8830 }, { "epoch": 2.874105400130124, "grad_norm": 1.490450143814087, "learning_rate": 1.9627651108246848e-05, "loss": 0.7378, "step": 8835 }, { "epoch": 2.8757319453480807, "grad_norm": 1.3513338565826416, "learning_rate": 1.9602448625414077e-05, "loss": 0.7286, "step": 8840 }, { "epoch": 2.8773584905660377, "grad_norm": 1.4203851222991943, "learning_rate": 1.9577251894801488e-05, "loss": 0.7679, "step": 8845 }, { "epoch": 2.8789850357839946, "grad_norm": 1.4048949480056763, "learning_rate": 1.9552060943261456e-05, "loss": 0.7924, "step": 8850 }, { "epoch": 2.880611581001952, "grad_norm": 1.3703855276107788, "learning_rate": 1.9526875797640226e-05, "loss": 0.7544, "step": 8855 }, { "epoch": 2.882238126219909, "grad_norm": 1.2200870513916016, "learning_rate": 1.950169648477782e-05, "loss": 0.7594, "step": 8860 }, { "epoch": 2.883864671437866, "grad_norm": 1.1982550621032715, "learning_rate": 1.947652303150808e-05, "loss": 0.7862, "step": 8865 }, { "epoch": 2.885491216655823, "grad_norm": 1.2800472974777222, "learning_rate": 1.945135546465857e-05, "loss": 0.7805, "step": 8870 }, { "epoch": 2.88711776187378, "grad_norm": 1.7053533792495728, "learning_rate": 1.942619381105061e-05, "loss": 0.7544, "step": 8875 }, { "epoch": 2.888744307091737, "grad_norm": 1.7673003673553467, "learning_rate": 1.9401038097499208e-05, "loss": 0.7863, "step": 8880 }, { "epoch": 2.8903708523096943, "grad_norm": 1.4207615852355957, "learning_rate": 1.9375888350813026e-05, "loss": 0.7423, "step": 8885 }, { "epoch": 2.891997397527651, "grad_norm": 1.3067200183868408, "learning_rate": 1.9350744597794405e-05, "loss": 0.7311, "step": 8890 }, { "epoch": 2.8936239427456085, "grad_norm": 1.660296082496643, "learning_rate": 1.9325606865239243e-05, "loss": 0.803, "step": 8895 }, { "epoch": 2.8952504879635654, "grad_norm": 1.538917064666748, "learning_rate": 1.9300475179937077e-05, "loss": 0.7608, "step": 8900 }, { "epoch": 2.8968770331815223, "grad_norm": 1.3397347927093506, "learning_rate": 1.9275349568670957e-05, "loss": 0.7455, "step": 8905 }, { "epoch": 2.8985035783994793, "grad_norm": 1.5289032459259033, "learning_rate": 1.9250230058217496e-05, "loss": 0.7589, "step": 8910 }, { "epoch": 2.9001301236174366, "grad_norm": 1.5342029333114624, "learning_rate": 1.9225116675346776e-05, "loss": 0.7697, "step": 8915 }, { "epoch": 2.9017566688353935, "grad_norm": 1.3817435503005981, "learning_rate": 1.920000944682237e-05, "loss": 0.7365, "step": 8920 }, { "epoch": 2.903383214053351, "grad_norm": 1.4527095556259155, "learning_rate": 1.9174908399401266e-05, "loss": 0.7694, "step": 8925 }, { "epoch": 2.905009759271308, "grad_norm": 1.3472769260406494, "learning_rate": 1.9149813559833897e-05, "loss": 0.7703, "step": 8930 }, { "epoch": 2.9066363044892647, "grad_norm": 1.4648215770721436, "learning_rate": 1.912472495486405e-05, "loss": 0.752, "step": 8935 }, { "epoch": 2.9082628497072216, "grad_norm": 1.3852043151855469, "learning_rate": 1.9099642611228896e-05, "loss": 0.7848, "step": 8940 }, { "epoch": 2.909889394925179, "grad_norm": 1.2881255149841309, "learning_rate": 1.907456655565891e-05, "loss": 0.7582, "step": 8945 }, { "epoch": 2.911515940143136, "grad_norm": 1.2062709331512451, "learning_rate": 1.9049496814877893e-05, "loss": 0.7289, "step": 8950 }, { "epoch": 2.913142485361093, "grad_norm": 1.3101996183395386, "learning_rate": 1.9024433415602872e-05, "loss": 0.7766, "step": 8955 }, { "epoch": 2.91476903057905, "grad_norm": 1.3560919761657715, "learning_rate": 1.899937638454416e-05, "loss": 0.783, "step": 8960 }, { "epoch": 2.916395575797007, "grad_norm": 1.4481602907180786, "learning_rate": 1.8974325748405258e-05, "loss": 0.7808, "step": 8965 }, { "epoch": 2.918022121014964, "grad_norm": 1.2113620042800903, "learning_rate": 1.8949281533882864e-05, "loss": 0.7628, "step": 8970 }, { "epoch": 2.9196486662329213, "grad_norm": 1.5030473470687866, "learning_rate": 1.8924243767666823e-05, "loss": 0.796, "step": 8975 }, { "epoch": 2.921275211450878, "grad_norm": 1.2997257709503174, "learning_rate": 1.8899212476440125e-05, "loss": 0.7656, "step": 8980 }, { "epoch": 2.9229017566688356, "grad_norm": 1.4905874729156494, "learning_rate": 1.887418768687883e-05, "loss": 0.7758, "step": 8985 }, { "epoch": 2.9245283018867925, "grad_norm": 1.2899222373962402, "learning_rate": 1.8849169425652095e-05, "loss": 0.7579, "step": 8990 }, { "epoch": 2.9261548471047494, "grad_norm": 1.329080581665039, "learning_rate": 1.8824157719422112e-05, "loss": 0.7694, "step": 8995 }, { "epoch": 2.9277813923227067, "grad_norm": 1.4542700052261353, "learning_rate": 1.8799152594844093e-05, "loss": 0.7761, "step": 9000 }, { "epoch": 2.9294079375406636, "grad_norm": 1.4678515195846558, "learning_rate": 1.8774154078566207e-05, "loss": 0.784, "step": 9005 }, { "epoch": 2.9310344827586206, "grad_norm": 1.3706289529800415, "learning_rate": 1.8749162197229626e-05, "loss": 0.7631, "step": 9010 }, { "epoch": 2.932661027976578, "grad_norm": 1.3793909549713135, "learning_rate": 1.872417697746843e-05, "loss": 0.7575, "step": 9015 }, { "epoch": 2.934287573194535, "grad_norm": 1.4092929363250732, "learning_rate": 1.8699198445909572e-05, "loss": 0.7505, "step": 9020 }, { "epoch": 2.9359141184124917, "grad_norm": 1.2127538919448853, "learning_rate": 1.8674226629172925e-05, "loss": 0.749, "step": 9025 }, { "epoch": 2.937540663630449, "grad_norm": 1.3179833889007568, "learning_rate": 1.8649261553871176e-05, "loss": 0.7713, "step": 9030 }, { "epoch": 2.939167208848406, "grad_norm": 1.3004388809204102, "learning_rate": 1.8624303246609847e-05, "loss": 0.7553, "step": 9035 }, { "epoch": 2.940793754066363, "grad_norm": 1.3201897144317627, "learning_rate": 1.8599351733987205e-05, "loss": 0.7743, "step": 9040 }, { "epoch": 2.9424202992843203, "grad_norm": 1.2810781002044678, "learning_rate": 1.8574407042594348e-05, "loss": 0.7553, "step": 9045 }, { "epoch": 2.944046844502277, "grad_norm": 1.2980822324752808, "learning_rate": 1.854946919901503e-05, "loss": 0.774, "step": 9050 }, { "epoch": 2.945673389720234, "grad_norm": 1.3434240818023682, "learning_rate": 1.8524538229825757e-05, "loss": 0.7718, "step": 9055 }, { "epoch": 2.9472999349381914, "grad_norm": 1.4152979850769043, "learning_rate": 1.8499614161595685e-05, "loss": 0.7732, "step": 9060 }, { "epoch": 2.9489264801561483, "grad_norm": 1.416679859161377, "learning_rate": 1.8474697020886636e-05, "loss": 0.7917, "step": 9065 }, { "epoch": 2.9505530253741052, "grad_norm": 1.424110770225525, "learning_rate": 1.8449786834253015e-05, "loss": 0.7533, "step": 9070 }, { "epoch": 2.9521795705920626, "grad_norm": 1.3138693571090698, "learning_rate": 1.8424883628241857e-05, "loss": 0.7445, "step": 9075 }, { "epoch": 2.9538061158100195, "grad_norm": 1.3573354482650757, "learning_rate": 1.8399987429392722e-05, "loss": 0.7597, "step": 9080 }, { "epoch": 2.9554326610279764, "grad_norm": 1.4527267217636108, "learning_rate": 1.837509826423773e-05, "loss": 0.757, "step": 9085 }, { "epoch": 2.9570592062459338, "grad_norm": 1.4809155464172363, "learning_rate": 1.8350216159301483e-05, "loss": 0.7729, "step": 9090 }, { "epoch": 2.9586857514638907, "grad_norm": 1.2153171300888062, "learning_rate": 1.8325341141101087e-05, "loss": 0.7399, "step": 9095 }, { "epoch": 2.960312296681848, "grad_norm": 1.3090033531188965, "learning_rate": 1.830047323614606e-05, "loss": 0.7466, "step": 9100 }, { "epoch": 2.961938841899805, "grad_norm": 1.3680044412612915, "learning_rate": 1.827561247093836e-05, "loss": 0.7449, "step": 9105 }, { "epoch": 2.963565387117762, "grad_norm": 1.3589669466018677, "learning_rate": 1.8250758871972335e-05, "loss": 0.7494, "step": 9110 }, { "epoch": 2.9651919323357188, "grad_norm": 1.487540602684021, "learning_rate": 1.82259124657347e-05, "loss": 0.7607, "step": 9115 }, { "epoch": 2.966818477553676, "grad_norm": 1.3342360258102417, "learning_rate": 1.8201073278704492e-05, "loss": 0.764, "step": 9120 }, { "epoch": 2.968445022771633, "grad_norm": 1.4702714681625366, "learning_rate": 1.8176241337353073e-05, "loss": 0.7694, "step": 9125 }, { "epoch": 2.9700715679895904, "grad_norm": 1.3969438076019287, "learning_rate": 1.815141666814405e-05, "loss": 0.7558, "step": 9130 }, { "epoch": 2.9716981132075473, "grad_norm": 1.3822335004806519, "learning_rate": 1.812659929753332e-05, "loss": 0.7685, "step": 9135 }, { "epoch": 2.973324658425504, "grad_norm": 1.4510034322738647, "learning_rate": 1.810178925196897e-05, "loss": 0.7942, "step": 9140 }, { "epoch": 2.974951203643461, "grad_norm": 1.253931999206543, "learning_rate": 1.80769865578913e-05, "loss": 0.7728, "step": 9145 }, { "epoch": 2.9765777488614185, "grad_norm": 1.4350529909133911, "learning_rate": 1.805219124173278e-05, "loss": 0.7522, "step": 9150 }, { "epoch": 2.9782042940793754, "grad_norm": 1.3868224620819092, "learning_rate": 1.802740332991799e-05, "loss": 0.7852, "step": 9155 }, { "epoch": 2.9798308392973327, "grad_norm": 1.4791457653045654, "learning_rate": 1.800262284886365e-05, "loss": 0.7313, "step": 9160 }, { "epoch": 2.9814573845152896, "grad_norm": 1.4920134544372559, "learning_rate": 1.797784982497853e-05, "loss": 0.7717, "step": 9165 }, { "epoch": 2.9830839297332465, "grad_norm": 1.5016266107559204, "learning_rate": 1.7953084284663486e-05, "loss": 0.7864, "step": 9170 }, { "epoch": 2.9847104749512035, "grad_norm": 1.3318835496902466, "learning_rate": 1.7928326254311363e-05, "loss": 0.767, "step": 9175 }, { "epoch": 2.986337020169161, "grad_norm": 1.3156347274780273, "learning_rate": 1.7903575760307044e-05, "loss": 0.742, "step": 9180 }, { "epoch": 2.9879635653871177, "grad_norm": 1.6698607206344604, "learning_rate": 1.787883282902734e-05, "loss": 0.7891, "step": 9185 }, { "epoch": 2.989590110605075, "grad_norm": 1.6913663148880005, "learning_rate": 1.7854097486841044e-05, "loss": 0.8007, "step": 9190 }, { "epoch": 2.991216655823032, "grad_norm": 1.2719675302505493, "learning_rate": 1.782936976010881e-05, "loss": 0.7679, "step": 9195 }, { "epoch": 2.992843201040989, "grad_norm": 1.500932216644287, "learning_rate": 1.7804649675183223e-05, "loss": 0.7608, "step": 9200 }, { "epoch": 2.994469746258946, "grad_norm": 1.2967573404312134, "learning_rate": 1.7779937258408685e-05, "loss": 0.7649, "step": 9205 }, { "epoch": 2.996096291476903, "grad_norm": 1.5139952898025513, "learning_rate": 1.7755232536121477e-05, "loss": 0.731, "step": 9210 }, { "epoch": 2.99772283669486, "grad_norm": 1.4743283987045288, "learning_rate": 1.7730535534649614e-05, "loss": 0.7989, "step": 9215 }, { "epoch": 2.9993493819128174, "grad_norm": 1.4576674699783325, "learning_rate": 1.7705846280312948e-05, "loss": 0.7149, "step": 9220 }, { "epoch": 3.0, "eval_f1": 0.8162158817143809, "eval_loss": 0.423828125, "eval_precision": 0.8168015799887587, "eval_recall": 0.8157541959580055, "eval_runtime": 386.4595, "eval_samples_per_second": 1018.05, "eval_steps_per_second": 1.99, "step": 9222 }, { "epoch": 3.0009759271307743, "grad_norm": 1.3660796880722046, "learning_rate": 1.768116479942303e-05, "loss": 0.7174, "step": 9225 }, { "epoch": 3.0026024723487312, "grad_norm": 1.5472331047058105, "learning_rate": 1.7656491118283135e-05, "loss": 0.7111, "step": 9230 }, { "epoch": 3.004229017566688, "grad_norm": 1.2640855312347412, "learning_rate": 1.7631825263188246e-05, "loss": 0.7295, "step": 9235 }, { "epoch": 3.0058555627846455, "grad_norm": 1.314046025276184, "learning_rate": 1.760716726042499e-05, "loss": 0.6987, "step": 9240 }, { "epoch": 3.0074821080026024, "grad_norm": 1.3935083150863647, "learning_rate": 1.7582517136271616e-05, "loss": 0.6947, "step": 9245 }, { "epoch": 3.0091086532205593, "grad_norm": 1.3526424169540405, "learning_rate": 1.7557874916997996e-05, "loss": 0.7062, "step": 9250 }, { "epoch": 3.0107351984385167, "grad_norm": 1.6264359951019287, "learning_rate": 1.7533240628865567e-05, "loss": 0.7311, "step": 9255 }, { "epoch": 3.0123617436564736, "grad_norm": 1.4308100938796997, "learning_rate": 1.7508614298127322e-05, "loss": 0.7189, "step": 9260 }, { "epoch": 3.0139882888744305, "grad_norm": 1.5718538761138916, "learning_rate": 1.7483995951027767e-05, "loss": 0.6965, "step": 9265 }, { "epoch": 3.015614834092388, "grad_norm": 1.4989243745803833, "learning_rate": 1.7459385613802903e-05, "loss": 0.7387, "step": 9270 }, { "epoch": 3.0172413793103448, "grad_norm": 1.290700912475586, "learning_rate": 1.743478331268018e-05, "loss": 0.737, "step": 9275 }, { "epoch": 3.018867924528302, "grad_norm": 1.379231572151184, "learning_rate": 1.7410189073878513e-05, "loss": 0.6917, "step": 9280 }, { "epoch": 3.020494469746259, "grad_norm": 1.4544013738632202, "learning_rate": 1.7385602923608192e-05, "loss": 0.6943, "step": 9285 }, { "epoch": 3.022121014964216, "grad_norm": 1.7482510805130005, "learning_rate": 1.736102488807092e-05, "loss": 0.7295, "step": 9290 }, { "epoch": 3.0237475601821733, "grad_norm": 1.4627649784088135, "learning_rate": 1.7336454993459726e-05, "loss": 0.7299, "step": 9295 }, { "epoch": 3.02537410540013, "grad_norm": 1.4708366394042969, "learning_rate": 1.7311893265958974e-05, "loss": 0.7438, "step": 9300 }, { "epoch": 3.027000650618087, "grad_norm": 1.466259479522705, "learning_rate": 1.7287339731744336e-05, "loss": 0.7108, "step": 9305 }, { "epoch": 3.0286271958360445, "grad_norm": 1.3860840797424316, "learning_rate": 1.7262794416982716e-05, "loss": 0.7288, "step": 9310 }, { "epoch": 3.0302537410540014, "grad_norm": 1.4637877941131592, "learning_rate": 1.72382573478323e-05, "loss": 0.7181, "step": 9315 }, { "epoch": 3.0318802862719583, "grad_norm": 1.4799340963363647, "learning_rate": 1.721372855044246e-05, "loss": 0.7272, "step": 9320 }, { "epoch": 3.0335068314899156, "grad_norm": 1.4379684925079346, "learning_rate": 1.7189208050953765e-05, "loss": 0.7176, "step": 9325 }, { "epoch": 3.0351333767078725, "grad_norm": 1.6755834817886353, "learning_rate": 1.7164695875497928e-05, "loss": 0.702, "step": 9330 }, { "epoch": 3.0367599219258294, "grad_norm": 1.7429792881011963, "learning_rate": 1.714019205019782e-05, "loss": 0.701, "step": 9335 }, { "epoch": 3.038386467143787, "grad_norm": 1.5727760791778564, "learning_rate": 1.711569660116737e-05, "loss": 0.7172, "step": 9340 }, { "epoch": 3.0400130123617437, "grad_norm": 1.322109341621399, "learning_rate": 1.709120955451162e-05, "loss": 0.7465, "step": 9345 }, { "epoch": 3.0416395575797006, "grad_norm": 1.6154533624649048, "learning_rate": 1.706673093632663e-05, "loss": 0.7309, "step": 9350 }, { "epoch": 3.043266102797658, "grad_norm": 1.4292712211608887, "learning_rate": 1.70422607726995e-05, "loss": 0.7132, "step": 9355 }, { "epoch": 3.044892648015615, "grad_norm": 1.922465205192566, "learning_rate": 1.7017799089708293e-05, "loss": 0.7182, "step": 9360 }, { "epoch": 3.046519193233572, "grad_norm": 1.722861886024475, "learning_rate": 1.699334591342207e-05, "loss": 0.7414, "step": 9365 }, { "epoch": 3.048145738451529, "grad_norm": 1.5007615089416504, "learning_rate": 1.696890126990079e-05, "loss": 0.7183, "step": 9370 }, { "epoch": 3.049772283669486, "grad_norm": 1.874970555305481, "learning_rate": 1.694446518519534e-05, "loss": 0.75, "step": 9375 }, { "epoch": 3.051398828887443, "grad_norm": 1.430453896522522, "learning_rate": 1.692003768534747e-05, "loss": 0.7131, "step": 9380 }, { "epoch": 3.0530253741054003, "grad_norm": 1.4646815061569214, "learning_rate": 1.689561879638982e-05, "loss": 0.7375, "step": 9385 }, { "epoch": 3.0546519193233572, "grad_norm": 1.474249243736267, "learning_rate": 1.687120854434579e-05, "loss": 0.7208, "step": 9390 }, { "epoch": 3.056278464541314, "grad_norm": 1.530264139175415, "learning_rate": 1.684680695522964e-05, "loss": 0.7213, "step": 9395 }, { "epoch": 3.0579050097592715, "grad_norm": 1.5587159395217896, "learning_rate": 1.682241405504634e-05, "loss": 0.7201, "step": 9400 }, { "epoch": 3.0595315549772284, "grad_norm": 1.4787306785583496, "learning_rate": 1.679802986979165e-05, "loss": 0.7096, "step": 9405 }, { "epoch": 3.0611581001951853, "grad_norm": 1.4778773784637451, "learning_rate": 1.6773654425452007e-05, "loss": 0.7052, "step": 9410 }, { "epoch": 3.0627846454131427, "grad_norm": 1.2918109893798828, "learning_rate": 1.6749287748004567e-05, "loss": 0.7173, "step": 9415 }, { "epoch": 3.0644111906310996, "grad_norm": 1.8076776266098022, "learning_rate": 1.6724929863417094e-05, "loss": 0.6876, "step": 9420 }, { "epoch": 3.0660377358490565, "grad_norm": 1.5048235654830933, "learning_rate": 1.670058079764802e-05, "loss": 0.7116, "step": 9425 }, { "epoch": 3.067664281067014, "grad_norm": 1.5065021514892578, "learning_rate": 1.6676240576646387e-05, "loss": 0.6994, "step": 9430 }, { "epoch": 3.0692908262849707, "grad_norm": 1.5438202619552612, "learning_rate": 1.665190922635177e-05, "loss": 0.7246, "step": 9435 }, { "epoch": 3.0709173715029277, "grad_norm": 1.4787880182266235, "learning_rate": 1.662758677269432e-05, "loss": 0.722, "step": 9440 }, { "epoch": 3.072543916720885, "grad_norm": 1.6293792724609375, "learning_rate": 1.66032732415947e-05, "loss": 0.7287, "step": 9445 }, { "epoch": 3.074170461938842, "grad_norm": 1.4706617593765259, "learning_rate": 1.657896865896407e-05, "loss": 0.7154, "step": 9450 }, { "epoch": 3.075797007156799, "grad_norm": 1.1339417695999146, "learning_rate": 1.6554673050704038e-05, "loss": 0.721, "step": 9455 }, { "epoch": 3.077423552374756, "grad_norm": 1.6174836158752441, "learning_rate": 1.6535243043042624e-05, "loss": 0.7097, "step": 9460 }, { "epoch": 3.079050097592713, "grad_norm": 1.8702113628387451, "learning_rate": 1.651096365389121e-05, "loss": 0.6933, "step": 9465 }, { "epoch": 3.08067664281067, "grad_norm": 1.454704761505127, "learning_rate": 1.6486693311583946e-05, "loss": 0.7507, "step": 9470 }, { "epoch": 3.0823031880286273, "grad_norm": 1.861071228981018, "learning_rate": 1.6462432041985988e-05, "loss": 0.7547, "step": 9475 }, { "epoch": 3.0839297332465843, "grad_norm": 1.4772759675979614, "learning_rate": 1.6438179870952762e-05, "loss": 0.7036, "step": 9480 }, { "epoch": 3.085556278464541, "grad_norm": 1.302554726600647, "learning_rate": 1.641393682433005e-05, "loss": 0.7033, "step": 9485 }, { "epoch": 3.0871828236824985, "grad_norm": 1.4052358865737915, "learning_rate": 1.6389702927953876e-05, "loss": 0.721, "step": 9490 }, { "epoch": 3.0888093689004554, "grad_norm": 1.5605618953704834, "learning_rate": 1.6365478207650548e-05, "loss": 0.725, "step": 9495 }, { "epoch": 3.0904359141184123, "grad_norm": 1.23482084274292, "learning_rate": 1.634126268923655e-05, "loss": 0.7088, "step": 9500 }, { "epoch": 3.0920624593363697, "grad_norm": 1.6715662479400635, "learning_rate": 1.6317056398518603e-05, "loss": 0.6693, "step": 9505 }, { "epoch": 3.0936890045543266, "grad_norm": 1.7307251691818237, "learning_rate": 1.6292859361293554e-05, "loss": 0.7008, "step": 9510 }, { "epoch": 3.0953155497722835, "grad_norm": 1.8253427743911743, "learning_rate": 1.626867160334843e-05, "loss": 0.7397, "step": 9515 }, { "epoch": 3.096942094990241, "grad_norm": 1.3618115186691284, "learning_rate": 1.624449315046032e-05, "loss": 0.7269, "step": 9520 }, { "epoch": 3.0985686402081978, "grad_norm": 1.319265365600586, "learning_rate": 1.622032402839645e-05, "loss": 0.7107, "step": 9525 }, { "epoch": 3.1001951854261547, "grad_norm": 1.5050058364868164, "learning_rate": 1.6196164262914064e-05, "loss": 0.7153, "step": 9530 }, { "epoch": 3.101821730644112, "grad_norm": 1.7767952680587769, "learning_rate": 1.617201387976045e-05, "loss": 0.7352, "step": 9535 }, { "epoch": 3.103448275862069, "grad_norm": 1.5719988346099854, "learning_rate": 1.6147872904672887e-05, "loss": 0.7272, "step": 9540 }, { "epoch": 3.105074821080026, "grad_norm": 1.587790846824646, "learning_rate": 1.612374136337864e-05, "loss": 0.6901, "step": 9545 }, { "epoch": 3.106701366297983, "grad_norm": 1.547452449798584, "learning_rate": 1.609961928159491e-05, "loss": 0.7292, "step": 9550 }, { "epoch": 3.10832791151594, "grad_norm": 1.8181825876235962, "learning_rate": 1.6075506685028825e-05, "loss": 0.7167, "step": 9555 }, { "epoch": 3.109954456733897, "grad_norm": 1.717094898223877, "learning_rate": 1.6051403599377405e-05, "loss": 0.7095, "step": 9560 }, { "epoch": 3.1115810019518544, "grad_norm": 1.6198291778564453, "learning_rate": 1.6027310050327522e-05, "loss": 0.7287, "step": 9565 }, { "epoch": 3.1132075471698113, "grad_norm": 1.4881227016448975, "learning_rate": 1.6003226063555905e-05, "loss": 0.7282, "step": 9570 }, { "epoch": 3.114834092387768, "grad_norm": 1.5699553489685059, "learning_rate": 1.5979151664729062e-05, "loss": 0.7395, "step": 9575 }, { "epoch": 3.1164606376057256, "grad_norm": 1.5021533966064453, "learning_rate": 1.5955086879503316e-05, "loss": 0.706, "step": 9580 }, { "epoch": 3.1180871828236825, "grad_norm": 1.3850361108779907, "learning_rate": 1.5931031733524727e-05, "loss": 0.6917, "step": 9585 }, { "epoch": 3.1197137280416394, "grad_norm": 1.6612331867218018, "learning_rate": 1.5906986252429087e-05, "loss": 0.7029, "step": 9590 }, { "epoch": 3.1213402732595967, "grad_norm": 1.5954173803329468, "learning_rate": 1.5882950461841872e-05, "loss": 0.7325, "step": 9595 }, { "epoch": 3.1229668184775536, "grad_norm": 1.5727146863937378, "learning_rate": 1.585892438737827e-05, "loss": 0.7234, "step": 9600 }, { "epoch": 3.1245933636955106, "grad_norm": 1.4521474838256836, "learning_rate": 1.5834908054643073e-05, "loss": 0.73, "step": 9605 }, { "epoch": 3.126219908913468, "grad_norm": 1.3713246583938599, "learning_rate": 1.581090148923071e-05, "loss": 0.7294, "step": 9610 }, { "epoch": 3.127846454131425, "grad_norm": 1.5553102493286133, "learning_rate": 1.5786904716725196e-05, "loss": 0.6802, "step": 9615 }, { "epoch": 3.1294729993493817, "grad_norm": 1.575659990310669, "learning_rate": 1.576291776270013e-05, "loss": 0.7104, "step": 9620 }, { "epoch": 3.131099544567339, "grad_norm": 1.5438932180404663, "learning_rate": 1.5738940652718594e-05, "loss": 0.716, "step": 9625 }, { "epoch": 3.132726089785296, "grad_norm": 1.7387442588806152, "learning_rate": 1.5714973412333257e-05, "loss": 0.74, "step": 9630 }, { "epoch": 3.134352635003253, "grad_norm": 1.4178088903427124, "learning_rate": 1.56910160670862e-05, "loss": 0.7168, "step": 9635 }, { "epoch": 3.1359791802212102, "grad_norm": 1.5315080881118774, "learning_rate": 1.5667068642508996e-05, "loss": 0.7091, "step": 9640 }, { "epoch": 3.137605725439167, "grad_norm": 1.4313857555389404, "learning_rate": 1.5643131164122626e-05, "loss": 0.727, "step": 9645 }, { "epoch": 3.139232270657124, "grad_norm": 1.647253394126892, "learning_rate": 1.5619203657437503e-05, "loss": 0.7064, "step": 9650 }, { "epoch": 3.1408588158750814, "grad_norm": 1.7272855043411255, "learning_rate": 1.5595286147953364e-05, "loss": 0.701, "step": 9655 }, { "epoch": 3.1424853610930383, "grad_norm": 1.5717560052871704, "learning_rate": 1.5571378661159337e-05, "loss": 0.6957, "step": 9660 }, { "epoch": 3.1441119063109952, "grad_norm": 1.5355433225631714, "learning_rate": 1.5547481222533846e-05, "loss": 0.6857, "step": 9665 }, { "epoch": 3.1457384515289526, "grad_norm": 1.6073753833770752, "learning_rate": 1.552359385754461e-05, "loss": 0.7593, "step": 9670 }, { "epoch": 3.1473649967469095, "grad_norm": 4.703303813934326, "learning_rate": 1.549971659164861e-05, "loss": 0.7313, "step": 9675 }, { "epoch": 3.1489915419648664, "grad_norm": 1.5290316343307495, "learning_rate": 1.5475849450292085e-05, "loss": 0.7029, "step": 9680 }, { "epoch": 3.1506180871828238, "grad_norm": 1.3305752277374268, "learning_rate": 1.5451992458910442e-05, "loss": 0.7319, "step": 9685 }, { "epoch": 3.1522446324007807, "grad_norm": 1.7302073240280151, "learning_rate": 1.542814564292831e-05, "loss": 0.7177, "step": 9690 }, { "epoch": 3.153871177618738, "grad_norm": 1.3839761018753052, "learning_rate": 1.540430902775946e-05, "loss": 0.6713, "step": 9695 }, { "epoch": 3.155497722836695, "grad_norm": 1.5310893058776855, "learning_rate": 1.5380482638806794e-05, "loss": 0.7193, "step": 9700 }, { "epoch": 3.157124268054652, "grad_norm": 1.4911049604415894, "learning_rate": 1.5356666501462314e-05, "loss": 0.6861, "step": 9705 }, { "epoch": 3.1587508132726088, "grad_norm": 1.6888084411621094, "learning_rate": 1.533286064110709e-05, "loss": 0.738, "step": 9710 }, { "epoch": 3.160377358490566, "grad_norm": 1.4835600852966309, "learning_rate": 1.5309065083111255e-05, "loss": 0.6952, "step": 9715 }, { "epoch": 3.162003903708523, "grad_norm": 1.5391262769699097, "learning_rate": 1.5285279852833944e-05, "loss": 0.6985, "step": 9720 }, { "epoch": 3.1636304489264804, "grad_norm": 1.4776674509048462, "learning_rate": 1.5261504975623306e-05, "loss": 0.706, "step": 9725 }, { "epoch": 3.1652569941444373, "grad_norm": 1.6254886388778687, "learning_rate": 1.5237740476816436e-05, "loss": 0.7066, "step": 9730 }, { "epoch": 3.166883539362394, "grad_norm": 1.6165482997894287, "learning_rate": 1.5213986381739393e-05, "loss": 0.7039, "step": 9735 }, { "epoch": 3.168510084580351, "grad_norm": 1.6021651029586792, "learning_rate": 1.519024271570712e-05, "loss": 0.723, "step": 9740 }, { "epoch": 3.1701366297983085, "grad_norm": 1.3676519393920898, "learning_rate": 1.5166509504023473e-05, "loss": 0.7026, "step": 9745 }, { "epoch": 3.1717631750162654, "grad_norm": 1.5719008445739746, "learning_rate": 1.5142786771981146e-05, "loss": 0.7059, "step": 9750 }, { "epoch": 3.1733897202342227, "grad_norm": 1.551961064338684, "learning_rate": 1.5119074544861678e-05, "loss": 0.7075, "step": 9755 }, { "epoch": 3.1750162654521796, "grad_norm": 1.5193449258804321, "learning_rate": 1.5095372847935396e-05, "loss": 0.7044, "step": 9760 }, { "epoch": 3.1766428106701365, "grad_norm": 1.7731696367263794, "learning_rate": 1.5071681706461438e-05, "loss": 0.6854, "step": 9765 }, { "epoch": 3.178269355888094, "grad_norm": 1.7118431329727173, "learning_rate": 1.5048001145687646e-05, "loss": 0.7302, "step": 9770 }, { "epoch": 3.179895901106051, "grad_norm": 1.4772725105285645, "learning_rate": 1.5024331190850637e-05, "loss": 0.7273, "step": 9775 }, { "epoch": 3.1815224463240077, "grad_norm": 1.5707112550735474, "learning_rate": 1.5000671867175678e-05, "loss": 0.7173, "step": 9780 }, { "epoch": 3.183148991541965, "grad_norm": 1.5867023468017578, "learning_rate": 1.4977023199876743e-05, "loss": 0.7471, "step": 9785 }, { "epoch": 3.184775536759922, "grad_norm": 1.5893350839614868, "learning_rate": 1.4953385214156423e-05, "loss": 0.7315, "step": 9790 }, { "epoch": 3.186402081977879, "grad_norm": 1.6009336709976196, "learning_rate": 1.4929757935205951e-05, "loss": 0.7417, "step": 9795 }, { "epoch": 3.1880286271958362, "grad_norm": 1.7628577947616577, "learning_rate": 1.490614138820512e-05, "loss": 0.7384, "step": 9800 }, { "epoch": 3.189655172413793, "grad_norm": 1.6338847875595093, "learning_rate": 1.4882535598322311e-05, "loss": 0.6852, "step": 9805 }, { "epoch": 3.19128171763175, "grad_norm": 1.404396891593933, "learning_rate": 1.4858940590714427e-05, "loss": 0.708, "step": 9810 }, { "epoch": 3.1929082628497074, "grad_norm": 1.5045236349105835, "learning_rate": 1.4835356390526888e-05, "loss": 0.6954, "step": 9815 }, { "epoch": 3.1945348080676643, "grad_norm": 1.6785997152328491, "learning_rate": 1.481178302289359e-05, "loss": 0.7124, "step": 9820 }, { "epoch": 3.1961613532856212, "grad_norm": 1.522944450378418, "learning_rate": 1.478822051293689e-05, "loss": 0.7061, "step": 9825 }, { "epoch": 3.1977878985035786, "grad_norm": 1.6956477165222168, "learning_rate": 1.476466888576758e-05, "loss": 0.6949, "step": 9830 }, { "epoch": 3.1994144437215355, "grad_norm": 1.3752361536026, "learning_rate": 1.4741128166484824e-05, "loss": 0.6984, "step": 9835 }, { "epoch": 3.2010409889394924, "grad_norm": 1.7044976949691772, "learning_rate": 1.4717598380176212e-05, "loss": 0.7127, "step": 9840 }, { "epoch": 3.2026675341574498, "grad_norm": 1.5294950008392334, "learning_rate": 1.4694079551917629e-05, "loss": 0.7283, "step": 9845 }, { "epoch": 3.2042940793754067, "grad_norm": 1.3289799690246582, "learning_rate": 1.4670571706773318e-05, "loss": 0.731, "step": 9850 }, { "epoch": 3.2059206245933636, "grad_norm": 1.4929267168045044, "learning_rate": 1.4647074869795802e-05, "loss": 0.6821, "step": 9855 }, { "epoch": 3.207547169811321, "grad_norm": 1.678856611251831, "learning_rate": 1.462358906602589e-05, "loss": 0.7286, "step": 9860 }, { "epoch": 3.209173715029278, "grad_norm": 1.6927977800369263, "learning_rate": 1.4600114320492594e-05, "loss": 0.7238, "step": 9865 }, { "epoch": 3.2108002602472347, "grad_norm": 1.5571045875549316, "learning_rate": 1.4576650658213191e-05, "loss": 0.6965, "step": 9870 }, { "epoch": 3.212426805465192, "grad_norm": 1.6167880296707153, "learning_rate": 1.4553198104193094e-05, "loss": 0.7338, "step": 9875 }, { "epoch": 3.214053350683149, "grad_norm": 1.5416876077651978, "learning_rate": 1.452975668342594e-05, "loss": 0.7582, "step": 9880 }, { "epoch": 3.215679895901106, "grad_norm": 1.884232521057129, "learning_rate": 1.450632642089344e-05, "loss": 0.6966, "step": 9885 }, { "epoch": 3.2173064411190633, "grad_norm": 1.469514012336731, "learning_rate": 1.448290734156546e-05, "loss": 0.7052, "step": 9890 }, { "epoch": 3.21893298633702, "grad_norm": 1.666800856590271, "learning_rate": 1.445949947039991e-05, "loss": 0.6762, "step": 9895 }, { "epoch": 3.220559531554977, "grad_norm": 1.4748789072036743, "learning_rate": 1.443610283234279e-05, "loss": 0.7043, "step": 9900 }, { "epoch": 3.2221860767729344, "grad_norm": 1.6284899711608887, "learning_rate": 1.4412717452328084e-05, "loss": 0.7114, "step": 9905 }, { "epoch": 3.2238126219908914, "grad_norm": 1.5342994928359985, "learning_rate": 1.4389343355277852e-05, "loss": 0.7112, "step": 9910 }, { "epoch": 3.2254391672088483, "grad_norm": 1.387032389640808, "learning_rate": 1.4365980566102044e-05, "loss": 0.7171, "step": 9915 }, { "epoch": 3.2270657124268056, "grad_norm": 1.4326534271240234, "learning_rate": 1.4342629109698627e-05, "loss": 0.6751, "step": 9920 }, { "epoch": 3.2286922576447625, "grad_norm": 1.5771503448486328, "learning_rate": 1.431928901095344e-05, "loss": 0.7057, "step": 9925 }, { "epoch": 3.2303188028627194, "grad_norm": 1.6068642139434814, "learning_rate": 1.4295960294740263e-05, "loss": 0.7194, "step": 9930 }, { "epoch": 3.231945348080677, "grad_norm": 1.7702356576919556, "learning_rate": 1.4272642985920704e-05, "loss": 0.7322, "step": 9935 }, { "epoch": 3.2335718932986337, "grad_norm": 1.3826334476470947, "learning_rate": 1.4249337109344241e-05, "loss": 0.7101, "step": 9940 }, { "epoch": 3.2351984385165906, "grad_norm": 1.5163577795028687, "learning_rate": 1.4226042689848163e-05, "loss": 0.7131, "step": 9945 }, { "epoch": 3.236824983734548, "grad_norm": 1.5844553709030151, "learning_rate": 1.4202759752257555e-05, "loss": 0.6993, "step": 9950 }, { "epoch": 3.238451528952505, "grad_norm": 1.7427393198013306, "learning_rate": 1.4179488321385243e-05, "loss": 0.7216, "step": 9955 }, { "epoch": 3.240078074170462, "grad_norm": 1.4533311128616333, "learning_rate": 1.415622842203182e-05, "loss": 0.725, "step": 9960 }, { "epoch": 3.241704619388419, "grad_norm": 1.5041154623031616, "learning_rate": 1.4132980078985553e-05, "loss": 0.7022, "step": 9965 }, { "epoch": 3.243331164606376, "grad_norm": 1.9512314796447754, "learning_rate": 1.4109743317022434e-05, "loss": 0.7412, "step": 9970 }, { "epoch": 3.244957709824333, "grad_norm": 1.5821571350097656, "learning_rate": 1.4086518160906084e-05, "loss": 0.6979, "step": 9975 }, { "epoch": 3.2465842550422903, "grad_norm": 1.440798044204712, "learning_rate": 1.4063304635387773e-05, "loss": 0.72, "step": 9980 }, { "epoch": 3.248210800260247, "grad_norm": 1.6552304029464722, "learning_rate": 1.4040102765206375e-05, "loss": 0.7336, "step": 9985 }, { "epoch": 3.249837345478204, "grad_norm": 1.539847493171692, "learning_rate": 1.4016912575088318e-05, "loss": 0.7261, "step": 9990 }, { "epoch": 3.2514638906961615, "grad_norm": 1.4375213384628296, "learning_rate": 1.3993734089747617e-05, "loss": 0.7189, "step": 9995 }, { "epoch": 3.2530904359141184, "grad_norm": 1.5719690322875977, "learning_rate": 1.3970567333885786e-05, "loss": 0.7117, "step": 10000 }, { "epoch": 3.2547169811320753, "grad_norm": 2.038811683654785, "learning_rate": 1.3947412332191855e-05, "loss": 0.7494, "step": 10005 }, { "epoch": 3.2563435263500327, "grad_norm": 1.454071283340454, "learning_rate": 1.392426910934232e-05, "loss": 0.7161, "step": 10010 }, { "epoch": 3.2579700715679896, "grad_norm": 1.6480475664138794, "learning_rate": 1.3901137690001137e-05, "loss": 0.6772, "step": 10015 }, { "epoch": 3.2595966167859465, "grad_norm": 1.6306781768798828, "learning_rate": 1.3878018098819657e-05, "loss": 0.7098, "step": 10020 }, { "epoch": 3.261223162003904, "grad_norm": 1.6802515983581543, "learning_rate": 1.3854910360436657e-05, "loss": 0.7156, "step": 10025 }, { "epoch": 3.2628497072218607, "grad_norm": 1.633367657661438, "learning_rate": 1.383181449947825e-05, "loss": 0.7353, "step": 10030 }, { "epoch": 3.2644762524398176, "grad_norm": 1.6299443244934082, "learning_rate": 1.3808730540557913e-05, "loss": 0.695, "step": 10035 }, { "epoch": 3.266102797657775, "grad_norm": 1.8901253938674927, "learning_rate": 1.3785658508276436e-05, "loss": 0.7438, "step": 10040 }, { "epoch": 3.267729342875732, "grad_norm": 1.522379994392395, "learning_rate": 1.3762598427221906e-05, "loss": 0.7391, "step": 10045 }, { "epoch": 3.269355888093689, "grad_norm": 1.387171745300293, "learning_rate": 1.3739550321969647e-05, "loss": 0.7309, "step": 10050 }, { "epoch": 3.270982433311646, "grad_norm": 1.5634115934371948, "learning_rate": 1.3716514217082249e-05, "loss": 0.7214, "step": 10055 }, { "epoch": 3.272608978529603, "grad_norm": 1.6006046533584595, "learning_rate": 1.3693490137109485e-05, "loss": 0.6994, "step": 10060 }, { "epoch": 3.27423552374756, "grad_norm": 1.5280417203903198, "learning_rate": 1.3670478106588341e-05, "loss": 0.703, "step": 10065 }, { "epoch": 3.2758620689655173, "grad_norm": 1.3558233976364136, "learning_rate": 1.364747815004295e-05, "loss": 0.7164, "step": 10070 }, { "epoch": 3.2774886141834743, "grad_norm": 1.4231760501861572, "learning_rate": 1.3624490291984582e-05, "loss": 0.6919, "step": 10075 }, { "epoch": 3.279115159401431, "grad_norm": 1.425530195236206, "learning_rate": 1.3601514556911596e-05, "loss": 0.7426, "step": 10080 }, { "epoch": 3.2807417046193885, "grad_norm": 1.632760763168335, "learning_rate": 1.3578550969309459e-05, "loss": 0.6994, "step": 10085 }, { "epoch": 3.2823682498373454, "grad_norm": 1.5265684127807617, "learning_rate": 1.3555599553650658e-05, "loss": 0.6966, "step": 10090 }, { "epoch": 3.2839947950553023, "grad_norm": 1.580483078956604, "learning_rate": 1.3532660334394742e-05, "loss": 0.7195, "step": 10095 }, { "epoch": 3.2856213402732597, "grad_norm": 1.6560685634613037, "learning_rate": 1.3509733335988245e-05, "loss": 0.7102, "step": 10100 }, { "epoch": 3.2872478854912166, "grad_norm": 1.4860405921936035, "learning_rate": 1.3486818582864678e-05, "loss": 0.7153, "step": 10105 }, { "epoch": 3.288874430709174, "grad_norm": 1.608243703842163, "learning_rate": 1.3463916099444518e-05, "loss": 0.7567, "step": 10110 }, { "epoch": 3.290500975927131, "grad_norm": 1.5727096796035767, "learning_rate": 1.344102591013513e-05, "loss": 0.7125, "step": 10115 }, { "epoch": 3.2921275211450878, "grad_norm": 1.5452401638031006, "learning_rate": 1.3418148039330822e-05, "loss": 0.7323, "step": 10120 }, { "epoch": 3.2937540663630447, "grad_norm": 1.8954321146011353, "learning_rate": 1.339528251141273e-05, "loss": 0.7256, "step": 10125 }, { "epoch": 3.295380611581002, "grad_norm": 1.5137685537338257, "learning_rate": 1.3372429350748866e-05, "loss": 0.7211, "step": 10130 }, { "epoch": 3.297007156798959, "grad_norm": 1.448923945426941, "learning_rate": 1.3349588581694058e-05, "loss": 0.6866, "step": 10135 }, { "epoch": 3.2986337020169163, "grad_norm": 1.4570305347442627, "learning_rate": 1.332676022858993e-05, "loss": 0.6978, "step": 10140 }, { "epoch": 3.300260247234873, "grad_norm": 1.5461809635162354, "learning_rate": 1.3303944315764848e-05, "loss": 0.7123, "step": 10145 }, { "epoch": 3.30188679245283, "grad_norm": 1.457127571105957, "learning_rate": 1.3281140867533962e-05, "loss": 0.6912, "step": 10150 }, { "epoch": 3.303513337670787, "grad_norm": 1.7911895513534546, "learning_rate": 1.3258349908199098e-05, "loss": 0.693, "step": 10155 }, { "epoch": 3.3051398828887444, "grad_norm": 1.7300539016723633, "learning_rate": 1.3235571462048795e-05, "loss": 0.7113, "step": 10160 }, { "epoch": 3.3067664281067013, "grad_norm": 1.6573905944824219, "learning_rate": 1.321280555335826e-05, "loss": 0.7091, "step": 10165 }, { "epoch": 3.3083929733246586, "grad_norm": 1.4438972473144531, "learning_rate": 1.3190052206389337e-05, "loss": 0.6904, "step": 10170 }, { "epoch": 3.3100195185426156, "grad_norm": 1.483851671218872, "learning_rate": 1.3167311445390456e-05, "loss": 0.678, "step": 10175 }, { "epoch": 3.3116460637605725, "grad_norm": 1.3699018955230713, "learning_rate": 1.314458329459668e-05, "loss": 0.7167, "step": 10180 }, { "epoch": 3.3132726089785294, "grad_norm": 1.7156723737716675, "learning_rate": 1.3121867778229588e-05, "loss": 0.7014, "step": 10185 }, { "epoch": 3.3148991541964867, "grad_norm": 1.7358932495117188, "learning_rate": 1.309916492049732e-05, "loss": 0.7227, "step": 10190 }, { "epoch": 3.3165256994144436, "grad_norm": 1.649723768234253, "learning_rate": 1.3076474745594524e-05, "loss": 0.7212, "step": 10195 }, { "epoch": 3.318152244632401, "grad_norm": 1.4956382513046265, "learning_rate": 1.3053797277702339e-05, "loss": 0.7026, "step": 10200 }, { "epoch": 3.319778789850358, "grad_norm": 1.6174901723861694, "learning_rate": 1.3031132540988331e-05, "loss": 0.7238, "step": 10205 }, { "epoch": 3.321405335068315, "grad_norm": 1.677567481994629, "learning_rate": 1.3008480559606534e-05, "loss": 0.7206, "step": 10210 }, { "epoch": 3.3230318802862717, "grad_norm": 1.453972578048706, "learning_rate": 1.2985841357697359e-05, "loss": 0.7292, "step": 10215 }, { "epoch": 3.324658425504229, "grad_norm": 1.7349648475646973, "learning_rate": 1.2963214959387632e-05, "loss": 0.7551, "step": 10220 }, { "epoch": 3.326284970722186, "grad_norm": 1.589353084564209, "learning_rate": 1.2940601388790475e-05, "loss": 0.7172, "step": 10225 }, { "epoch": 3.3279115159401433, "grad_norm": 1.5932533740997314, "learning_rate": 1.2918000670005429e-05, "loss": 0.7285, "step": 10230 }, { "epoch": 3.3295380611581002, "grad_norm": 1.4485647678375244, "learning_rate": 1.2895412827118252e-05, "loss": 0.7124, "step": 10235 }, { "epoch": 3.331164606376057, "grad_norm": 1.3996765613555908, "learning_rate": 1.2872837884201028e-05, "loss": 0.7202, "step": 10240 }, { "epoch": 3.332791151594014, "grad_norm": 1.453568458557129, "learning_rate": 1.2850275865312089e-05, "loss": 0.7284, "step": 10245 }, { "epoch": 3.3344176968119714, "grad_norm": 1.5970523357391357, "learning_rate": 1.282772679449597e-05, "loss": 0.7023, "step": 10250 }, { "epoch": 3.3360442420299283, "grad_norm": 1.5225187540054321, "learning_rate": 1.2805190695783442e-05, "loss": 0.6846, "step": 10255 }, { "epoch": 3.3376707872478857, "grad_norm": 1.5703229904174805, "learning_rate": 1.2782667593191403e-05, "loss": 0.6943, "step": 10260 }, { "epoch": 3.3392973324658426, "grad_norm": 1.332912802696228, "learning_rate": 1.276015751072297e-05, "loss": 0.6942, "step": 10265 }, { "epoch": 3.3409238776837995, "grad_norm": 1.6624693870544434, "learning_rate": 1.2737660472367314e-05, "loss": 0.714, "step": 10270 }, { "epoch": 3.342550422901757, "grad_norm": 1.4957422018051147, "learning_rate": 1.2715176502099755e-05, "loss": 0.6987, "step": 10275 }, { "epoch": 3.3441769681197138, "grad_norm": 1.4958122968673706, "learning_rate": 1.2692705623881651e-05, "loss": 0.7424, "step": 10280 }, { "epoch": 3.3458035133376707, "grad_norm": 1.522931694984436, "learning_rate": 1.2670247861660438e-05, "loss": 0.7368, "step": 10285 }, { "epoch": 3.347430058555628, "grad_norm": 1.5897443294525146, "learning_rate": 1.264780323936954e-05, "loss": 0.7108, "step": 10290 }, { "epoch": 3.349056603773585, "grad_norm": 1.2949689626693726, "learning_rate": 1.2625371780928428e-05, "loss": 0.7016, "step": 10295 }, { "epoch": 3.350683148991542, "grad_norm": 1.3897663354873657, "learning_rate": 1.2602953510242487e-05, "loss": 0.7008, "step": 10300 }, { "epoch": 3.352309694209499, "grad_norm": 1.5072520971298218, "learning_rate": 1.2580548451203095e-05, "loss": 0.6824, "step": 10305 }, { "epoch": 3.353936239427456, "grad_norm": 1.448017954826355, "learning_rate": 1.2558156627687507e-05, "loss": 0.7246, "step": 10310 }, { "epoch": 3.355562784645413, "grad_norm": 1.5598937273025513, "learning_rate": 1.2535778063558917e-05, "loss": 0.705, "step": 10315 }, { "epoch": 3.3571893298633704, "grad_norm": 1.5127761363983154, "learning_rate": 1.251341278266635e-05, "loss": 0.7686, "step": 10320 }, { "epoch": 3.3588158750813273, "grad_norm": 1.655152440071106, "learning_rate": 1.2491060808844696e-05, "loss": 0.7337, "step": 10325 }, { "epoch": 3.360442420299284, "grad_norm": 1.5748684406280518, "learning_rate": 1.2468722165914662e-05, "loss": 0.7189, "step": 10330 }, { "epoch": 3.3620689655172415, "grad_norm": 1.4318666458129883, "learning_rate": 1.2446396877682756e-05, "loss": 0.7528, "step": 10335 }, { "epoch": 3.3636955107351985, "grad_norm": 1.6140999794006348, "learning_rate": 1.2424084967941222e-05, "loss": 0.7231, "step": 10340 }, { "epoch": 3.3653220559531554, "grad_norm": 1.6013562679290771, "learning_rate": 1.2401786460468087e-05, "loss": 0.7028, "step": 10345 }, { "epoch": 3.3669486011711127, "grad_norm": 1.481052041053772, "learning_rate": 1.2379501379027059e-05, "loss": 0.6973, "step": 10350 }, { "epoch": 3.3685751463890696, "grad_norm": 1.4653503894805908, "learning_rate": 1.235722974736756e-05, "loss": 0.7276, "step": 10355 }, { "epoch": 3.3702016916070265, "grad_norm": 1.797591209411621, "learning_rate": 1.2334971589224675e-05, "loss": 0.7194, "step": 10360 }, { "epoch": 3.371828236824984, "grad_norm": 1.5291736125946045, "learning_rate": 1.2312726928319138e-05, "loss": 0.7191, "step": 10365 }, { "epoch": 3.373454782042941, "grad_norm": 1.5751301050186157, "learning_rate": 1.2290495788357267e-05, "loss": 0.6883, "step": 10370 }, { "epoch": 3.3750813272608977, "grad_norm": 1.4436094760894775, "learning_rate": 1.2268278193031008e-05, "loss": 0.7259, "step": 10375 }, { "epoch": 3.376707872478855, "grad_norm": 1.4931341409683228, "learning_rate": 1.224607416601786e-05, "loss": 0.6869, "step": 10380 }, { "epoch": 3.378334417696812, "grad_norm": 1.5743861198425293, "learning_rate": 1.2223883730980843e-05, "loss": 0.727, "step": 10385 }, { "epoch": 3.379960962914769, "grad_norm": 1.6630041599273682, "learning_rate": 1.2201706911568515e-05, "loss": 0.6931, "step": 10390 }, { "epoch": 3.3815875081327262, "grad_norm": 1.3692864179611206, "learning_rate": 1.2179543731414919e-05, "loss": 0.697, "step": 10395 }, { "epoch": 3.383214053350683, "grad_norm": 1.7292215824127197, "learning_rate": 1.215739421413957e-05, "loss": 0.7022, "step": 10400 }, { "epoch": 3.38484059856864, "grad_norm": 1.7678391933441162, "learning_rate": 1.2135258383347392e-05, "loss": 0.6958, "step": 10405 }, { "epoch": 3.3864671437865974, "grad_norm": 1.4655472040176392, "learning_rate": 1.211313626262876e-05, "loss": 0.6881, "step": 10410 }, { "epoch": 3.3880936890045543, "grad_norm": 1.5148380994796753, "learning_rate": 1.2091027875559408e-05, "loss": 0.7441, "step": 10415 }, { "epoch": 3.3897202342225112, "grad_norm": 1.4870203733444214, "learning_rate": 1.2068933245700454e-05, "loss": 0.6823, "step": 10420 }, { "epoch": 3.3913467794404686, "grad_norm": 1.5015543699264526, "learning_rate": 1.204685239659835e-05, "loss": 0.7193, "step": 10425 }, { "epoch": 3.3929733246584255, "grad_norm": 1.6654022932052612, "learning_rate": 1.2024785351784868e-05, "loss": 0.7454, "step": 10430 }, { "epoch": 3.3945998698763824, "grad_norm": 1.4848318099975586, "learning_rate": 1.2002732134777039e-05, "loss": 0.719, "step": 10435 }, { "epoch": 3.3962264150943398, "grad_norm": 1.4724451303482056, "learning_rate": 1.1980692769077207e-05, "loss": 0.7228, "step": 10440 }, { "epoch": 3.3978529603122967, "grad_norm": 1.5084017515182495, "learning_rate": 1.1958667278172897e-05, "loss": 0.7054, "step": 10445 }, { "epoch": 3.3994795055302536, "grad_norm": 1.4197804927825928, "learning_rate": 1.1936655685536896e-05, "loss": 0.7361, "step": 10450 }, { "epoch": 3.401106050748211, "grad_norm": 1.490307331085205, "learning_rate": 1.1914658014627156e-05, "loss": 0.7126, "step": 10455 }, { "epoch": 3.402732595966168, "grad_norm": 1.641398549079895, "learning_rate": 1.1897069917296555e-05, "loss": 0.7331, "step": 10460 }, { "epoch": 3.4043591411841247, "grad_norm": 1.4080244302749634, "learning_rate": 1.1875097364560772e-05, "loss": 0.698, "step": 10465 }, { "epoch": 3.405985686402082, "grad_norm": 1.6090103387832642, "learning_rate": 1.1853138799154514e-05, "loss": 0.739, "step": 10470 }, { "epoch": 3.407612231620039, "grad_norm": 1.4078242778778076, "learning_rate": 1.183119424447923e-05, "loss": 0.7201, "step": 10475 }, { "epoch": 3.409238776837996, "grad_norm": 1.4669996500015259, "learning_rate": 1.1809263723921438e-05, "loss": 0.7012, "step": 10480 }, { "epoch": 3.4108653220559533, "grad_norm": 1.5093704462051392, "learning_rate": 1.1787347260852702e-05, "loss": 0.7038, "step": 10485 }, { "epoch": 3.41249186727391, "grad_norm": 1.4808945655822754, "learning_rate": 1.1765444878629583e-05, "loss": 0.7289, "step": 10490 }, { "epoch": 3.414118412491867, "grad_norm": 1.7515590190887451, "learning_rate": 1.1743556600593667e-05, "loss": 0.7325, "step": 10495 }, { "epoch": 3.4157449577098244, "grad_norm": 1.5390026569366455, "learning_rate": 1.1721682450071476e-05, "loss": 0.7099, "step": 10500 }, { "epoch": 3.4173715029277814, "grad_norm": 1.5322160720825195, "learning_rate": 1.16998224503745e-05, "loss": 0.7434, "step": 10505 }, { "epoch": 3.4189980481457383, "grad_norm": 1.4386855363845825, "learning_rate": 1.167797662479915e-05, "loss": 0.7354, "step": 10510 }, { "epoch": 3.4206245933636956, "grad_norm": 1.5932950973510742, "learning_rate": 1.1656144996626713e-05, "loss": 0.7492, "step": 10515 }, { "epoch": 3.4222511385816525, "grad_norm": 1.5420165061950684, "learning_rate": 1.1634327589123373e-05, "loss": 0.7103, "step": 10520 }, { "epoch": 3.4238776837996094, "grad_norm": 1.5688308477401733, "learning_rate": 1.161252442554012e-05, "loss": 0.7307, "step": 10525 }, { "epoch": 3.425504229017567, "grad_norm": 1.396478533744812, "learning_rate": 1.1590735529112806e-05, "loss": 0.7005, "step": 10530 }, { "epoch": 3.4271307742355237, "grad_norm": 1.5843379497528076, "learning_rate": 1.1568960923062036e-05, "loss": 0.7425, "step": 10535 }, { "epoch": 3.4287573194534806, "grad_norm": 1.4384610652923584, "learning_rate": 1.1547200630593224e-05, "loss": 0.7036, "step": 10540 }, { "epoch": 3.430383864671438, "grad_norm": 2.012709379196167, "learning_rate": 1.152545467489651e-05, "loss": 0.6841, "step": 10545 }, { "epoch": 3.432010409889395, "grad_norm": 1.658067226409912, "learning_rate": 1.1503723079146766e-05, "loss": 0.7468, "step": 10550 }, { "epoch": 3.4336369551073522, "grad_norm": 1.7124435901641846, "learning_rate": 1.1482005866503543e-05, "loss": 0.7366, "step": 10555 }, { "epoch": 3.435263500325309, "grad_norm": 1.6711723804473877, "learning_rate": 1.1460303060111083e-05, "loss": 0.7273, "step": 10560 }, { "epoch": 3.436890045543266, "grad_norm": 1.592267394065857, "learning_rate": 1.1438614683098256e-05, "loss": 0.6897, "step": 10565 }, { "epoch": 3.438516590761223, "grad_norm": 1.4569389820098877, "learning_rate": 1.1416940758578567e-05, "loss": 0.7295, "step": 10570 }, { "epoch": 3.4401431359791803, "grad_norm": 1.7420532703399658, "learning_rate": 1.1395281309650125e-05, "loss": 0.7123, "step": 10575 }, { "epoch": 3.441769681197137, "grad_norm": 1.4769504070281982, "learning_rate": 1.137363635939561e-05, "loss": 0.6848, "step": 10580 }, { "epoch": 3.4433962264150946, "grad_norm": 1.5139350891113281, "learning_rate": 1.135200593088222e-05, "loss": 0.6861, "step": 10585 }, { "epoch": 3.4450227716330515, "grad_norm": 1.574965238571167, "learning_rate": 1.1330390047161729e-05, "loss": 0.7051, "step": 10590 }, { "epoch": 3.4466493168510084, "grad_norm": 1.7573829889297485, "learning_rate": 1.1308788731270362e-05, "loss": 0.7307, "step": 10595 }, { "epoch": 3.4482758620689653, "grad_norm": 1.6615879535675049, "learning_rate": 1.1287202006228858e-05, "loss": 0.7444, "step": 10600 }, { "epoch": 3.4499024072869227, "grad_norm": 1.6836899518966675, "learning_rate": 1.126562989504236e-05, "loss": 0.7067, "step": 10605 }, { "epoch": 3.4515289525048796, "grad_norm": 1.8277279138565063, "learning_rate": 1.1244072420700502e-05, "loss": 0.7354, "step": 10610 }, { "epoch": 3.453155497722837, "grad_norm": 1.505303978919983, "learning_rate": 1.122252960617726e-05, "loss": 0.7139, "step": 10615 }, { "epoch": 3.454782042940794, "grad_norm": 1.5495682954788208, "learning_rate": 1.1201001474431022e-05, "loss": 0.6975, "step": 10620 }, { "epoch": 3.4564085881587507, "grad_norm": 1.499089002609253, "learning_rate": 1.1179488048404498e-05, "loss": 0.7093, "step": 10625 }, { "epoch": 3.4580351333767076, "grad_norm": 1.4078710079193115, "learning_rate": 1.1157989351024767e-05, "loss": 0.716, "step": 10630 }, { "epoch": 3.459661678594665, "grad_norm": 1.8323733806610107, "learning_rate": 1.113650540520316e-05, "loss": 0.691, "step": 10635 }, { "epoch": 3.461288223812622, "grad_norm": 1.3862072229385376, "learning_rate": 1.1115036233835349e-05, "loss": 0.6881, "step": 10640 }, { "epoch": 3.4629147690305793, "grad_norm": 1.693575143814087, "learning_rate": 1.1093581859801205e-05, "loss": 0.6811, "step": 10645 }, { "epoch": 3.464541314248536, "grad_norm": 1.713070273399353, "learning_rate": 1.1072142305964855e-05, "loss": 0.7084, "step": 10650 }, { "epoch": 3.466167859466493, "grad_norm": 1.6855125427246094, "learning_rate": 1.1050717595174645e-05, "loss": 0.7385, "step": 10655 }, { "epoch": 3.46779440468445, "grad_norm": 1.8462568521499634, "learning_rate": 1.102930775026306e-05, "loss": 0.7272, "step": 10660 }, { "epoch": 3.4694209499024073, "grad_norm": 1.3941353559494019, "learning_rate": 1.1007912794046796e-05, "loss": 0.699, "step": 10665 }, { "epoch": 3.4710474951203643, "grad_norm": 1.3901314735412598, "learning_rate": 1.098653274932662e-05, "loss": 0.735, "step": 10670 }, { "epoch": 3.4726740403383216, "grad_norm": 1.5157406330108643, "learning_rate": 1.0965167638887483e-05, "loss": 0.7, "step": 10675 }, { "epoch": 3.4743005855562785, "grad_norm": 1.5837805271148682, "learning_rate": 1.094381748549835e-05, "loss": 0.7497, "step": 10680 }, { "epoch": 3.4759271307742354, "grad_norm": 1.4613126516342163, "learning_rate": 1.0922482311912297e-05, "loss": 0.7371, "step": 10685 }, { "epoch": 3.4775536759921923, "grad_norm": 1.4788724184036255, "learning_rate": 1.0901162140866395e-05, "loss": 0.6984, "step": 10690 }, { "epoch": 3.4791802212101497, "grad_norm": 1.4860212802886963, "learning_rate": 1.0879856995081764e-05, "loss": 0.7104, "step": 10695 }, { "epoch": 3.4808067664281066, "grad_norm": 1.4833911657333374, "learning_rate": 1.0858566897263475e-05, "loss": 0.679, "step": 10700 }, { "epoch": 3.482433311646064, "grad_norm": 1.7178986072540283, "learning_rate": 1.0837291870100594e-05, "loss": 0.7136, "step": 10705 }, { "epoch": 3.484059856864021, "grad_norm": 1.597494125366211, "learning_rate": 1.081603193626611e-05, "loss": 0.7045, "step": 10710 }, { "epoch": 3.4856864020819778, "grad_norm": 1.7972383499145508, "learning_rate": 1.079478711841694e-05, "loss": 0.7264, "step": 10715 }, { "epoch": 3.487312947299935, "grad_norm": 1.547997236251831, "learning_rate": 1.0773557439193865e-05, "loss": 0.7244, "step": 10720 }, { "epoch": 3.488939492517892, "grad_norm": 1.9032717943191528, "learning_rate": 1.0752342921221567e-05, "loss": 0.6948, "step": 10725 }, { "epoch": 3.490566037735849, "grad_norm": 2.0314126014709473, "learning_rate": 1.0731143587108533e-05, "loss": 0.7103, "step": 10730 }, { "epoch": 3.4921925829538063, "grad_norm": 1.7030878067016602, "learning_rate": 1.0709959459447103e-05, "loss": 0.72, "step": 10735 }, { "epoch": 3.493819128171763, "grad_norm": 1.4046820402145386, "learning_rate": 1.0688790560813388e-05, "loss": 0.6971, "step": 10740 }, { "epoch": 3.49544567338972, "grad_norm": 1.853163480758667, "learning_rate": 1.0667636913767295e-05, "loss": 0.7415, "step": 10745 }, { "epoch": 3.4970722186076775, "grad_norm": 1.5057862997055054, "learning_rate": 1.064649854085244e-05, "loss": 0.7108, "step": 10750 }, { "epoch": 3.4986987638256344, "grad_norm": 1.6542308330535889, "learning_rate": 1.0625375464596196e-05, "loss": 0.7096, "step": 10755 }, { "epoch": 3.5003253090435913, "grad_norm": 1.6097996234893799, "learning_rate": 1.0604267707509608e-05, "loss": 0.6943, "step": 10760 }, { "epoch": 3.5019518542615486, "grad_norm": 1.5394870042800903, "learning_rate": 1.058317529208741e-05, "loss": 0.7102, "step": 10765 }, { "epoch": 3.5035783994795056, "grad_norm": 1.442588448524475, "learning_rate": 1.0562098240807989e-05, "loss": 0.7076, "step": 10770 }, { "epoch": 3.5052049446974625, "grad_norm": 1.5214558839797974, "learning_rate": 1.0541036576133357e-05, "loss": 0.6959, "step": 10775 }, { "epoch": 3.5068314899154194, "grad_norm": 1.8879523277282715, "learning_rate": 1.0519990320509104e-05, "loss": 0.6911, "step": 10780 }, { "epoch": 3.5084580351333767, "grad_norm": 1.5548925399780273, "learning_rate": 1.0498959496364436e-05, "loss": 0.7065, "step": 10785 }, { "epoch": 3.5100845803513336, "grad_norm": 1.7276703119277954, "learning_rate": 1.0477944126112097e-05, "loss": 0.7163, "step": 10790 }, { "epoch": 3.511711125569291, "grad_norm": 1.5614087581634521, "learning_rate": 1.0456944232148344e-05, "loss": 0.693, "step": 10795 }, { "epoch": 3.513337670787248, "grad_norm": 1.639573574066162, "learning_rate": 1.0435959836852967e-05, "loss": 0.7031, "step": 10800 }, { "epoch": 3.514964216005205, "grad_norm": 1.4618337154388428, "learning_rate": 1.0414990962589233e-05, "loss": 0.7181, "step": 10805 }, { "epoch": 3.516590761223162, "grad_norm": 1.5002321004867554, "learning_rate": 1.0394037631703867e-05, "loss": 0.712, "step": 10810 }, { "epoch": 3.518217306441119, "grad_norm": 1.6069719791412354, "learning_rate": 1.0373099866527012e-05, "loss": 0.7276, "step": 10815 }, { "epoch": 3.519843851659076, "grad_norm": 1.5763250589370728, "learning_rate": 1.0352177689372256e-05, "loss": 0.7147, "step": 10820 }, { "epoch": 3.5214703968770333, "grad_norm": 1.7312473058700562, "learning_rate": 1.0331271122536534e-05, "loss": 0.7402, "step": 10825 }, { "epoch": 3.5230969420949902, "grad_norm": 1.7951452732086182, "learning_rate": 1.0310380188300178e-05, "loss": 0.7273, "step": 10830 }, { "epoch": 3.524723487312947, "grad_norm": 1.6065268516540527, "learning_rate": 1.0289504908926847e-05, "loss": 0.7099, "step": 10835 }, { "epoch": 3.5263500325309045, "grad_norm": 1.5761017799377441, "learning_rate": 1.0268645306663532e-05, "loss": 0.7255, "step": 10840 }, { "epoch": 3.5279765777488614, "grad_norm": 1.8556472063064575, "learning_rate": 1.0247801403740482e-05, "loss": 0.7375, "step": 10845 }, { "epoch": 3.5296031229668183, "grad_norm": 1.540916085243225, "learning_rate": 1.0226973222371253e-05, "loss": 0.698, "step": 10850 }, { "epoch": 3.5312296681847757, "grad_norm": 1.4681557416915894, "learning_rate": 1.0206160784752613e-05, "loss": 0.7412, "step": 10855 }, { "epoch": 3.5328562134027326, "grad_norm": 1.5720964670181274, "learning_rate": 1.0185364113064577e-05, "loss": 0.7161, "step": 10860 }, { "epoch": 3.5344827586206895, "grad_norm": 1.4892054796218872, "learning_rate": 1.016458322947035e-05, "loss": 0.7072, "step": 10865 }, { "epoch": 3.536109303838647, "grad_norm": 1.7336512804031372, "learning_rate": 1.0143818156116323e-05, "loss": 0.7018, "step": 10870 }, { "epoch": 3.5377358490566038, "grad_norm": 1.3897322416305542, "learning_rate": 1.0123068915132e-05, "loss": 0.7183, "step": 10875 }, { "epoch": 3.5393623942745607, "grad_norm": 1.5542066097259521, "learning_rate": 1.0102335528630061e-05, "loss": 0.7051, "step": 10880 }, { "epoch": 3.540988939492518, "grad_norm": 1.5814255475997925, "learning_rate": 1.008161801870625e-05, "loss": 0.7023, "step": 10885 }, { "epoch": 3.542615484710475, "grad_norm": 1.7088481187820435, "learning_rate": 1.0060916407439413e-05, "loss": 0.6927, "step": 10890 }, { "epoch": 3.544242029928432, "grad_norm": 1.7526652812957764, "learning_rate": 1.0040230716891449e-05, "loss": 0.7098, "step": 10895 }, { "epoch": 3.545868575146389, "grad_norm": 1.8259118795394897, "learning_rate": 1.0019560969107302e-05, "loss": 0.7158, "step": 10900 }, { "epoch": 3.547495120364346, "grad_norm": 1.719221830368042, "learning_rate": 9.99890718611489e-06, "loss": 0.7071, "step": 10905 }, { "epoch": 3.5491216655823035, "grad_norm": 1.5181546211242676, "learning_rate": 9.978269389925157e-06, "loss": 0.6808, "step": 10910 }, { "epoch": 3.5507482108002604, "grad_norm": 1.891473412513733, "learning_rate": 9.957647602531977e-06, "loss": 0.7008, "step": 10915 }, { "epoch": 3.5523747560182173, "grad_norm": 1.359625220298767, "learning_rate": 9.937041845912188e-06, "loss": 0.7126, "step": 10920 }, { "epoch": 3.554001301236174, "grad_norm": 1.5642650127410889, "learning_rate": 9.91645214202553e-06, "loss": 0.6957, "step": 10925 }, { "epoch": 3.5556278464541315, "grad_norm": 1.5305712223052979, "learning_rate": 9.895878512814647e-06, "loss": 0.71, "step": 10930 }, { "epoch": 3.5572543916720885, "grad_norm": 1.5830919742584229, "learning_rate": 9.875320980205046e-06, "loss": 0.7211, "step": 10935 }, { "epoch": 3.558880936890046, "grad_norm": 1.6251001358032227, "learning_rate": 9.854779566105068e-06, "loss": 0.7022, "step": 10940 }, { "epoch": 3.5605074821080027, "grad_norm": 1.588088870048523, "learning_rate": 9.834254292405901e-06, "loss": 0.7137, "step": 10945 }, { "epoch": 3.5621340273259596, "grad_norm": 1.421298861503601, "learning_rate": 9.813745180981502e-06, "loss": 0.6836, "step": 10950 }, { "epoch": 3.5637605725439165, "grad_norm": 1.5378276109695435, "learning_rate": 9.793252253688626e-06, "loss": 0.7325, "step": 10955 }, { "epoch": 3.565387117761874, "grad_norm": 1.389072299003601, "learning_rate": 9.772775532366774e-06, "loss": 0.7104, "step": 10960 }, { "epoch": 3.567013662979831, "grad_norm": 1.4843015670776367, "learning_rate": 9.75231503883819e-06, "loss": 0.7155, "step": 10965 }, { "epoch": 3.568640208197788, "grad_norm": 1.5925216674804688, "learning_rate": 9.731870794907789e-06, "loss": 0.7251, "step": 10970 }, { "epoch": 3.570266753415745, "grad_norm": 1.4958367347717285, "learning_rate": 9.711442822363209e-06, "loss": 0.7518, "step": 10975 }, { "epoch": 3.571893298633702, "grad_norm": 1.3752219676971436, "learning_rate": 9.691031142974707e-06, "loss": 0.7191, "step": 10980 }, { "epoch": 3.573519843851659, "grad_norm": 1.3566709756851196, "learning_rate": 9.670635778495213e-06, "loss": 0.7256, "step": 10985 }, { "epoch": 3.5751463890696162, "grad_norm": 1.5694020986557007, "learning_rate": 9.65025675066025e-06, "loss": 0.7189, "step": 10990 }, { "epoch": 3.576772934287573, "grad_norm": 1.3972755670547485, "learning_rate": 9.629894081187943e-06, "loss": 0.7064, "step": 10995 }, { "epoch": 3.5783994795055305, "grad_norm": 1.4390963315963745, "learning_rate": 9.609547791778964e-06, "loss": 0.7371, "step": 11000 }, { "epoch": 3.5800260247234874, "grad_norm": 1.5027835369110107, "learning_rate": 9.589217904116554e-06, "loss": 0.7328, "step": 11005 }, { "epoch": 3.5816525699414443, "grad_norm": 1.918851375579834, "learning_rate": 9.568904439866444e-06, "loss": 0.7069, "step": 11010 }, { "epoch": 3.5832791151594012, "grad_norm": 1.56783926486969, "learning_rate": 9.548607420676902e-06, "loss": 0.7315, "step": 11015 }, { "epoch": 3.5849056603773586, "grad_norm": 1.5739941596984863, "learning_rate": 9.528326868178616e-06, "loss": 0.6858, "step": 11020 }, { "epoch": 3.5865322055953155, "grad_norm": 1.6817781925201416, "learning_rate": 9.508062803984796e-06, "loss": 0.7336, "step": 11025 }, { "epoch": 3.588158750813273, "grad_norm": 1.7366712093353271, "learning_rate": 9.487815249691012e-06, "loss": 0.6986, "step": 11030 }, { "epoch": 3.5897852960312298, "grad_norm": 1.7367922067642212, "learning_rate": 9.467584226875292e-06, "loss": 0.7224, "step": 11035 }, { "epoch": 3.5914118412491867, "grad_norm": 1.5156676769256592, "learning_rate": 9.447369757098002e-06, "loss": 0.7264, "step": 11040 }, { "epoch": 3.5930383864671436, "grad_norm": 1.433984637260437, "learning_rate": 9.427171861901903e-06, "loss": 0.712, "step": 11045 }, { "epoch": 3.594664931685101, "grad_norm": 1.7231719493865967, "learning_rate": 9.406990562812068e-06, "loss": 0.7294, "step": 11050 }, { "epoch": 3.596291476903058, "grad_norm": 1.6528314352035522, "learning_rate": 9.386825881335889e-06, "loss": 0.6908, "step": 11055 }, { "epoch": 3.597918022121015, "grad_norm": 1.562302589416504, "learning_rate": 9.366677838963078e-06, "loss": 0.718, "step": 11060 }, { "epoch": 3.599544567338972, "grad_norm": 1.53555428981781, "learning_rate": 9.34654645716556e-06, "loss": 0.6813, "step": 11065 }, { "epoch": 3.601171112556929, "grad_norm": 1.542402982711792, "learning_rate": 9.32643175739756e-06, "loss": 0.6839, "step": 11070 }, { "epoch": 3.602797657774886, "grad_norm": 1.4688427448272705, "learning_rate": 9.306333761095476e-06, "loss": 0.6896, "step": 11075 }, { "epoch": 3.6044242029928433, "grad_norm": 1.498922348022461, "learning_rate": 9.286252489677944e-06, "loss": 0.7207, "step": 11080 }, { "epoch": 3.6060507482108, "grad_norm": 1.6963268518447876, "learning_rate": 9.266187964545744e-06, "loss": 0.7129, "step": 11085 }, { "epoch": 3.6076772934287575, "grad_norm": 1.5394963026046753, "learning_rate": 9.246140207081833e-06, "loss": 0.7275, "step": 11090 }, { "epoch": 3.6093038386467144, "grad_norm": 1.4630099534988403, "learning_rate": 9.226109238651293e-06, "loss": 0.7011, "step": 11095 }, { "epoch": 3.6109303838646714, "grad_norm": 1.7235665321350098, "learning_rate": 9.206095080601319e-06, "loss": 0.681, "step": 11100 }, { "epoch": 3.6125569290826283, "grad_norm": 1.4839284420013428, "learning_rate": 9.18609775426116e-06, "loss": 0.7282, "step": 11105 }, { "epoch": 3.6141834743005856, "grad_norm": 1.771009087562561, "learning_rate": 9.16611728094218e-06, "loss": 0.7379, "step": 11110 }, { "epoch": 3.6158100195185425, "grad_norm": 1.3977216482162476, "learning_rate": 9.146153681937725e-06, "loss": 0.6727, "step": 11115 }, { "epoch": 3.6174365647365, "grad_norm": 1.771681785583496, "learning_rate": 9.126206978523202e-06, "loss": 0.7196, "step": 11120 }, { "epoch": 3.619063109954457, "grad_norm": 1.6849372386932373, "learning_rate": 9.106277191955992e-06, "loss": 0.7228, "step": 11125 }, { "epoch": 3.6206896551724137, "grad_norm": 1.70037043094635, "learning_rate": 9.086364343475461e-06, "loss": 0.7348, "step": 11130 }, { "epoch": 3.6223162003903706, "grad_norm": 1.5290961265563965, "learning_rate": 9.0664684543029e-06, "loss": 0.7217, "step": 11135 }, { "epoch": 3.623942745608328, "grad_norm": 1.5708940029144287, "learning_rate": 9.04658954564156e-06, "loss": 0.7242, "step": 11140 }, { "epoch": 3.625569290826285, "grad_norm": 1.5502111911773682, "learning_rate": 9.026727638676554e-06, "loss": 0.7298, "step": 11145 }, { "epoch": 3.6271958360442422, "grad_norm": 1.4423314332962036, "learning_rate": 9.006882754574914e-06, "loss": 0.7241, "step": 11150 }, { "epoch": 3.628822381262199, "grad_norm": 1.626993179321289, "learning_rate": 8.98705491448551e-06, "loss": 0.6877, "step": 11155 }, { "epoch": 3.630448926480156, "grad_norm": 1.6684397459030151, "learning_rate": 8.967244139539064e-06, "loss": 0.6866, "step": 11160 }, { "epoch": 3.632075471698113, "grad_norm": 1.7069201469421387, "learning_rate": 8.947450450848086e-06, "loss": 0.73, "step": 11165 }, { "epoch": 3.6337020169160703, "grad_norm": 1.5043277740478516, "learning_rate": 8.927673869506905e-06, "loss": 0.7068, "step": 11170 }, { "epoch": 3.635328562134027, "grad_norm": 1.4194046258926392, "learning_rate": 8.907914416591595e-06, "loss": 0.7215, "step": 11175 }, { "epoch": 3.6369551073519846, "grad_norm": 1.5382648706436157, "learning_rate": 8.888172113159989e-06, "loss": 0.7004, "step": 11180 }, { "epoch": 3.6385816525699415, "grad_norm": 1.7095139026641846, "learning_rate": 8.868446980251647e-06, "loss": 0.7041, "step": 11185 }, { "epoch": 3.6402081977878984, "grad_norm": 1.5075514316558838, "learning_rate": 8.848739038887822e-06, "loss": 0.743, "step": 11190 }, { "epoch": 3.6418347430058553, "grad_norm": 1.7053310871124268, "learning_rate": 8.829048310071456e-06, "loss": 0.7149, "step": 11195 }, { "epoch": 3.6434612882238127, "grad_norm": 1.6019392013549805, "learning_rate": 8.809374814787124e-06, "loss": 0.7046, "step": 11200 }, { "epoch": 3.6450878334417696, "grad_norm": 1.7075766324996948, "learning_rate": 8.789718574001068e-06, "loss": 0.7196, "step": 11205 }, { "epoch": 3.646714378659727, "grad_norm": 1.6558412313461304, "learning_rate": 8.770079608661108e-06, "loss": 0.7033, "step": 11210 }, { "epoch": 3.648340923877684, "grad_norm": 1.5456069707870483, "learning_rate": 8.750457939696677e-06, "loss": 0.7271, "step": 11215 }, { "epoch": 3.6499674690956407, "grad_norm": 1.431095004081726, "learning_rate": 8.730853588018772e-06, "loss": 0.6879, "step": 11220 }, { "epoch": 3.6515940143135976, "grad_norm": 1.7914400100708008, "learning_rate": 8.711266574519935e-06, "loss": 0.7187, "step": 11225 }, { "epoch": 3.653220559531555, "grad_norm": 1.611136794090271, "learning_rate": 8.691696920074214e-06, "loss": 0.6934, "step": 11230 }, { "epoch": 3.654847104749512, "grad_norm": 1.532126784324646, "learning_rate": 8.676053709051446e-06, "loss": 0.6828, "step": 11235 }, { "epoch": 3.6564736499674693, "grad_norm": 1.7126010656356812, "learning_rate": 8.656515353444911e-06, "loss": 0.7019, "step": 11240 }, { "epoch": 3.658100195185426, "grad_norm": 1.4354956150054932, "learning_rate": 8.636994415240376e-06, "loss": 0.6897, "step": 11245 }, { "epoch": 3.659726740403383, "grad_norm": 1.6650861501693726, "learning_rate": 8.617490915241496e-06, "loss": 0.733, "step": 11250 }, { "epoch": 3.6613532856213404, "grad_norm": 1.6253299713134766, "learning_rate": 8.598004874233315e-06, "loss": 0.7454, "step": 11255 }, { "epoch": 3.6629798308392973, "grad_norm": 1.6269207000732422, "learning_rate": 8.578536312982299e-06, "loss": 0.6768, "step": 11260 }, { "epoch": 3.6646063760572543, "grad_norm": 1.7291845083236694, "learning_rate": 8.559085252236259e-06, "loss": 0.714, "step": 11265 }, { "epoch": 3.6662329212752116, "grad_norm": 1.4800081253051758, "learning_rate": 8.539651712724376e-06, "loss": 0.7023, "step": 11270 }, { "epoch": 3.6678594664931685, "grad_norm": 1.5207598209381104, "learning_rate": 8.520235715157152e-06, "loss": 0.6963, "step": 11275 }, { "epoch": 3.6694860117111254, "grad_norm": 1.66194486618042, "learning_rate": 8.500837280226403e-06, "loss": 0.7217, "step": 11280 }, { "epoch": 3.671112556929083, "grad_norm": 1.8510164022445679, "learning_rate": 8.481456428605205e-06, "loss": 0.7468, "step": 11285 }, { "epoch": 3.6727391021470397, "grad_norm": 1.5968811511993408, "learning_rate": 8.462093180947924e-06, "loss": 0.7157, "step": 11290 }, { "epoch": 3.6743656473649966, "grad_norm": 1.511631965637207, "learning_rate": 8.442747557890138e-06, "loss": 0.7155, "step": 11295 }, { "epoch": 3.675992192582954, "grad_norm": 1.633763074874878, "learning_rate": 8.423419580048659e-06, "loss": 0.7185, "step": 11300 }, { "epoch": 3.677618737800911, "grad_norm": 1.4660612344741821, "learning_rate": 8.404109268021493e-06, "loss": 0.7117, "step": 11305 }, { "epoch": 3.6792452830188678, "grad_norm": 1.5822746753692627, "learning_rate": 8.384816642387827e-06, "loss": 0.7237, "step": 11310 }, { "epoch": 3.680871828236825, "grad_norm": 1.7370541095733643, "learning_rate": 8.365541723707971e-06, "loss": 0.7092, "step": 11315 }, { "epoch": 3.682498373454782, "grad_norm": 1.729364275932312, "learning_rate": 8.3462845325234e-06, "loss": 0.7044, "step": 11320 }, { "epoch": 3.684124918672739, "grad_norm": 1.4933305978775024, "learning_rate": 8.327045089356663e-06, "loss": 0.7246, "step": 11325 }, { "epoch": 3.6857514638906963, "grad_norm": 1.5517268180847168, "learning_rate": 8.307823414711424e-06, "loss": 0.6913, "step": 11330 }, { "epoch": 3.687378009108653, "grad_norm": 2.000478744506836, "learning_rate": 8.288619529072394e-06, "loss": 0.6988, "step": 11335 }, { "epoch": 3.68900455432661, "grad_norm": 1.4674965143203735, "learning_rate": 8.269433452905331e-06, "loss": 0.7142, "step": 11340 }, { "epoch": 3.6906310995445675, "grad_norm": 1.628170132637024, "learning_rate": 8.250265206657025e-06, "loss": 0.6953, "step": 11345 }, { "epoch": 3.6922576447625244, "grad_norm": 1.525415062904358, "learning_rate": 8.23111481075523e-06, "loss": 0.713, "step": 11350 }, { "epoch": 3.6938841899804813, "grad_norm": 1.5662649869918823, "learning_rate": 8.211982285608721e-06, "loss": 0.7356, "step": 11355 }, { "epoch": 3.6955107351984386, "grad_norm": 1.7572872638702393, "learning_rate": 8.192867651607188e-06, "loss": 0.6946, "step": 11360 }, { "epoch": 3.6971372804163956, "grad_norm": 1.5514570474624634, "learning_rate": 8.17377092912128e-06, "loss": 0.714, "step": 11365 }, { "epoch": 3.6987638256343525, "grad_norm": 1.732904076576233, "learning_rate": 8.154692138502552e-06, "loss": 0.7164, "step": 11370 }, { "epoch": 3.70039037085231, "grad_norm": 1.4824029207229614, "learning_rate": 8.135631300083448e-06, "loss": 0.7227, "step": 11375 }, { "epoch": 3.7020169160702667, "grad_norm": 1.4314519166946411, "learning_rate": 8.116588434177273e-06, "loss": 0.7107, "step": 11380 }, { "epoch": 3.703643461288224, "grad_norm": 1.5154752731323242, "learning_rate": 8.097563561078193e-06, "loss": 0.7515, "step": 11385 }, { "epoch": 3.705270006506181, "grad_norm": 1.6739246845245361, "learning_rate": 8.078556701061175e-06, "loss": 0.7462, "step": 11390 }, { "epoch": 3.706896551724138, "grad_norm": 1.5182855129241943, "learning_rate": 8.059567874382023e-06, "loss": 0.7261, "step": 11395 }, { "epoch": 3.708523096942095, "grad_norm": 1.617936134338379, "learning_rate": 8.04059710127728e-06, "loss": 0.7165, "step": 11400 }, { "epoch": 3.710149642160052, "grad_norm": 1.5150372982025146, "learning_rate": 8.021644401964305e-06, "loss": 0.701, "step": 11405 }, { "epoch": 3.711776187378009, "grad_norm": 1.5094677209854126, "learning_rate": 8.00270979664114e-06, "loss": 0.6989, "step": 11410 }, { "epoch": 3.7134027325959664, "grad_norm": 1.3593993186950684, "learning_rate": 7.983793305486583e-06, "loss": 0.7089, "step": 11415 }, { "epoch": 3.7150292778139233, "grad_norm": 1.460679292678833, "learning_rate": 7.964894948660102e-06, "loss": 0.7145, "step": 11420 }, { "epoch": 3.7166558230318802, "grad_norm": 1.5193824768066406, "learning_rate": 7.946014746301858e-06, "loss": 0.7212, "step": 11425 }, { "epoch": 3.718282368249837, "grad_norm": 1.578808307647705, "learning_rate": 7.927152718532646e-06, "loss": 0.7086, "step": 11430 }, { "epoch": 3.7199089134677945, "grad_norm": 1.6621711254119873, "learning_rate": 7.908308885453908e-06, "loss": 0.7114, "step": 11435 }, { "epoch": 3.7215354586857514, "grad_norm": 1.5985379219055176, "learning_rate": 7.889483267147693e-06, "loss": 0.7371, "step": 11440 }, { "epoch": 3.7231620039037088, "grad_norm": 1.5030269622802734, "learning_rate": 7.87067588367664e-06, "loss": 0.7097, "step": 11445 }, { "epoch": 3.7247885491216657, "grad_norm": 1.4464192390441895, "learning_rate": 7.851886755083936e-06, "loss": 0.6842, "step": 11450 }, { "epoch": 3.7264150943396226, "grad_norm": 1.6425318717956543, "learning_rate": 7.833115901393347e-06, "loss": 0.7157, "step": 11455 }, { "epoch": 3.7280416395575795, "grad_norm": 1.5340723991394043, "learning_rate": 7.814363342609126e-06, "loss": 0.7198, "step": 11460 }, { "epoch": 3.729668184775537, "grad_norm": 1.7587698698043823, "learning_rate": 7.795629098716045e-06, "loss": 0.7021, "step": 11465 }, { "epoch": 3.7312947299934938, "grad_norm": 1.8319244384765625, "learning_rate": 7.776913189679392e-06, "loss": 0.75, "step": 11470 }, { "epoch": 3.732921275211451, "grad_norm": 1.5190914869308472, "learning_rate": 7.758215635444848e-06, "loss": 0.7333, "step": 11475 }, { "epoch": 3.734547820429408, "grad_norm": 1.4928538799285889, "learning_rate": 7.739536455938595e-06, "loss": 0.6754, "step": 11480 }, { "epoch": 3.736174365647365, "grad_norm": 1.8240560293197632, "learning_rate": 7.720875671067188e-06, "loss": 0.7456, "step": 11485 }, { "epoch": 3.737800910865322, "grad_norm": 1.5365586280822754, "learning_rate": 7.70223330071761e-06, "loss": 0.7417, "step": 11490 }, { "epoch": 3.739427456083279, "grad_norm": 1.4879204034805298, "learning_rate": 7.683609364757192e-06, "loss": 0.7157, "step": 11495 }, { "epoch": 3.741054001301236, "grad_norm": 1.5626345872879028, "learning_rate": 7.665003883033642e-06, "loss": 0.6833, "step": 11500 }, { "epoch": 3.7426805465191935, "grad_norm": 1.4292707443237305, "learning_rate": 7.646416875374992e-06, "loss": 0.7069, "step": 11505 }, { "epoch": 3.7443070917371504, "grad_norm": 1.6828378438949585, "learning_rate": 7.6278483615896015e-06, "loss": 0.7149, "step": 11510 }, { "epoch": 3.7459336369551073, "grad_norm": 1.6302988529205322, "learning_rate": 7.609298361466083e-06, "loss": 0.7372, "step": 11515 }, { "epoch": 3.747560182173064, "grad_norm": 1.5447158813476562, "learning_rate": 7.59076689477336e-06, "loss": 0.7257, "step": 11520 }, { "epoch": 3.7491867273910215, "grad_norm": 1.9097141027450562, "learning_rate": 7.572253981260571e-06, "loss": 0.716, "step": 11525 }, { "epoch": 3.7508132726089785, "grad_norm": 1.738356113433838, "learning_rate": 7.55375964065711e-06, "loss": 0.7247, "step": 11530 }, { "epoch": 3.752439817826936, "grad_norm": 1.5033373832702637, "learning_rate": 7.535283892672562e-06, "loss": 0.7037, "step": 11535 }, { "epoch": 3.7540663630448927, "grad_norm": 1.7826368808746338, "learning_rate": 7.516826756996712e-06, "loss": 0.6994, "step": 11540 }, { "epoch": 3.7556929082628496, "grad_norm": 1.5203220844268799, "learning_rate": 7.498388253299482e-06, "loss": 0.7305, "step": 11545 }, { "epoch": 3.7573194534808065, "grad_norm": 1.5603865385055542, "learning_rate": 7.479968401230972e-06, "loss": 0.6903, "step": 11550 }, { "epoch": 3.758945998698764, "grad_norm": 1.4634088277816772, "learning_rate": 7.46156722042137e-06, "loss": 0.7152, "step": 11555 }, { "epoch": 3.760572543916721, "grad_norm": 1.5767837762832642, "learning_rate": 7.443184730480996e-06, "loss": 0.7505, "step": 11560 }, { "epoch": 3.762199089134678, "grad_norm": 1.5018644332885742, "learning_rate": 7.424820951000233e-06, "loss": 0.7248, "step": 11565 }, { "epoch": 3.763825634352635, "grad_norm": 1.4515255689620972, "learning_rate": 7.406475901549542e-06, "loss": 0.7167, "step": 11570 }, { "epoch": 3.765452179570592, "grad_norm": 1.865311622619629, "learning_rate": 7.388149601679392e-06, "loss": 0.7301, "step": 11575 }, { "epoch": 3.767078724788549, "grad_norm": 1.3774443864822388, "learning_rate": 7.369842070920308e-06, "loss": 0.7133, "step": 11580 }, { "epoch": 3.7687052700065062, "grad_norm": 1.5800745487213135, "learning_rate": 7.351553328782779e-06, "loss": 0.7171, "step": 11585 }, { "epoch": 3.770331815224463, "grad_norm": 1.6577149629592896, "learning_rate": 7.33328339475729e-06, "loss": 0.7152, "step": 11590 }, { "epoch": 3.7719583604424205, "grad_norm": 1.559309959411621, "learning_rate": 7.31503228831428e-06, "loss": 0.7126, "step": 11595 }, { "epoch": 3.7735849056603774, "grad_norm": 1.5590262413024902, "learning_rate": 7.296800028904119e-06, "loss": 0.7048, "step": 11600 }, { "epoch": 3.7752114508783343, "grad_norm": 1.6895976066589355, "learning_rate": 7.278586635957107e-06, "loss": 0.6902, "step": 11605 }, { "epoch": 3.7768379960962912, "grad_norm": 1.499817967414856, "learning_rate": 7.260392128883403e-06, "loss": 0.722, "step": 11610 }, { "epoch": 3.7784645413142486, "grad_norm": 1.5406159162521362, "learning_rate": 7.242216527073079e-06, "loss": 0.7102, "step": 11615 }, { "epoch": 3.7800910865322055, "grad_norm": 1.6556847095489502, "learning_rate": 7.224059849896026e-06, "loss": 0.7162, "step": 11620 }, { "epoch": 3.781717631750163, "grad_norm": 1.633467674255371, "learning_rate": 7.205922116701985e-06, "loss": 0.7055, "step": 11625 }, { "epoch": 3.7833441769681198, "grad_norm": 1.662025809288025, "learning_rate": 7.1878033468205124e-06, "loss": 0.65, "step": 11630 }, { "epoch": 3.7849707221860767, "grad_norm": 1.428796648979187, "learning_rate": 7.169703559560953e-06, "loss": 0.6892, "step": 11635 }, { "epoch": 3.7865972674040336, "grad_norm": 1.8580787181854248, "learning_rate": 7.151622774212396e-06, "loss": 0.7086, "step": 11640 }, { "epoch": 3.788223812621991, "grad_norm": 1.6472827196121216, "learning_rate": 7.133561010043724e-06, "loss": 0.6853, "step": 11645 }, { "epoch": 3.789850357839948, "grad_norm": 1.395544171333313, "learning_rate": 7.1155182863035075e-06, "loss": 0.7099, "step": 11650 }, { "epoch": 3.791476903057905, "grad_norm": 1.4849767684936523, "learning_rate": 7.097494622220049e-06, "loss": 0.6994, "step": 11655 }, { "epoch": 3.793103448275862, "grad_norm": 1.4284588098526, "learning_rate": 7.079490037001332e-06, "loss": 0.6916, "step": 11660 }, { "epoch": 3.794729993493819, "grad_norm": 1.521339774131775, "learning_rate": 7.0615045498350215e-06, "loss": 0.7027, "step": 11665 }, { "epoch": 3.796356538711776, "grad_norm": 1.4687316417694092, "learning_rate": 7.043538179888398e-06, "loss": 0.7079, "step": 11670 }, { "epoch": 3.7979830839297333, "grad_norm": 1.4834345579147339, "learning_rate": 7.025590946308402e-06, "loss": 0.6953, "step": 11675 }, { "epoch": 3.79960962914769, "grad_norm": 1.3771758079528809, "learning_rate": 7.007662868221551e-06, "loss": 0.6955, "step": 11680 }, { "epoch": 3.8012361743656475, "grad_norm": 1.8043354749679565, "learning_rate": 6.9897539647339725e-06, "loss": 0.6839, "step": 11685 }, { "epoch": 3.8028627195836044, "grad_norm": 1.6041468381881714, "learning_rate": 6.971864254931346e-06, "loss": 0.7042, "step": 11690 }, { "epoch": 3.8044892648015614, "grad_norm": 1.669795274734497, "learning_rate": 6.95399375787891e-06, "loss": 0.7015, "step": 11695 }, { "epoch": 3.8061158100195187, "grad_norm": 1.5349482297897339, "learning_rate": 6.9361424926214e-06, "loss": 0.7186, "step": 11700 }, { "epoch": 3.8077423552374756, "grad_norm": 1.5761752128601074, "learning_rate": 6.918310478183093e-06, "loss": 0.7083, "step": 11705 }, { "epoch": 3.8093689004554325, "grad_norm": 1.8643007278442383, "learning_rate": 6.900497733567715e-06, "loss": 0.7289, "step": 11710 }, { "epoch": 3.81099544567339, "grad_norm": 1.6121509075164795, "learning_rate": 6.882704277758475e-06, "loss": 0.7077, "step": 11715 }, { "epoch": 3.812621990891347, "grad_norm": 1.749541997909546, "learning_rate": 6.864930129718028e-06, "loss": 0.6982, "step": 11720 }, { "epoch": 3.8142485361093037, "grad_norm": 1.6512995958328247, "learning_rate": 6.847175308388451e-06, "loss": 0.7089, "step": 11725 }, { "epoch": 3.815875081327261, "grad_norm": 1.5024572610855103, "learning_rate": 6.829439832691206e-06, "loss": 0.7066, "step": 11730 }, { "epoch": 3.817501626545218, "grad_norm": 1.4329208135604858, "learning_rate": 6.811723721527161e-06, "loss": 0.6802, "step": 11735 }, { "epoch": 3.819128171763175, "grad_norm": 1.6244162321090698, "learning_rate": 6.794026993776548e-06, "loss": 0.7063, "step": 11740 }, { "epoch": 3.8207547169811322, "grad_norm": 1.6486992835998535, "learning_rate": 6.776349668298912e-06, "loss": 0.685, "step": 11745 }, { "epoch": 3.822381262199089, "grad_norm": 1.531559705734253, "learning_rate": 6.758691763933156e-06, "loss": 0.7317, "step": 11750 }, { "epoch": 3.824007807417046, "grad_norm": 1.4666805267333984, "learning_rate": 6.741053299497468e-06, "loss": 0.7108, "step": 11755 }, { "epoch": 3.8256343526350034, "grad_norm": 1.5616769790649414, "learning_rate": 6.723434293789327e-06, "loss": 0.7275, "step": 11760 }, { "epoch": 3.8272608978529603, "grad_norm": 1.5382866859436035, "learning_rate": 6.705834765585459e-06, "loss": 0.6876, "step": 11765 }, { "epoch": 3.828887443070917, "grad_norm": 1.4710311889648438, "learning_rate": 6.688254733641855e-06, "loss": 0.7052, "step": 11770 }, { "epoch": 3.8305139882888746, "grad_norm": 1.3839458227157593, "learning_rate": 6.670694216693701e-06, "loss": 0.7052, "step": 11775 }, { "epoch": 3.8321405335068315, "grad_norm": 1.5696918964385986, "learning_rate": 6.653153233455423e-06, "loss": 0.719, "step": 11780 }, { "epoch": 3.8337670787247884, "grad_norm": 1.558218002319336, "learning_rate": 6.635631802620576e-06, "loss": 0.7178, "step": 11785 }, { "epoch": 3.8353936239427457, "grad_norm": 1.9497623443603516, "learning_rate": 6.618129942861947e-06, "loss": 0.7519, "step": 11790 }, { "epoch": 3.8370201691607027, "grad_norm": 3.0386600494384766, "learning_rate": 6.600647672831406e-06, "loss": 0.7207, "step": 11795 }, { "epoch": 3.8386467143786596, "grad_norm": 1.4756920337677002, "learning_rate": 6.5831850111599815e-06, "loss": 0.7034, "step": 11800 }, { "epoch": 3.840273259596617, "grad_norm": 1.4741921424865723, "learning_rate": 6.565741976457782e-06, "loss": 0.6942, "step": 11805 }, { "epoch": 3.841899804814574, "grad_norm": 1.5176244974136353, "learning_rate": 6.548318587314017e-06, "loss": 0.7333, "step": 11810 }, { "epoch": 3.8435263500325307, "grad_norm": 1.6599299907684326, "learning_rate": 6.530914862296947e-06, "loss": 0.7555, "step": 11815 }, { "epoch": 3.845152895250488, "grad_norm": 1.615272879600525, "learning_rate": 6.513530819953883e-06, "loss": 0.7042, "step": 11820 }, { "epoch": 3.846779440468445, "grad_norm": 1.5212656259536743, "learning_rate": 6.496166478811164e-06, "loss": 0.7221, "step": 11825 }, { "epoch": 3.8484059856864024, "grad_norm": 1.5341366529464722, "learning_rate": 6.478821857374129e-06, "loss": 0.7109, "step": 11830 }, { "epoch": 3.8500325309043593, "grad_norm": 1.5961518287658691, "learning_rate": 6.461496974127093e-06, "loss": 0.7069, "step": 11835 }, { "epoch": 3.851659076122316, "grad_norm": 1.665968656539917, "learning_rate": 6.444191847533351e-06, "loss": 0.7402, "step": 11840 }, { "epoch": 3.853285621340273, "grad_norm": 1.474108338356018, "learning_rate": 6.426906496035129e-06, "loss": 0.7197, "step": 11845 }, { "epoch": 3.8549121665582304, "grad_norm": 1.6529964208602905, "learning_rate": 6.409640938053588e-06, "loss": 0.7309, "step": 11850 }, { "epoch": 3.8565387117761873, "grad_norm": 1.6153665781021118, "learning_rate": 6.39239519198879e-06, "loss": 0.7158, "step": 11855 }, { "epoch": 3.8581652569941447, "grad_norm": 1.6823304891586304, "learning_rate": 6.375169276219694e-06, "loss": 0.7022, "step": 11860 }, { "epoch": 3.8597918022121016, "grad_norm": 1.4911738634109497, "learning_rate": 6.357963209104106e-06, "loss": 0.7042, "step": 11865 }, { "epoch": 3.8614183474300585, "grad_norm": 1.8320478200912476, "learning_rate": 6.340777008978696e-06, "loss": 0.707, "step": 11870 }, { "epoch": 3.8630448926480154, "grad_norm": 1.6323111057281494, "learning_rate": 6.32361069415896e-06, "loss": 0.6959, "step": 11875 }, { "epoch": 3.864671437865973, "grad_norm": 1.4077821969985962, "learning_rate": 6.306464282939184e-06, "loss": 0.7378, "step": 11880 }, { "epoch": 3.8662979830839297, "grad_norm": 1.5527182817459106, "learning_rate": 6.289337793592468e-06, "loss": 0.7099, "step": 11885 }, { "epoch": 3.867924528301887, "grad_norm": 1.5946862697601318, "learning_rate": 6.27223124437066e-06, "loss": 0.7073, "step": 11890 }, { "epoch": 3.869551073519844, "grad_norm": 1.5599662065505981, "learning_rate": 6.255144653504382e-06, "loss": 0.7135, "step": 11895 }, { "epoch": 3.871177618737801, "grad_norm": 1.7310166358947754, "learning_rate": 6.238078039202958e-06, "loss": 0.7038, "step": 11900 }, { "epoch": 3.8728041639557578, "grad_norm": 1.7168464660644531, "learning_rate": 6.221031419654444e-06, "loss": 0.761, "step": 11905 }, { "epoch": 3.874430709173715, "grad_norm": 1.5442029237747192, "learning_rate": 6.204004813025568e-06, "loss": 0.7486, "step": 11910 }, { "epoch": 3.876057254391672, "grad_norm": 1.5637699365615845, "learning_rate": 6.1869982374617495e-06, "loss": 0.723, "step": 11915 }, { "epoch": 3.8776837996096294, "grad_norm": 1.6089749336242676, "learning_rate": 6.170011711087051e-06, "loss": 0.7233, "step": 11920 }, { "epoch": 3.8793103448275863, "grad_norm": 1.4052361249923706, "learning_rate": 6.153045252004177e-06, "loss": 0.7021, "step": 11925 }, { "epoch": 3.880936890045543, "grad_norm": 1.3666505813598633, "learning_rate": 6.136098878294424e-06, "loss": 0.7292, "step": 11930 }, { "epoch": 3.8825634352635, "grad_norm": 1.4129390716552734, "learning_rate": 6.119172608017718e-06, "loss": 0.7016, "step": 11935 }, { "epoch": 3.8841899804814575, "grad_norm": 1.414857029914856, "learning_rate": 6.102266459212519e-06, "loss": 0.6861, "step": 11940 }, { "epoch": 3.8858165256994144, "grad_norm": 1.564656376838684, "learning_rate": 6.08538044989588e-06, "loss": 0.709, "step": 11945 }, { "epoch": 3.8874430709173717, "grad_norm": 1.5671226978302002, "learning_rate": 6.068514598063371e-06, "loss": 0.7231, "step": 11950 }, { "epoch": 3.8890696161353286, "grad_norm": 1.7119030952453613, "learning_rate": 6.051668921689094e-06, "loss": 0.7048, "step": 11955 }, { "epoch": 3.8906961613532856, "grad_norm": 1.4766937494277954, "learning_rate": 6.034843438725629e-06, "loss": 0.692, "step": 11960 }, { "epoch": 3.8923227065712425, "grad_norm": 1.5335886478424072, "learning_rate": 6.0180381671040596e-06, "loss": 0.7016, "step": 11965 }, { "epoch": 3.8939492517892, "grad_norm": 1.3766030073165894, "learning_rate": 6.001253124733908e-06, "loss": 0.723, "step": 11970 }, { "epoch": 3.8955757970071567, "grad_norm": 1.5205347537994385, "learning_rate": 5.9844883295031515e-06, "loss": 0.7071, "step": 11975 }, { "epoch": 3.897202342225114, "grad_norm": 1.586621642112732, "learning_rate": 5.967743799278189e-06, "loss": 0.7154, "step": 11980 }, { "epoch": 3.898828887443071, "grad_norm": 1.4337161779403687, "learning_rate": 5.9510195519038245e-06, "loss": 0.7165, "step": 11985 }, { "epoch": 3.900455432661028, "grad_norm": 1.614888072013855, "learning_rate": 5.934315605203231e-06, "loss": 0.7493, "step": 11990 }, { "epoch": 3.902081977878985, "grad_norm": 1.456457257270813, "learning_rate": 5.917631976977975e-06, "loss": 0.6958, "step": 11995 }, { "epoch": 3.903708523096942, "grad_norm": 1.5614268779754639, "learning_rate": 5.900968685007932e-06, "loss": 0.6953, "step": 12000 }, { "epoch": 3.905335068314899, "grad_norm": 1.7669767141342163, "learning_rate": 5.884325747051336e-06, "loss": 0.7227, "step": 12005 }, { "epoch": 3.9069616135328564, "grad_norm": 1.4956378936767578, "learning_rate": 5.867703180844722e-06, "loss": 0.7014, "step": 12010 }, { "epoch": 3.9085881587508133, "grad_norm": 1.2728540897369385, "learning_rate": 5.851101004102907e-06, "loss": 0.7105, "step": 12015 }, { "epoch": 3.9102147039687702, "grad_norm": 1.540703535079956, "learning_rate": 5.834519234518992e-06, "loss": 0.6949, "step": 12020 }, { "epoch": 3.911841249186727, "grad_norm": 1.651149034500122, "learning_rate": 5.817957889764308e-06, "loss": 0.7213, "step": 12025 }, { "epoch": 3.9134677944046845, "grad_norm": 1.5773632526397705, "learning_rate": 5.8014169874884474e-06, "loss": 0.7425, "step": 12030 }, { "epoch": 3.9150943396226414, "grad_norm": 1.5421687364578247, "learning_rate": 5.784896545319187e-06, "loss": 0.6998, "step": 12035 }, { "epoch": 3.9167208848405988, "grad_norm": 1.7480581998825073, "learning_rate": 5.7683965808625205e-06, "loss": 0.7109, "step": 12040 }, { "epoch": 3.9183474300585557, "grad_norm": 1.5883381366729736, "learning_rate": 5.751917111702612e-06, "loss": 0.7113, "step": 12045 }, { "epoch": 3.9199739752765126, "grad_norm": 1.4139646291732788, "learning_rate": 5.735458155401793e-06, "loss": 0.6857, "step": 12050 }, { "epoch": 3.9216005204944695, "grad_norm": 1.2988675832748413, "learning_rate": 5.719019729500508e-06, "loss": 0.71, "step": 12055 }, { "epoch": 3.923227065712427, "grad_norm": 1.5632492303848267, "learning_rate": 5.702601851517353e-06, "loss": 0.6829, "step": 12060 }, { "epoch": 3.9248536109303838, "grad_norm": 1.468011498451233, "learning_rate": 5.686204538948997e-06, "loss": 0.6898, "step": 12065 }, { "epoch": 3.926480156148341, "grad_norm": 1.4638512134552002, "learning_rate": 5.669827809270214e-06, "loss": 0.6786, "step": 12070 }, { "epoch": 3.928106701366298, "grad_norm": 1.6212648153305054, "learning_rate": 5.653471679933839e-06, "loss": 0.6879, "step": 12075 }, { "epoch": 3.929733246584255, "grad_norm": 1.6373189687728882, "learning_rate": 5.6371361683707545e-06, "loss": 0.6968, "step": 12080 }, { "epoch": 3.931359791802212, "grad_norm": 1.5027360916137695, "learning_rate": 5.62082129198985e-06, "loss": 0.6934, "step": 12085 }, { "epoch": 3.932986337020169, "grad_norm": 1.5293148756027222, "learning_rate": 5.604527068178056e-06, "loss": 0.6626, "step": 12090 }, { "epoch": 3.934612882238126, "grad_norm": 1.7508997917175293, "learning_rate": 5.58825351430026e-06, "loss": 0.6956, "step": 12095 }, { "epoch": 3.9362394274560835, "grad_norm": 1.5696589946746826, "learning_rate": 5.572000647699344e-06, "loss": 0.6992, "step": 12100 }, { "epoch": 3.9378659726740404, "grad_norm": 1.512899398803711, "learning_rate": 5.555768485696144e-06, "loss": 0.7077, "step": 12105 }, { "epoch": 3.9394925178919973, "grad_norm": 1.4992002248764038, "learning_rate": 5.539557045589425e-06, "loss": 0.6967, "step": 12110 }, { "epoch": 3.941119063109954, "grad_norm": 1.5602352619171143, "learning_rate": 5.523366344655856e-06, "loss": 0.7138, "step": 12115 }, { "epoch": 3.9427456083279115, "grad_norm": 1.5859862565994263, "learning_rate": 5.507196400150033e-06, "loss": 0.7203, "step": 12120 }, { "epoch": 3.9443721535458685, "grad_norm": 1.4386961460113525, "learning_rate": 5.491047229304397e-06, "loss": 0.7451, "step": 12125 }, { "epoch": 3.945998698763826, "grad_norm": 1.5099098682403564, "learning_rate": 5.474918849329281e-06, "loss": 0.7191, "step": 12130 }, { "epoch": 3.9476252439817827, "grad_norm": 1.4574263095855713, "learning_rate": 5.4588112774128314e-06, "loss": 0.6991, "step": 12135 }, { "epoch": 3.9492517891997396, "grad_norm": 1.406768560409546, "learning_rate": 5.442724530721052e-06, "loss": 0.7166, "step": 12140 }, { "epoch": 3.9508783344176965, "grad_norm": 1.430981159210205, "learning_rate": 5.42665862639774e-06, "loss": 0.6806, "step": 12145 }, { "epoch": 3.952504879635654, "grad_norm": 1.6555246114730835, "learning_rate": 5.410613581564464e-06, "loss": 0.6979, "step": 12150 }, { "epoch": 3.954131424853611, "grad_norm": 1.982032299041748, "learning_rate": 5.394589413320589e-06, "loss": 0.7127, "step": 12155 }, { "epoch": 3.955757970071568, "grad_norm": 1.742348551750183, "learning_rate": 5.378586138743203e-06, "loss": 0.6923, "step": 12160 }, { "epoch": 3.957384515289525, "grad_norm": 1.5163263082504272, "learning_rate": 5.3626037748871565e-06, "loss": 0.7354, "step": 12165 }, { "epoch": 3.959011060507482, "grad_norm": 1.6295777559280396, "learning_rate": 5.346642338784985e-06, "loss": 0.729, "step": 12170 }, { "epoch": 3.9606376057254393, "grad_norm": 1.4111744165420532, "learning_rate": 5.330701847446962e-06, "loss": 0.7211, "step": 12175 }, { "epoch": 3.9622641509433962, "grad_norm": 1.8482940196990967, "learning_rate": 5.314782317860998e-06, "loss": 0.7029, "step": 12180 }, { "epoch": 3.963890696161353, "grad_norm": 1.4704492092132568, "learning_rate": 5.29888376699269e-06, "loss": 0.7066, "step": 12185 }, { "epoch": 3.9655172413793105, "grad_norm": 1.612250804901123, "learning_rate": 5.2830062117852654e-06, "loss": 0.6939, "step": 12190 }, { "epoch": 3.9671437865972674, "grad_norm": 1.7271686792373657, "learning_rate": 5.267149669159588e-06, "loss": 0.7266, "step": 12195 }, { "epoch": 3.9687703318152243, "grad_norm": 1.6207488775253296, "learning_rate": 5.251314156014109e-06, "loss": 0.6986, "step": 12200 }, { "epoch": 3.9703968770331817, "grad_norm": 1.619434118270874, "learning_rate": 5.235499689224885e-06, "loss": 0.6933, "step": 12205 }, { "epoch": 3.9720234222511386, "grad_norm": 1.500880479812622, "learning_rate": 5.219706285645545e-06, "loss": 0.7316, "step": 12210 }, { "epoch": 3.9736499674690955, "grad_norm": 1.47292959690094, "learning_rate": 5.203933962107266e-06, "loss": 0.7314, "step": 12215 }, { "epoch": 3.975276512687053, "grad_norm": 1.4974173307418823, "learning_rate": 5.1881827354187454e-06, "loss": 0.7031, "step": 12220 }, { "epoch": 3.9769030579050098, "grad_norm": 1.5128178596496582, "learning_rate": 5.172452622366228e-06, "loss": 0.7062, "step": 12225 }, { "epoch": 3.9785296031229667, "grad_norm": 1.7001745700836182, "learning_rate": 5.156743639713421e-06, "loss": 0.6907, "step": 12230 }, { "epoch": 3.980156148340924, "grad_norm": 1.679190754890442, "learning_rate": 5.141055804201541e-06, "loss": 0.7094, "step": 12235 }, { "epoch": 3.981782693558881, "grad_norm": 1.4886915683746338, "learning_rate": 5.12538913254926e-06, "loss": 0.6865, "step": 12240 }, { "epoch": 3.983409238776838, "grad_norm": 1.720151662826538, "learning_rate": 5.109743641452699e-06, "loss": 0.6783, "step": 12245 }, { "epoch": 3.985035783994795, "grad_norm": 1.8675968647003174, "learning_rate": 5.094119347585391e-06, "loss": 0.7389, "step": 12250 }, { "epoch": 3.986662329212752, "grad_norm": 1.5618263483047485, "learning_rate": 5.078516267598299e-06, "loss": 0.6983, "step": 12255 }, { "epoch": 3.988288874430709, "grad_norm": 1.493572473526001, "learning_rate": 5.062934418119761e-06, "loss": 0.6893, "step": 12260 }, { "epoch": 3.9899154196486664, "grad_norm": 1.452642798423767, "learning_rate": 5.047373815755496e-06, "loss": 0.714, "step": 12265 }, { "epoch": 3.9915419648666233, "grad_norm": 1.5139330625534058, "learning_rate": 5.03183447708859e-06, "loss": 0.7053, "step": 12270 }, { "epoch": 3.9931685100845806, "grad_norm": 1.619027853012085, "learning_rate": 5.016316418679454e-06, "loss": 0.7303, "step": 12275 }, { "epoch": 3.9947950553025375, "grad_norm": 1.5129636526107788, "learning_rate": 5.000819657065833e-06, "loss": 0.7102, "step": 12280 }, { "epoch": 3.9964216005204944, "grad_norm": 1.4124982357025146, "learning_rate": 4.985344208762757e-06, "loss": 0.716, "step": 12285 }, { "epoch": 3.9980481457384514, "grad_norm": 1.375887155532837, "learning_rate": 4.9698900902625666e-06, "loss": 0.7081, "step": 12290 }, { "epoch": 3.9996746909564087, "grad_norm": 1.608909010887146, "learning_rate": 4.954457318034841e-06, "loss": 0.7298, "step": 12295 }, { "epoch": 4.0, "eval_f1": 0.8167598961549908, "eval_loss": 0.42578125, "eval_precision": 0.8178258436838884, "eval_recall": 0.8160128163392577, "eval_runtime": 386.4104, "eval_samples_per_second": 1018.179, "eval_steps_per_second": 1.99, "step": 12296 }, { "epoch": 4.001301236174366, "grad_norm": 1.5979492664337158, "learning_rate": 4.939045908526441e-06, "loss": 0.6897, "step": 12300 }, { "epoch": 4.002927781392323, "grad_norm": 1.8433144092559814, "learning_rate": 4.92365587816144e-06, "loss": 0.6952, "step": 12305 }, { "epoch": 4.00455432661028, "grad_norm": 1.5285773277282715, "learning_rate": 4.908287243341147e-06, "loss": 0.6506, "step": 12310 }, { "epoch": 4.006180871828237, "grad_norm": 1.5420926809310913, "learning_rate": 4.892940020444043e-06, "loss": 0.6745, "step": 12315 }, { "epoch": 4.007807417046194, "grad_norm": 1.4821486473083496, "learning_rate": 4.877614225825816e-06, "loss": 0.6907, "step": 12320 }, { "epoch": 4.009433962264151, "grad_norm": 1.7074490785598755, "learning_rate": 4.862309875819299e-06, "loss": 0.6703, "step": 12325 }, { "epoch": 4.011060507482108, "grad_norm": 1.6337097883224487, "learning_rate": 4.8470269867344764e-06, "loss": 0.6933, "step": 12330 }, { "epoch": 4.012687052700065, "grad_norm": 1.682141900062561, "learning_rate": 4.831765574858471e-06, "loss": 0.682, "step": 12335 }, { "epoch": 4.014313597918022, "grad_norm": 1.859763503074646, "learning_rate": 4.816525656455512e-06, "loss": 0.6895, "step": 12340 }, { "epoch": 4.015940143135979, "grad_norm": 1.5202829837799072, "learning_rate": 4.801307247766912e-06, "loss": 0.6565, "step": 12345 }, { "epoch": 4.017566688353936, "grad_norm": 1.6259644031524658, "learning_rate": 4.786110365011079e-06, "loss": 0.6937, "step": 12350 }, { "epoch": 4.019193233571893, "grad_norm": 1.560270071029663, "learning_rate": 4.770935024383458e-06, "loss": 0.6794, "step": 12355 }, { "epoch": 4.020819778789851, "grad_norm": 1.4490845203399658, "learning_rate": 4.755781242056556e-06, "loss": 0.6765, "step": 12360 }, { "epoch": 4.022446324007808, "grad_norm": 1.666902780532837, "learning_rate": 4.740649034179898e-06, "loss": 0.6692, "step": 12365 }, { "epoch": 4.024072869225765, "grad_norm": 1.64743173122406, "learning_rate": 4.7255384168800235e-06, "loss": 0.692, "step": 12370 }, { "epoch": 4.0256994144437215, "grad_norm": 1.4837696552276611, "learning_rate": 4.7104494062604445e-06, "loss": 0.6717, "step": 12375 }, { "epoch": 4.027325959661678, "grad_norm": 1.4459526538848877, "learning_rate": 4.695382018401673e-06, "loss": 0.6696, "step": 12380 }, { "epoch": 4.028952504879635, "grad_norm": 1.537183403968811, "learning_rate": 4.680336269361146e-06, "loss": 0.6938, "step": 12385 }, { "epoch": 4.030579050097593, "grad_norm": 1.6677672863006592, "learning_rate": 4.665312175173267e-06, "loss": 0.6822, "step": 12390 }, { "epoch": 4.03220559531555, "grad_norm": 1.510717749595642, "learning_rate": 4.650309751849349e-06, "loss": 0.6688, "step": 12395 }, { "epoch": 4.033832140533507, "grad_norm": 1.680513858795166, "learning_rate": 4.635329015377621e-06, "loss": 0.6836, "step": 12400 }, { "epoch": 4.035458685751464, "grad_norm": 1.488144040107727, "learning_rate": 4.620369981723174e-06, "loss": 0.707, "step": 12405 }, { "epoch": 4.037085230969421, "grad_norm": 1.6111326217651367, "learning_rate": 4.605432666828002e-06, "loss": 0.6852, "step": 12410 }, { "epoch": 4.038711776187378, "grad_norm": 1.463124394416809, "learning_rate": 4.590517086610938e-06, "loss": 0.6729, "step": 12415 }, { "epoch": 4.040338321405335, "grad_norm": 1.7753697633743286, "learning_rate": 4.575623256967646e-06, "loss": 0.7025, "step": 12420 }, { "epoch": 4.041964866623292, "grad_norm": 1.5999348163604736, "learning_rate": 4.560751193770619e-06, "loss": 0.6775, "step": 12425 }, { "epoch": 4.043591411841249, "grad_norm": 1.6959277391433716, "learning_rate": 4.545900912869156e-06, "loss": 0.7005, "step": 12430 }, { "epoch": 4.045217957059206, "grad_norm": 1.6232707500457764, "learning_rate": 4.531072430089339e-06, "loss": 0.6567, "step": 12435 }, { "epoch": 4.046844502277163, "grad_norm": 1.6723641157150269, "learning_rate": 4.516265761234012e-06, "loss": 0.7055, "step": 12440 }, { "epoch": 4.04847104749512, "grad_norm": 1.6180615425109863, "learning_rate": 4.501480922082787e-06, "loss": 0.6653, "step": 12445 }, { "epoch": 4.050097592713078, "grad_norm": 1.7284014225006104, "learning_rate": 4.486717928391993e-06, "loss": 0.6816, "step": 12450 }, { "epoch": 4.051724137931035, "grad_norm": 1.5860096216201782, "learning_rate": 4.471976795894692e-06, "loss": 0.6728, "step": 12455 }, { "epoch": 4.053350683148992, "grad_norm": 1.8972523212432861, "learning_rate": 4.457257540300647e-06, "loss": 0.6859, "step": 12460 }, { "epoch": 4.0549772283669485, "grad_norm": 1.5799185037612915, "learning_rate": 4.442560177296307e-06, "loss": 0.7421, "step": 12465 }, { "epoch": 4.056603773584905, "grad_norm": 1.630070686340332, "learning_rate": 4.427884722544776e-06, "loss": 0.6785, "step": 12470 }, { "epoch": 4.058230318802862, "grad_norm": 1.4304271936416626, "learning_rate": 4.413231191685838e-06, "loss": 0.6587, "step": 12475 }, { "epoch": 4.05985686402082, "grad_norm": 1.905627965927124, "learning_rate": 4.398599600335876e-06, "loss": 0.6798, "step": 12480 }, { "epoch": 4.061483409238777, "grad_norm": 1.5567177534103394, "learning_rate": 4.383989964087923e-06, "loss": 0.6677, "step": 12485 }, { "epoch": 4.063109954456734, "grad_norm": 1.4872279167175293, "learning_rate": 4.369402298511599e-06, "loss": 0.6332, "step": 12490 }, { "epoch": 4.064736499674691, "grad_norm": 1.7089390754699707, "learning_rate": 4.354836619153124e-06, "loss": 0.6998, "step": 12495 }, { "epoch": 4.066363044892648, "grad_norm": 1.5542322397232056, "learning_rate": 4.3402929415352625e-06, "loss": 0.6716, "step": 12500 }, { "epoch": 4.067989590110605, "grad_norm": 1.5865058898925781, "learning_rate": 4.325771281157356e-06, "loss": 0.6907, "step": 12505 }, { "epoch": 4.0696161353285625, "grad_norm": 1.6408686637878418, "learning_rate": 4.311271653495261e-06, "loss": 0.6618, "step": 12510 }, { "epoch": 4.071242680546519, "grad_norm": 1.6281955242156982, "learning_rate": 4.296794074001376e-06, "loss": 0.6737, "step": 12515 }, { "epoch": 4.072869225764476, "grad_norm": 1.601310133934021, "learning_rate": 4.282338558104573e-06, "loss": 0.6556, "step": 12520 }, { "epoch": 4.074495770982433, "grad_norm": 1.5427731275558472, "learning_rate": 4.267905121210253e-06, "loss": 0.669, "step": 12525 }, { "epoch": 4.07612231620039, "grad_norm": 1.7799477577209473, "learning_rate": 4.2534937787002405e-06, "loss": 0.6735, "step": 12530 }, { "epoch": 4.077748861418347, "grad_norm": 1.5699180364608765, "learning_rate": 4.239104545932854e-06, "loss": 0.6866, "step": 12535 }, { "epoch": 4.079375406636305, "grad_norm": 1.4719772338867188, "learning_rate": 4.224737438242815e-06, "loss": 0.6706, "step": 12540 }, { "epoch": 4.081001951854262, "grad_norm": 1.5109868049621582, "learning_rate": 4.210392470941288e-06, "loss": 0.6644, "step": 12545 }, { "epoch": 4.082628497072219, "grad_norm": 1.5972977876663208, "learning_rate": 4.196069659315846e-06, "loss": 0.6788, "step": 12550 }, { "epoch": 4.0842550422901756, "grad_norm": 1.3675533533096313, "learning_rate": 4.181769018630422e-06, "loss": 0.6676, "step": 12555 }, { "epoch": 4.0858815875081325, "grad_norm": 1.928117036819458, "learning_rate": 4.167490564125362e-06, "loss": 0.6735, "step": 12560 }, { "epoch": 4.087508132726089, "grad_norm": 1.7289471626281738, "learning_rate": 4.153234311017332e-06, "loss": 0.7342, "step": 12565 }, { "epoch": 4.089134677944047, "grad_norm": 1.812528371810913, "learning_rate": 4.13900027449936e-06, "loss": 0.6909, "step": 12570 }, { "epoch": 4.090761223162004, "grad_norm": 1.539907693862915, "learning_rate": 4.124788469740784e-06, "loss": 0.69, "step": 12575 }, { "epoch": 4.092387768379961, "grad_norm": 1.56931471824646, "learning_rate": 4.110598911887259e-06, "loss": 0.6845, "step": 12580 }, { "epoch": 4.094014313597918, "grad_norm": 1.5432919263839722, "learning_rate": 4.096431616060717e-06, "loss": 0.6964, "step": 12585 }, { "epoch": 4.095640858815875, "grad_norm": 2.0211055278778076, "learning_rate": 4.082286597359395e-06, "loss": 0.6879, "step": 12590 }, { "epoch": 4.097267404033832, "grad_norm": 1.4614042043685913, "learning_rate": 4.0681638708577495e-06, "loss": 0.658, "step": 12595 }, { "epoch": 4.0988939492517895, "grad_norm": 1.8450242280960083, "learning_rate": 4.054063451606518e-06, "loss": 0.6856, "step": 12600 }, { "epoch": 4.100520494469746, "grad_norm": 1.6258832216262817, "learning_rate": 4.039985354632633e-06, "loss": 0.6931, "step": 12605 }, { "epoch": 4.102147039687703, "grad_norm": 1.6307175159454346, "learning_rate": 4.025929594939262e-06, "loss": 0.678, "step": 12610 }, { "epoch": 4.10377358490566, "grad_norm": 1.506934404373169, "learning_rate": 4.0118961875057485e-06, "loss": 0.6822, "step": 12615 }, { "epoch": 4.105400130123617, "grad_norm": 1.5586029291152954, "learning_rate": 3.997885147287628e-06, "loss": 0.6479, "step": 12620 }, { "epoch": 4.107026675341574, "grad_norm": 1.603355884552002, "learning_rate": 3.983896489216596e-06, "loss": 0.6678, "step": 12625 }, { "epoch": 4.108653220559532, "grad_norm": 1.6373467445373535, "learning_rate": 3.969930228200497e-06, "loss": 0.7098, "step": 12630 }, { "epoch": 4.110279765777489, "grad_norm": 1.5311671495437622, "learning_rate": 3.955986379123297e-06, "loss": 0.6765, "step": 12635 }, { "epoch": 4.111906310995446, "grad_norm": 1.7827900648117065, "learning_rate": 3.94206495684509e-06, "loss": 0.6898, "step": 12640 }, { "epoch": 4.113532856213403, "grad_norm": 1.8668522834777832, "learning_rate": 3.928165976202058e-06, "loss": 0.6806, "step": 12645 }, { "epoch": 4.1151594014313595, "grad_norm": 1.7353311777114868, "learning_rate": 3.914289452006478e-06, "loss": 0.6992, "step": 12650 }, { "epoch": 4.116785946649317, "grad_norm": 1.4650397300720215, "learning_rate": 3.900435399046684e-06, "loss": 0.6789, "step": 12655 }, { "epoch": 4.118412491867274, "grad_norm": 1.5249738693237305, "learning_rate": 3.88660383208708e-06, "loss": 0.7058, "step": 12660 }, { "epoch": 4.120039037085231, "grad_norm": 1.560050129890442, "learning_rate": 3.872794765868079e-06, "loss": 0.6854, "step": 12665 }, { "epoch": 4.121665582303188, "grad_norm": 1.5756725072860718, "learning_rate": 3.859008215106141e-06, "loss": 0.6552, "step": 12670 }, { "epoch": 4.123292127521145, "grad_norm": 1.4248074293136597, "learning_rate": 3.845244194493711e-06, "loss": 0.6942, "step": 12675 }, { "epoch": 4.124918672739102, "grad_norm": 1.696121335029602, "learning_rate": 3.831502718699237e-06, "loss": 0.7002, "step": 12680 }, { "epoch": 4.126545217957059, "grad_norm": 1.8411388397216797, "learning_rate": 3.817783802367137e-06, "loss": 0.6649, "step": 12685 }, { "epoch": 4.1281717631750166, "grad_norm": 1.5562666654586792, "learning_rate": 3.8040874601177862e-06, "loss": 0.7077, "step": 12690 }, { "epoch": 4.1297983083929735, "grad_norm": 1.6934025287628174, "learning_rate": 3.790413706547505e-06, "loss": 0.6674, "step": 12695 }, { "epoch": 4.13142485361093, "grad_norm": 1.6206001043319702, "learning_rate": 3.7767625562285304e-06, "loss": 0.6709, "step": 12700 }, { "epoch": 4.133051398828887, "grad_norm": 1.864793300628662, "learning_rate": 3.763134023709031e-06, "loss": 0.6795, "step": 12705 }, { "epoch": 4.134677944046844, "grad_norm": 1.8125206232070923, "learning_rate": 3.7495281235130465e-06, "loss": 0.6682, "step": 12710 }, { "epoch": 4.136304489264802, "grad_norm": 1.691428542137146, "learning_rate": 3.7359448701405147e-06, "loss": 0.6771, "step": 12715 }, { "epoch": 4.137931034482759, "grad_norm": 1.7985832691192627, "learning_rate": 3.7223842780672367e-06, "loss": 0.6826, "step": 12720 }, { "epoch": 4.139557579700716, "grad_norm": 1.7143083810806274, "learning_rate": 3.7088463617448637e-06, "loss": 0.6903, "step": 12725 }, { "epoch": 4.141184124918673, "grad_norm": 1.7825533151626587, "learning_rate": 3.6953311356008657e-06, "loss": 0.6839, "step": 12730 }, { "epoch": 4.14281067013663, "grad_norm": 1.5310969352722168, "learning_rate": 3.6818386140385575e-06, "loss": 0.6655, "step": 12735 }, { "epoch": 4.1444372153545865, "grad_norm": 1.5686269998550415, "learning_rate": 3.6683688114370283e-06, "loss": 0.6642, "step": 12740 }, { "epoch": 4.146063760572544, "grad_norm": 1.6279234886169434, "learning_rate": 3.6549217421511795e-06, "loss": 0.6815, "step": 12745 }, { "epoch": 4.147690305790501, "grad_norm": 1.578210711479187, "learning_rate": 3.6414974205116746e-06, "loss": 0.6925, "step": 12750 }, { "epoch": 4.149316851008458, "grad_norm": 1.667686939239502, "learning_rate": 3.6280958608249456e-06, "loss": 0.6932, "step": 12755 }, { "epoch": 4.150943396226415, "grad_norm": 1.4691816568374634, "learning_rate": 3.614717077373145e-06, "loss": 0.7092, "step": 12760 }, { "epoch": 4.152569941444372, "grad_norm": 1.5388782024383545, "learning_rate": 3.601361084414176e-06, "loss": 0.6638, "step": 12765 }, { "epoch": 4.154196486662329, "grad_norm": 1.5443003177642822, "learning_rate": 3.588027896181631e-06, "loss": 0.6901, "step": 12770 }, { "epoch": 4.155823031880287, "grad_norm": 1.487541675567627, "learning_rate": 3.5747175268848188e-06, "loss": 0.6918, "step": 12775 }, { "epoch": 4.157449577098244, "grad_norm": 1.6623897552490234, "learning_rate": 3.5614299907087227e-06, "loss": 0.6596, "step": 12780 }, { "epoch": 4.1590761223162005, "grad_norm": 1.71774423122406, "learning_rate": 3.5481653018139995e-06, "loss": 0.6945, "step": 12785 }, { "epoch": 4.160702667534157, "grad_norm": 1.5003087520599365, "learning_rate": 3.534923474336932e-06, "loss": 0.664, "step": 12790 }, { "epoch": 4.162329212752114, "grad_norm": 1.6198502779006958, "learning_rate": 3.521704522389477e-06, "loss": 0.7181, "step": 12795 }, { "epoch": 4.163955757970071, "grad_norm": 1.5323489904403687, "learning_rate": 3.508508460059179e-06, "loss": 0.6796, "step": 12800 }, { "epoch": 4.165582303188029, "grad_norm": 1.8603856563568115, "learning_rate": 3.4953353014092057e-06, "loss": 0.6682, "step": 12805 }, { "epoch": 4.167208848405986, "grad_norm": 1.5206300020217896, "learning_rate": 3.4821850604783186e-06, "loss": 0.6934, "step": 12810 }, { "epoch": 4.168835393623943, "grad_norm": 1.6180344820022583, "learning_rate": 3.469057751280852e-06, "loss": 0.7059, "step": 12815 }, { "epoch": 4.1704619388419, "grad_norm": 1.4293873310089111, "learning_rate": 3.45595338780669e-06, "loss": 0.6857, "step": 12820 }, { "epoch": 4.172088484059857, "grad_norm": 1.675456166267395, "learning_rate": 3.4428719840212814e-06, "loss": 0.6898, "step": 12825 }, { "epoch": 4.173715029277814, "grad_norm": 1.5427577495574951, "learning_rate": 3.4298135538656017e-06, "loss": 0.6733, "step": 12830 }, { "epoch": 4.175341574495771, "grad_norm": 1.6550896167755127, "learning_rate": 3.4167781112561275e-06, "loss": 0.7056, "step": 12835 }, { "epoch": 4.176968119713728, "grad_norm": 1.6988176107406616, "learning_rate": 3.40376567008486e-06, "loss": 0.6644, "step": 12840 }, { "epoch": 4.178594664931685, "grad_norm": 1.7683417797088623, "learning_rate": 3.3907762442192735e-06, "loss": 0.6745, "step": 12845 }, { "epoch": 4.180221210149642, "grad_norm": 1.8790183067321777, "learning_rate": 3.3778098475023277e-06, "loss": 0.6803, "step": 12850 }, { "epoch": 4.181847755367599, "grad_norm": 1.4988985061645508, "learning_rate": 3.3648664937524165e-06, "loss": 0.6571, "step": 12855 }, { "epoch": 4.183474300585556, "grad_norm": 1.6136387586593628, "learning_rate": 3.351946196763403e-06, "loss": 0.6794, "step": 12860 }, { "epoch": 4.185100845803514, "grad_norm": 1.5706504583358765, "learning_rate": 3.3390489703045593e-06, "loss": 0.6762, "step": 12865 }, { "epoch": 4.186727391021471, "grad_norm": 1.6014893054962158, "learning_rate": 3.326174828120576e-06, "loss": 0.682, "step": 12870 }, { "epoch": 4.1883539362394275, "grad_norm": 1.7448557615280151, "learning_rate": 3.3133237839315546e-06, "loss": 0.6792, "step": 12875 }, { "epoch": 4.189980481457384, "grad_norm": 1.5360891819000244, "learning_rate": 3.3004958514329692e-06, "loss": 0.6694, "step": 12880 }, { "epoch": 4.191607026675341, "grad_norm": 1.9542195796966553, "learning_rate": 3.2876910442956573e-06, "loss": 0.6677, "step": 12885 }, { "epoch": 4.193233571893298, "grad_norm": 1.6950875520706177, "learning_rate": 3.27490937616583e-06, "loss": 0.6991, "step": 12890 }, { "epoch": 4.194860117111256, "grad_norm": 1.676688313484192, "learning_rate": 3.262150860665017e-06, "loss": 0.6828, "step": 12895 }, { "epoch": 4.196486662329213, "grad_norm": 1.703680157661438, "learning_rate": 3.2494155113901e-06, "loss": 0.6954, "step": 12900 }, { "epoch": 4.19811320754717, "grad_norm": 1.7241615056991577, "learning_rate": 3.2367033419132388e-06, "loss": 0.7001, "step": 12905 }, { "epoch": 4.199739752765127, "grad_norm": 1.5073238611221313, "learning_rate": 3.224014365781933e-06, "loss": 0.7015, "step": 12910 }, { "epoch": 4.201366297983084, "grad_norm": 1.607161521911621, "learning_rate": 3.2113485965189205e-06, "loss": 0.6949, "step": 12915 }, { "epoch": 4.202992843201041, "grad_norm": 1.7603167295455933, "learning_rate": 3.1987060476222435e-06, "loss": 0.6849, "step": 12920 }, { "epoch": 4.204619388418998, "grad_norm": 1.5808464288711548, "learning_rate": 3.1860867325651717e-06, "loss": 0.7032, "step": 12925 }, { "epoch": 4.206245933636955, "grad_norm": 1.5832515954971313, "learning_rate": 3.173490664796233e-06, "loss": 0.6768, "step": 12930 }, { "epoch": 4.207872478854912, "grad_norm": 1.590959072113037, "learning_rate": 3.160917857739165e-06, "loss": 0.6751, "step": 12935 }, { "epoch": 4.209499024072869, "grad_norm": 1.6765626668930054, "learning_rate": 3.1483683247929275e-06, "loss": 0.6679, "step": 12940 }, { "epoch": 4.211125569290826, "grad_norm": 1.6869168281555176, "learning_rate": 3.1358420793316744e-06, "loss": 0.6775, "step": 12945 }, { "epoch": 4.212752114508783, "grad_norm": 1.8405346870422363, "learning_rate": 3.1233391347047476e-06, "loss": 0.6772, "step": 12950 }, { "epoch": 4.214378659726741, "grad_norm": 1.463067650794983, "learning_rate": 3.110859504236635e-06, "loss": 0.6727, "step": 12955 }, { "epoch": 4.216005204944698, "grad_norm": 1.5853489637374878, "learning_rate": 3.0984032012270043e-06, "loss": 0.6655, "step": 12960 }, { "epoch": 4.217631750162655, "grad_norm": 1.45207679271698, "learning_rate": 3.085970238950653e-06, "loss": 0.6527, "step": 12965 }, { "epoch": 4.2192582953806115, "grad_norm": 1.515596628189087, "learning_rate": 3.0735606306574875e-06, "loss": 0.7076, "step": 12970 }, { "epoch": 4.220884840598568, "grad_norm": 1.688957691192627, "learning_rate": 3.0611743895725686e-06, "loss": 0.6656, "step": 12975 }, { "epoch": 4.222511385816525, "grad_norm": 1.6611443758010864, "learning_rate": 3.048811528896006e-06, "loss": 0.6664, "step": 12980 }, { "epoch": 4.224137931034483, "grad_norm": 1.531745433807373, "learning_rate": 3.036472061803025e-06, "loss": 0.6798, "step": 12985 }, { "epoch": 4.22576447625244, "grad_norm": 1.774390697479248, "learning_rate": 3.024156001443901e-06, "loss": 0.6671, "step": 12990 }, { "epoch": 4.227391021470397, "grad_norm": 1.824425220489502, "learning_rate": 3.011863360943984e-06, "loss": 0.6639, "step": 12995 }, { "epoch": 4.229017566688354, "grad_norm": 1.5967600345611572, "learning_rate": 2.99959415340364e-06, "loss": 0.732, "step": 13000 }, { "epoch": 4.230644111906311, "grad_norm": 1.5584049224853516, "learning_rate": 2.987348391898284e-06, "loss": 0.6582, "step": 13005 }, { "epoch": 4.232270657124268, "grad_norm": 1.4888719320297241, "learning_rate": 2.9751260894783362e-06, "loss": 0.6844, "step": 13010 }, { "epoch": 4.233897202342225, "grad_norm": 1.7110730409622192, "learning_rate": 2.962927259169221e-06, "loss": 0.6539, "step": 13015 }, { "epoch": 4.235523747560182, "grad_norm": 1.4865161180496216, "learning_rate": 2.9507519139713364e-06, "loss": 0.665, "step": 13020 }, { "epoch": 4.237150292778139, "grad_norm": 1.6361219882965088, "learning_rate": 2.9386000668600698e-06, "loss": 0.6727, "step": 13025 }, { "epoch": 4.238776837996096, "grad_norm": 1.5675081014633179, "learning_rate": 2.926471730785743e-06, "loss": 0.6857, "step": 13030 }, { "epoch": 4.240403383214053, "grad_norm": 1.6067705154418945, "learning_rate": 2.9143669186736445e-06, "loss": 0.6626, "step": 13035 }, { "epoch": 4.24202992843201, "grad_norm": 1.8827476501464844, "learning_rate": 2.9022856434239796e-06, "loss": 0.6906, "step": 13040 }, { "epoch": 4.243656473649968, "grad_norm": 1.5749931335449219, "learning_rate": 2.8902279179118837e-06, "loss": 0.7139, "step": 13045 }, { "epoch": 4.245283018867925, "grad_norm": 1.9258676767349243, "learning_rate": 2.878193754987374e-06, "loss": 0.6729, "step": 13050 }, { "epoch": 4.246909564085882, "grad_norm": 1.5161265134811401, "learning_rate": 2.866183167475378e-06, "loss": 0.6881, "step": 13055 }, { "epoch": 4.2485361093038385, "grad_norm": 1.6529872417449951, "learning_rate": 2.8541961681756795e-06, "loss": 0.6782, "step": 13060 }, { "epoch": 4.250162654521795, "grad_norm": 1.7905762195587158, "learning_rate": 2.8422327698629405e-06, "loss": 0.6787, "step": 13065 }, { "epoch": 4.251789199739752, "grad_norm": 1.583303451538086, "learning_rate": 2.8302929852866644e-06, "loss": 0.659, "step": 13070 }, { "epoch": 4.25341574495771, "grad_norm": 1.6909085512161255, "learning_rate": 2.818376827171193e-06, "loss": 0.6846, "step": 13075 }, { "epoch": 4.255042290175667, "grad_norm": 1.5445011854171753, "learning_rate": 2.8064843082156787e-06, "loss": 0.653, "step": 13080 }, { "epoch": 4.256668835393624, "grad_norm": 1.884468913078308, "learning_rate": 2.794615441094095e-06, "loss": 0.6332, "step": 13085 }, { "epoch": 4.258295380611581, "grad_norm": 1.826657772064209, "learning_rate": 2.782770238455193e-06, "loss": 0.6651, "step": 13090 }, { "epoch": 4.259921925829538, "grad_norm": 1.6193771362304688, "learning_rate": 2.770948712922522e-06, "loss": 0.6753, "step": 13095 }, { "epoch": 4.261548471047496, "grad_norm": 1.6252689361572266, "learning_rate": 2.759150877094388e-06, "loss": 0.6771, "step": 13100 }, { "epoch": 4.2631750162654525, "grad_norm": 1.763871431350708, "learning_rate": 2.747376743543853e-06, "loss": 0.6316, "step": 13105 }, { "epoch": 4.264801561483409, "grad_norm": 1.6903254985809326, "learning_rate": 2.735626324818727e-06, "loss": 0.6688, "step": 13110 }, { "epoch": 4.266428106701366, "grad_norm": 1.5600754022598267, "learning_rate": 2.7238996334415262e-06, "loss": 0.7071, "step": 13115 }, { "epoch": 4.268054651919323, "grad_norm": 1.6053317785263062, "learning_rate": 2.712196681909507e-06, "loss": 0.6723, "step": 13120 }, { "epoch": 4.26968119713728, "grad_norm": 1.380657434463501, "learning_rate": 2.7005174826946004e-06, "loss": 0.6586, "step": 13125 }, { "epoch": 4.271307742355237, "grad_norm": 1.6451447010040283, "learning_rate": 2.688862048243443e-06, "loss": 0.6726, "step": 13130 }, { "epoch": 4.272934287573195, "grad_norm": 1.5257431268692017, "learning_rate": 2.6772303909773366e-06, "loss": 0.6639, "step": 13135 }, { "epoch": 4.274560832791152, "grad_norm": 1.6463559865951538, "learning_rate": 2.6656225232922492e-06, "loss": 0.6824, "step": 13140 }, { "epoch": 4.276187378009109, "grad_norm": 1.4507416486740112, "learning_rate": 2.6540384575587885e-06, "loss": 0.6563, "step": 13145 }, { "epoch": 4.2778139232270656, "grad_norm": 1.6209659576416016, "learning_rate": 2.642478206122201e-06, "loss": 0.6912, "step": 13150 }, { "epoch": 4.2794404684450225, "grad_norm": 1.6708098649978638, "learning_rate": 2.6309417813023513e-06, "loss": 0.6798, "step": 13155 }, { "epoch": 4.28106701366298, "grad_norm": 1.7898657321929932, "learning_rate": 2.619429195393713e-06, "loss": 0.7098, "step": 13160 }, { "epoch": 4.282693558880937, "grad_norm": 1.6421253681182861, "learning_rate": 2.607940460665359e-06, "loss": 0.7098, "step": 13165 }, { "epoch": 4.284320104098894, "grad_norm": 1.8849862813949585, "learning_rate": 2.5964755893609414e-06, "loss": 0.6837, "step": 13170 }, { "epoch": 4.285946649316851, "grad_norm": 1.5766422748565674, "learning_rate": 2.585034593698668e-06, "loss": 0.6719, "step": 13175 }, { "epoch": 4.287573194534808, "grad_norm": 1.429555892944336, "learning_rate": 2.5736174858713267e-06, "loss": 0.6719, "step": 13180 }, { "epoch": 4.289199739752765, "grad_norm": 1.567388892173767, "learning_rate": 2.5622242780462243e-06, "loss": 0.6734, "step": 13185 }, { "epoch": 4.290826284970722, "grad_norm": 1.562254786491394, "learning_rate": 2.5508549823652114e-06, "loss": 0.7237, "step": 13190 }, { "epoch": 4.2924528301886795, "grad_norm": 1.652184247970581, "learning_rate": 2.5395096109446488e-06, "loss": 0.6946, "step": 13195 }, { "epoch": 4.294079375406636, "grad_norm": 1.6349338293075562, "learning_rate": 2.528188175875412e-06, "loss": 0.6773, "step": 13200 }, { "epoch": 4.295705920624593, "grad_norm": 1.7185473442077637, "learning_rate": 2.516890689222845e-06, "loss": 0.7092, "step": 13205 }, { "epoch": 4.29733246584255, "grad_norm": 1.454410433769226, "learning_rate": 2.5056171630267937e-06, "loss": 0.721, "step": 13210 }, { "epoch": 4.298959011060507, "grad_norm": 1.5690823793411255, "learning_rate": 2.4943676093015513e-06, "loss": 0.6679, "step": 13215 }, { "epoch": 4.300585556278465, "grad_norm": 1.772160291671753, "learning_rate": 2.483142040035874e-06, "loss": 0.7042, "step": 13220 }, { "epoch": 4.302212101496422, "grad_norm": 1.425601601600647, "learning_rate": 2.471940467192957e-06, "loss": 0.677, "step": 13225 }, { "epoch": 4.303838646714379, "grad_norm": 1.7007113695144653, "learning_rate": 2.4607629027104147e-06, "loss": 0.696, "step": 13230 }, { "epoch": 4.305465191932336, "grad_norm": 1.614625334739685, "learning_rate": 2.449609358500288e-06, "loss": 0.6548, "step": 13235 }, { "epoch": 4.307091737150293, "grad_norm": 1.4970104694366455, "learning_rate": 2.440703825716867e-06, "loss": 0.6561, "step": 13240 }, { "epoch": 4.3087182823682495, "grad_norm": 1.6450927257537842, "learning_rate": 2.4295935479336785e-06, "loss": 0.6625, "step": 13245 }, { "epoch": 4.310344827586207, "grad_norm": 1.531559944152832, "learning_rate": 2.4185073236403707e-06, "loss": 0.6672, "step": 13250 }, { "epoch": 4.311971372804164, "grad_norm": 1.7254568338394165, "learning_rate": 2.407445164651631e-06, "loss": 0.6646, "step": 13255 }, { "epoch": 4.313597918022121, "grad_norm": 1.8372946977615356, "learning_rate": 2.396407082756513e-06, "loss": 0.6912, "step": 13260 }, { "epoch": 4.315224463240078, "grad_norm": 1.7858517169952393, "learning_rate": 2.385393089718388e-06, "loss": 0.6965, "step": 13265 }, { "epoch": 4.316851008458035, "grad_norm": 1.5769810676574707, "learning_rate": 2.3744031972749826e-06, "loss": 0.6974, "step": 13270 }, { "epoch": 4.318477553675992, "grad_norm": 1.9316915273666382, "learning_rate": 2.3634374171383173e-06, "loss": 0.6734, "step": 13275 }, { "epoch": 4.32010409889395, "grad_norm": 1.8165911436080933, "learning_rate": 2.352495760994733e-06, "loss": 0.6916, "step": 13280 }, { "epoch": 4.3217306441119065, "grad_norm": 1.5280351638793945, "learning_rate": 2.341578240504838e-06, "loss": 0.7015, "step": 13285 }, { "epoch": 4.3233571893298635, "grad_norm": 1.558218002319336, "learning_rate": 2.3306848673035536e-06, "loss": 0.6872, "step": 13290 }, { "epoch": 4.32498373454782, "grad_norm": 1.6019008159637451, "learning_rate": 2.319815653000035e-06, "loss": 0.6651, "step": 13295 }, { "epoch": 4.326610279765777, "grad_norm": 1.8289910554885864, "learning_rate": 2.308970609177713e-06, "loss": 0.7146, "step": 13300 }, { "epoch": 4.328236824983734, "grad_norm": 1.5692062377929688, "learning_rate": 2.2981497473942432e-06, "loss": 0.6746, "step": 13305 }, { "epoch": 4.329863370201692, "grad_norm": 1.700446605682373, "learning_rate": 2.28735307918152e-06, "loss": 0.6996, "step": 13310 }, { "epoch": 4.331489915419649, "grad_norm": 1.9279206991195679, "learning_rate": 2.2765806160456482e-06, "loss": 0.674, "step": 13315 }, { "epoch": 4.333116460637606, "grad_norm": 1.5956603288650513, "learning_rate": 2.2658323694669498e-06, "loss": 0.6819, "step": 13320 }, { "epoch": 4.334743005855563, "grad_norm": 1.636796474456787, "learning_rate": 2.255108350899923e-06, "loss": 0.6985, "step": 13325 }, { "epoch": 4.33636955107352, "grad_norm": 1.5646538734436035, "learning_rate": 2.24440857177326e-06, "loss": 0.6501, "step": 13330 }, { "epoch": 4.3379960962914765, "grad_norm": 1.8974790573120117, "learning_rate": 2.2337330434898023e-06, "loss": 0.6929, "step": 13335 }, { "epoch": 4.339622641509434, "grad_norm": 1.52352774143219, "learning_rate": 2.2230817774265724e-06, "loss": 0.6614, "step": 13340 }, { "epoch": 4.341249186727391, "grad_norm": 1.5688260793685913, "learning_rate": 2.2124547849347117e-06, "loss": 0.6612, "step": 13345 }, { "epoch": 4.342875731945348, "grad_norm": 1.4662718772888184, "learning_rate": 2.201852077339506e-06, "loss": 0.6755, "step": 13350 }, { "epoch": 4.344502277163305, "grad_norm": 1.6698862314224243, "learning_rate": 2.1912736659403605e-06, "loss": 0.6733, "step": 13355 }, { "epoch": 4.346128822381262, "grad_norm": 1.7690608501434326, "learning_rate": 2.1807195620107914e-06, "loss": 0.6621, "step": 13360 }, { "epoch": 4.347755367599219, "grad_norm": 1.8485875129699707, "learning_rate": 2.1701897767983927e-06, "loss": 0.6741, "step": 13365 }, { "epoch": 4.349381912817177, "grad_norm": 1.6573123931884766, "learning_rate": 2.15968432152486e-06, "loss": 0.7156, "step": 13370 }, { "epoch": 4.351008458035134, "grad_norm": 1.5061167478561401, "learning_rate": 2.149203207385955e-06, "loss": 0.6733, "step": 13375 }, { "epoch": 4.3526350032530905, "grad_norm": 1.6367472410202026, "learning_rate": 2.1387464455514928e-06, "loss": 0.7217, "step": 13380 }, { "epoch": 4.354261548471047, "grad_norm": 1.5949532985687256, "learning_rate": 2.128314047165342e-06, "loss": 0.6955, "step": 13385 }, { "epoch": 4.355888093689004, "grad_norm": 1.562147855758667, "learning_rate": 2.117906023345406e-06, "loss": 0.6978, "step": 13390 }, { "epoch": 4.357514638906961, "grad_norm": 1.4548684358596802, "learning_rate": 2.107522385183619e-06, "loss": 0.6618, "step": 13395 }, { "epoch": 4.359141184124919, "grad_norm": 1.7947598695755005, "learning_rate": 2.097163143745909e-06, "loss": 0.6949, "step": 13400 }, { "epoch": 4.360767729342876, "grad_norm": 1.511186957359314, "learning_rate": 2.08682831007222e-06, "loss": 0.6678, "step": 13405 }, { "epoch": 4.362394274560833, "grad_norm": 1.6913362741470337, "learning_rate": 2.0765178951764774e-06, "loss": 0.6941, "step": 13410 }, { "epoch": 4.36402081977879, "grad_norm": 1.7016710042953491, "learning_rate": 2.0662319100465844e-06, "loss": 0.6876, "step": 13415 }, { "epoch": 4.365647364996747, "grad_norm": 1.5809540748596191, "learning_rate": 2.0559703656444107e-06, "loss": 0.6714, "step": 13420 }, { "epoch": 4.367273910214704, "grad_norm": 1.802018642425537, "learning_rate": 2.0457332729057867e-06, "loss": 0.6748, "step": 13425 }, { "epoch": 4.368900455432661, "grad_norm": 1.5684220790863037, "learning_rate": 2.0355206427404626e-06, "loss": 0.6789, "step": 13430 }, { "epoch": 4.370527000650618, "grad_norm": 1.6480116844177246, "learning_rate": 2.0253324860321443e-06, "loss": 0.6848, "step": 13435 }, { "epoch": 4.372153545868575, "grad_norm": 1.6253386735916138, "learning_rate": 2.015168813638435e-06, "loss": 0.6884, "step": 13440 }, { "epoch": 4.373780091086532, "grad_norm": 1.7358983755111694, "learning_rate": 2.0050296363908593e-06, "loss": 0.6859, "step": 13445 }, { "epoch": 4.375406636304489, "grad_norm": 1.5498124361038208, "learning_rate": 1.9949149650948267e-06, "loss": 0.6925, "step": 13450 }, { "epoch": 4.377033181522446, "grad_norm": 1.4635928869247437, "learning_rate": 1.984824810529645e-06, "loss": 0.6781, "step": 13455 }, { "epoch": 4.378659726740404, "grad_norm": 1.657062292098999, "learning_rate": 1.974759183448477e-06, "loss": 0.6925, "step": 13460 }, { "epoch": 4.380286271958361, "grad_norm": 1.609960913658142, "learning_rate": 1.9647180945783577e-06, "loss": 0.7096, "step": 13465 }, { "epoch": 4.3819128171763175, "grad_norm": 1.8002521991729736, "learning_rate": 1.954701554620164e-06, "loss": 0.6996, "step": 13470 }, { "epoch": 4.383539362394274, "grad_norm": 1.6468158960342407, "learning_rate": 1.9447095742486156e-06, "loss": 0.6646, "step": 13475 }, { "epoch": 4.385165907612231, "grad_norm": 1.8362884521484375, "learning_rate": 1.9347421641122576e-06, "loss": 0.683, "step": 13480 }, { "epoch": 4.386792452830189, "grad_norm": 2.2963192462921143, "learning_rate": 1.9247993348334577e-06, "loss": 0.6776, "step": 13485 }, { "epoch": 4.388418998048146, "grad_norm": 1.860910177230835, "learning_rate": 1.9148810970083725e-06, "loss": 0.6833, "step": 13490 }, { "epoch": 4.390045543266103, "grad_norm": 1.716539740562439, "learning_rate": 1.904987461206967e-06, "loss": 0.6643, "step": 13495 }, { "epoch": 4.39167208848406, "grad_norm": 1.6504241228103638, "learning_rate": 1.8951184379729674e-06, "loss": 0.6981, "step": 13500 }, { "epoch": 4.393298633702017, "grad_norm": 1.5467013120651245, "learning_rate": 1.885274037823892e-06, "loss": 0.6931, "step": 13505 }, { "epoch": 4.394925178919974, "grad_norm": 1.5635037422180176, "learning_rate": 1.8754542712510065e-06, "loss": 0.6632, "step": 13510 }, { "epoch": 4.396551724137931, "grad_norm": 1.438417911529541, "learning_rate": 1.8656591487193288e-06, "loss": 0.6644, "step": 13515 }, { "epoch": 4.398178269355888, "grad_norm": 1.9160609245300293, "learning_rate": 1.8558886806676112e-06, "loss": 0.7025, "step": 13520 }, { "epoch": 4.399804814573845, "grad_norm": 1.5905572175979614, "learning_rate": 1.846142877508325e-06, "loss": 0.6642, "step": 13525 }, { "epoch": 4.401431359791802, "grad_norm": 1.5534197092056274, "learning_rate": 1.8364217496276731e-06, "loss": 0.6957, "step": 13530 }, { "epoch": 4.403057905009759, "grad_norm": 1.393877625465393, "learning_rate": 1.8267253073855379e-06, "loss": 0.6689, "step": 13535 }, { "epoch": 4.404684450227716, "grad_norm": 1.7568193674087524, "learning_rate": 1.8170535611155143e-06, "loss": 0.6915, "step": 13540 }, { "epoch": 4.406310995445674, "grad_norm": 1.5103466510772705, "learning_rate": 1.8074065211248714e-06, "loss": 0.6896, "step": 13545 }, { "epoch": 4.407937540663631, "grad_norm": 1.5108469724655151, "learning_rate": 1.797784197694552e-06, "loss": 0.7004, "step": 13550 }, { "epoch": 4.409564085881588, "grad_norm": 1.4771957397460938, "learning_rate": 1.7881866010791477e-06, "loss": 0.6661, "step": 13555 }, { "epoch": 4.411190631099545, "grad_norm": 1.71317720413208, "learning_rate": 1.7786137415069126e-06, "loss": 0.6821, "step": 13560 }, { "epoch": 4.4128171763175015, "grad_norm": 1.8573901653289795, "learning_rate": 1.7690656291797225e-06, "loss": 0.6987, "step": 13565 }, { "epoch": 4.414443721535458, "grad_norm": 1.504940390586853, "learning_rate": 1.7595422742730905e-06, "loss": 0.6711, "step": 13570 }, { "epoch": 4.416070266753415, "grad_norm": 1.7634164094924927, "learning_rate": 1.7500436869361508e-06, "loss": 0.6681, "step": 13575 }, { "epoch": 4.417696811971373, "grad_norm": 1.6838585138320923, "learning_rate": 1.7405698772916313e-06, "loss": 0.6988, "step": 13580 }, { "epoch": 4.41932335718933, "grad_norm": 1.553808331489563, "learning_rate": 1.7311208554358554e-06, "loss": 0.6664, "step": 13585 }, { "epoch": 4.420949902407287, "grad_norm": 1.5865768194198608, "learning_rate": 1.7216966314387378e-06, "loss": 0.6696, "step": 13590 }, { "epoch": 4.422576447625244, "grad_norm": 1.532080054283142, "learning_rate": 1.7122972153437555e-06, "loss": 0.6424, "step": 13595 }, { "epoch": 4.424202992843201, "grad_norm": 1.817903757095337, "learning_rate": 1.7029226171679542e-06, "loss": 0.6771, "step": 13600 }, { "epoch": 4.4258295380611585, "grad_norm": 1.465944766998291, "learning_rate": 1.6935728469019308e-06, "loss": 0.6703, "step": 13605 }, { "epoch": 4.427456083279115, "grad_norm": 1.7263376712799072, "learning_rate": 1.684247914509826e-06, "loss": 0.6998, "step": 13610 }, { "epoch": 4.429082628497072, "grad_norm": 1.525610327720642, "learning_rate": 1.6749478299292936e-06, "loss": 0.6659, "step": 13615 }, { "epoch": 4.430709173715029, "grad_norm": 1.5177165269851685, "learning_rate": 1.6656726030715358e-06, "loss": 0.6671, "step": 13620 }, { "epoch": 4.432335718932986, "grad_norm": 1.6205544471740723, "learning_rate": 1.6564222438212317e-06, "loss": 0.6931, "step": 13625 }, { "epoch": 4.433962264150943, "grad_norm": 1.642604947090149, "learning_rate": 1.6471967620365846e-06, "loss": 0.6789, "step": 13630 }, { "epoch": 4.4355888093689, "grad_norm": 1.7329658269882202, "learning_rate": 1.6379961675492634e-06, "loss": 0.7002, "step": 13635 }, { "epoch": 4.437215354586858, "grad_norm": 1.8089805841445923, "learning_rate": 1.6288204701644382e-06, "loss": 0.6733, "step": 13640 }, { "epoch": 4.438841899804815, "grad_norm": 1.6879161596298218, "learning_rate": 1.6196696796607375e-06, "loss": 0.6756, "step": 13645 }, { "epoch": 4.440468445022772, "grad_norm": 1.683483600616455, "learning_rate": 1.6105438057902295e-06, "loss": 0.719, "step": 13650 }, { "epoch": 4.4420949902407285, "grad_norm": 1.693522334098816, "learning_rate": 1.601442858278454e-06, "loss": 0.6629, "step": 13655 }, { "epoch": 4.443721535458685, "grad_norm": 1.683858871459961, "learning_rate": 1.592366846824364e-06, "loss": 0.6887, "step": 13660 }, { "epoch": 4.445348080676643, "grad_norm": 1.7067744731903076, "learning_rate": 1.583315781100353e-06, "loss": 0.6755, "step": 13665 }, { "epoch": 4.4469746258946, "grad_norm": 2.003448724746704, "learning_rate": 1.5742896707522242e-06, "loss": 0.6862, "step": 13670 }, { "epoch": 4.448601171112557, "grad_norm": 1.6572003364562988, "learning_rate": 1.5652885253991944e-06, "loss": 0.7304, "step": 13675 }, { "epoch": 4.450227716330514, "grad_norm": 1.6756609678268433, "learning_rate": 1.5563123546338572e-06, "loss": 0.6815, "step": 13680 }, { "epoch": 4.451854261548471, "grad_norm": 1.7195848226547241, "learning_rate": 1.5473611680222045e-06, "loss": 0.6917, "step": 13685 }, { "epoch": 4.453480806766428, "grad_norm": 1.7713993787765503, "learning_rate": 1.5384349751035948e-06, "loss": 0.6803, "step": 13690 }, { "epoch": 4.455107351984386, "grad_norm": 1.955231785774231, "learning_rate": 1.5295337853907604e-06, "loss": 0.673, "step": 13695 }, { "epoch": 4.4567338972023425, "grad_norm": 1.6311743259429932, "learning_rate": 1.5206576083697687e-06, "loss": 0.6646, "step": 13700 }, { "epoch": 4.458360442420299, "grad_norm": 1.8061529397964478, "learning_rate": 1.5118064535000614e-06, "loss": 0.7065, "step": 13705 }, { "epoch": 4.459986987638256, "grad_norm": 1.6449671983718872, "learning_rate": 1.502980330214379e-06, "loss": 0.6658, "step": 13710 }, { "epoch": 4.461613532856213, "grad_norm": 1.6378949880599976, "learning_rate": 1.4941792479188171e-06, "loss": 0.6906, "step": 13715 }, { "epoch": 4.46324007807417, "grad_norm": 1.7537442445755005, "learning_rate": 1.4854032159927562e-06, "loss": 0.7111, "step": 13720 }, { "epoch": 4.464866623292128, "grad_norm": 1.4499577283859253, "learning_rate": 1.4766522437889035e-06, "loss": 0.6431, "step": 13725 }, { "epoch": 4.466493168510085, "grad_norm": 1.5957661867141724, "learning_rate": 1.4679263406332467e-06, "loss": 0.6771, "step": 13730 }, { "epoch": 4.468119713728042, "grad_norm": 1.8479514122009277, "learning_rate": 1.4592255158250605e-06, "loss": 0.6581, "step": 13735 }, { "epoch": 4.469746258945999, "grad_norm": 1.4418613910675049, "learning_rate": 1.450549778636895e-06, "loss": 0.683, "step": 13740 }, { "epoch": 4.4713728041639555, "grad_norm": 1.8812179565429688, "learning_rate": 1.4418991383145675e-06, "loss": 0.6947, "step": 13745 }, { "epoch": 4.4729993493819125, "grad_norm": 1.783433437347412, "learning_rate": 1.43327360407714e-06, "loss": 0.6908, "step": 13750 }, { "epoch": 4.47462589459987, "grad_norm": 1.3892663717269897, "learning_rate": 1.424673185116926e-06, "loss": 0.7027, "step": 13755 }, { "epoch": 4.476252439817827, "grad_norm": 1.7629060745239258, "learning_rate": 1.416097890599466e-06, "loss": 0.7101, "step": 13760 }, { "epoch": 4.477878985035784, "grad_norm": 1.6321301460266113, "learning_rate": 1.4075477296635359e-06, "loss": 0.6994, "step": 13765 }, { "epoch": 4.479505530253741, "grad_norm": 1.5935720205307007, "learning_rate": 1.3990227114211191e-06, "loss": 0.6729, "step": 13770 }, { "epoch": 4.481132075471698, "grad_norm": 1.5852463245391846, "learning_rate": 1.3905228449574066e-06, "loss": 0.6715, "step": 13775 }, { "epoch": 4.482758620689655, "grad_norm": 1.6065924167633057, "learning_rate": 1.3820481393307855e-06, "loss": 0.6763, "step": 13780 }, { "epoch": 4.484385165907613, "grad_norm": 1.6241438388824463, "learning_rate": 1.3735986035728232e-06, "loss": 0.6854, "step": 13785 }, { "epoch": 4.4860117111255695, "grad_norm": 1.6999205350875854, "learning_rate": 1.365174246688275e-06, "loss": 0.6798, "step": 13790 }, { "epoch": 4.487638256343526, "grad_norm": 1.4210866689682007, "learning_rate": 1.356775077655048e-06, "loss": 0.6892, "step": 13795 }, { "epoch": 4.489264801561483, "grad_norm": 1.7106198072433472, "learning_rate": 1.3484011054242157e-06, "loss": 0.6958, "step": 13800 }, { "epoch": 4.49089134677944, "grad_norm": 1.5935312509536743, "learning_rate": 1.3400523389199976e-06, "loss": 0.6849, "step": 13805 }, { "epoch": 4.492517891997397, "grad_norm": 1.6221320629119873, "learning_rate": 1.3317287870397572e-06, "loss": 0.6824, "step": 13810 }, { "epoch": 4.494144437215355, "grad_norm": 1.4809068441390991, "learning_rate": 1.3234304586539737e-06, "loss": 0.6966, "step": 13815 }, { "epoch": 4.495770982433312, "grad_norm": 1.5949801206588745, "learning_rate": 1.3151573626062535e-06, "loss": 0.6614, "step": 13820 }, { "epoch": 4.497397527651269, "grad_norm": 1.6348658800125122, "learning_rate": 1.3069095077133108e-06, "loss": 0.664, "step": 13825 }, { "epoch": 4.499024072869226, "grad_norm": 1.6470710039138794, "learning_rate": 1.298686902764959e-06, "loss": 0.6765, "step": 13830 }, { "epoch": 4.500650618087183, "grad_norm": 2.0382790565490723, "learning_rate": 1.2904895565241076e-06, "loss": 0.7035, "step": 13835 }, { "epoch": 4.5022771633051395, "grad_norm": 1.7357313632965088, "learning_rate": 1.2823174777267439e-06, "loss": 0.6827, "step": 13840 }, { "epoch": 4.503903708523097, "grad_norm": 1.7862924337387085, "learning_rate": 1.2741706750819232e-06, "loss": 0.6768, "step": 13845 }, { "epoch": 4.505530253741054, "grad_norm": 1.5956922769546509, "learning_rate": 1.266049157271773e-06, "loss": 0.6702, "step": 13850 }, { "epoch": 4.507156798959011, "grad_norm": 1.5607898235321045, "learning_rate": 1.2579529329514645e-06, "loss": 0.6872, "step": 13855 }, { "epoch": 4.508783344176968, "grad_norm": 1.532025694847107, "learning_rate": 1.2498820107492204e-06, "loss": 0.7087, "step": 13860 }, { "epoch": 4.510409889394925, "grad_norm": 1.5448532104492188, "learning_rate": 1.2418363992662997e-06, "loss": 0.6605, "step": 13865 }, { "epoch": 4.512036434612883, "grad_norm": 1.9670016765594482, "learning_rate": 1.233816107076985e-06, "loss": 0.6788, "step": 13870 }, { "epoch": 4.51366297983084, "grad_norm": 1.5377695560455322, "learning_rate": 1.2258211427285671e-06, "loss": 0.6721, "step": 13875 }, { "epoch": 4.5152895250487965, "grad_norm": 1.7832047939300537, "learning_rate": 1.2178515147413665e-06, "loss": 0.6884, "step": 13880 }, { "epoch": 4.5169160702667535, "grad_norm": 1.713898777961731, "learning_rate": 1.2099072316086757e-06, "loss": 0.7073, "step": 13885 }, { "epoch": 4.51854261548471, "grad_norm": 1.7618379592895508, "learning_rate": 1.2019883017967943e-06, "loss": 0.681, "step": 13890 }, { "epoch": 4.520169160702667, "grad_norm": 1.6534425020217896, "learning_rate": 1.194094733745002e-06, "loss": 0.6562, "step": 13895 }, { "epoch": 4.521795705920624, "grad_norm": 1.6413297653198242, "learning_rate": 1.1862265358655505e-06, "loss": 0.6674, "step": 13900 }, { "epoch": 4.523422251138582, "grad_norm": 1.6451581716537476, "learning_rate": 1.1783837165436406e-06, "loss": 0.6725, "step": 13905 }, { "epoch": 4.525048796356539, "grad_norm": 1.6512683629989624, "learning_rate": 1.170566284137442e-06, "loss": 0.725, "step": 13910 }, { "epoch": 4.526675341574496, "grad_norm": 1.8413314819335938, "learning_rate": 1.1627742469780684e-06, "loss": 0.7183, "step": 13915 }, { "epoch": 4.528301886792453, "grad_norm": 1.6987863779067993, "learning_rate": 1.1550076133695604e-06, "loss": 0.6599, "step": 13920 }, { "epoch": 4.52992843201041, "grad_norm": 1.595381736755371, "learning_rate": 1.1472663915888888e-06, "loss": 0.6483, "step": 13925 }, { "epoch": 4.531554977228367, "grad_norm": 1.6309919357299805, "learning_rate": 1.1395505898859487e-06, "loss": 0.7143, "step": 13930 }, { "epoch": 4.533181522446324, "grad_norm": 1.6684917211532593, "learning_rate": 1.1318602164835434e-06, "loss": 0.6908, "step": 13935 }, { "epoch": 4.534808067664281, "grad_norm": 1.4799386262893677, "learning_rate": 1.1241952795773697e-06, "loss": 0.6986, "step": 13940 }, { "epoch": 4.536434612882238, "grad_norm": 1.6156679391860962, "learning_rate": 1.1165557873360267e-06, "loss": 0.6628, "step": 13945 }, { "epoch": 4.538061158100195, "grad_norm": 1.5108729600906372, "learning_rate": 1.108941747900985e-06, "loss": 0.6995, "step": 13950 }, { "epoch": 4.539687703318152, "grad_norm": 1.8006521463394165, "learning_rate": 1.1013531693865985e-06, "loss": 0.6699, "step": 13955 }, { "epoch": 4.541314248536109, "grad_norm": 1.812540054321289, "learning_rate": 1.0937900598800872e-06, "loss": 0.6887, "step": 13960 }, { "epoch": 4.542940793754067, "grad_norm": 1.5427296161651611, "learning_rate": 1.0862524274415282e-06, "loss": 0.6873, "step": 13965 }, { "epoch": 4.544567338972024, "grad_norm": 1.559644341468811, "learning_rate": 1.0787402801038405e-06, "loss": 0.6975, "step": 13970 }, { "epoch": 4.5461938841899805, "grad_norm": 1.694392442703247, "learning_rate": 1.071253625872795e-06, "loss": 0.6426, "step": 13975 }, { "epoch": 4.547820429407937, "grad_norm": 1.7675282955169678, "learning_rate": 1.0637924727269822e-06, "loss": 0.692, "step": 13980 }, { "epoch": 4.549446974625894, "grad_norm": 1.6050221920013428, "learning_rate": 1.0563568286178216e-06, "loss": 0.6857, "step": 13985 }, { "epoch": 4.551073519843852, "grad_norm": 1.9955741167068481, "learning_rate": 1.0489467014695526e-06, "loss": 0.7093, "step": 13990 }, { "epoch": 4.552700065061809, "grad_norm": 1.5415308475494385, "learning_rate": 1.0415620991792135e-06, "loss": 0.6754, "step": 13995 }, { "epoch": 4.554326610279766, "grad_norm": 1.7763447761535645, "learning_rate": 1.0342030296166428e-06, "loss": 0.6847, "step": 14000 }, { "epoch": 4.555953155497723, "grad_norm": 1.557338833808899, "learning_rate": 1.0268695006244695e-06, "loss": 0.7054, "step": 14005 }, { "epoch": 4.55757970071568, "grad_norm": 1.503375768661499, "learning_rate": 1.0195615200180974e-06, "loss": 0.6843, "step": 14010 }, { "epoch": 4.559206245933637, "grad_norm": 1.809707522392273, "learning_rate": 1.0122790955857192e-06, "loss": 0.6646, "step": 14015 }, { "epoch": 4.560832791151594, "grad_norm": 1.6707818508148193, "learning_rate": 1.0050222350882682e-06, "loss": 0.7034, "step": 14020 }, { "epoch": 4.562459336369551, "grad_norm": 1.640432357788086, "learning_rate": 9.97790946259461e-07, "loss": 0.6713, "step": 14025 }, { "epoch": 4.564085881587508, "grad_norm": 2.24538254737854, "learning_rate": 9.905852368057383e-07, "loss": 0.7063, "step": 14030 }, { "epoch": 4.565712426805465, "grad_norm": 1.7272684574127197, "learning_rate": 9.834051144062994e-07, "loss": 0.6818, "step": 14035 }, { "epoch": 4.567338972023422, "grad_norm": 1.669066071510315, "learning_rate": 9.762505867130594e-07, "loss": 0.7088, "step": 14040 }, { "epoch": 4.568965517241379, "grad_norm": 1.7270797491073608, "learning_rate": 9.691216613506692e-07, "loss": 0.6475, "step": 14045 }, { "epoch": 4.570592062459337, "grad_norm": 1.5899087190628052, "learning_rate": 9.620183459164878e-07, "loss": 0.6861, "step": 14050 }, { "epoch": 4.572218607677294, "grad_norm": 1.701887845993042, "learning_rate": 9.549406479805818e-07, "loss": 0.65, "step": 14055 }, { "epoch": 4.573845152895251, "grad_norm": 1.7089637517929077, "learning_rate": 9.478885750857285e-07, "loss": 0.6792, "step": 14060 }, { "epoch": 4.5754716981132075, "grad_norm": 1.6392866373062134, "learning_rate": 9.408621347473751e-07, "loss": 0.7005, "step": 14065 }, { "epoch": 4.577098243331164, "grad_norm": 1.468783974647522, "learning_rate": 9.338613344536701e-07, "loss": 0.6792, "step": 14070 }, { "epoch": 4.578724788549121, "grad_norm": 1.6157522201538086, "learning_rate": 9.26886181665429e-07, "loss": 0.6893, "step": 14075 }, { "epoch": 4.580351333767078, "grad_norm": 1.8369728326797485, "learning_rate": 9.199366838161389e-07, "loss": 0.6937, "step": 14080 }, { "epoch": 4.581977878985036, "grad_norm": 1.8909553289413452, "learning_rate": 9.130128483119366e-07, "loss": 0.7242, "step": 14085 }, { "epoch": 4.583604424202993, "grad_norm": 1.6391102075576782, "learning_rate": 9.06114682531628e-07, "loss": 0.6637, "step": 14090 }, { "epoch": 4.58523096942095, "grad_norm": 1.522191047668457, "learning_rate": 8.992421938266438e-07, "loss": 0.6695, "step": 14095 }, { "epoch": 4.586857514638907, "grad_norm": 1.578761339187622, "learning_rate": 8.923953895210612e-07, "loss": 0.708, "step": 14100 }, { "epoch": 4.588484059856864, "grad_norm": 1.6012659072875977, "learning_rate": 8.855742769115799e-07, "loss": 0.6891, "step": 14105 }, { "epoch": 4.5901106050748215, "grad_norm": 1.5730737447738647, "learning_rate": 8.787788632675293e-07, "loss": 0.6859, "step": 14110 }, { "epoch": 4.591737150292778, "grad_norm": 1.9886281490325928, "learning_rate": 8.720091558308357e-07, "loss": 0.6419, "step": 14115 }, { "epoch": 4.593363695510735, "grad_norm": 1.7283319234848022, "learning_rate": 8.652651618160424e-07, "loss": 0.6484, "step": 14120 }, { "epoch": 4.594990240728692, "grad_norm": 1.4707318544387817, "learning_rate": 8.58546888410286e-07, "loss": 0.6707, "step": 14125 }, { "epoch": 4.596616785946649, "grad_norm": 1.6166142225265503, "learning_rate": 8.51854342773295e-07, "loss": 0.6663, "step": 14130 }, { "epoch": 4.598243331164606, "grad_norm": 1.5770900249481201, "learning_rate": 8.451875320373698e-07, "loss": 0.6699, "step": 14135 }, { "epoch": 4.599869876382563, "grad_norm": 1.5864421129226685, "learning_rate": 8.385464633074019e-07, "loss": 0.7018, "step": 14140 }, { "epoch": 4.601496421600521, "grad_norm": 1.6702364683151245, "learning_rate": 8.319311436608301e-07, "loss": 0.6987, "step": 14145 }, { "epoch": 4.603122966818478, "grad_norm": 1.612497091293335, "learning_rate": 8.25341580147665e-07, "loss": 0.6777, "step": 14150 }, { "epoch": 4.604749512036435, "grad_norm": 1.5536036491394043, "learning_rate": 8.187777797904639e-07, "loss": 0.6782, "step": 14155 }, { "epoch": 4.6063760572543915, "grad_norm": 1.7219789028167725, "learning_rate": 8.122397495843343e-07, "loss": 0.6802, "step": 14160 }, { "epoch": 4.608002602472348, "grad_norm": 1.5853407382965088, "learning_rate": 8.057274964969108e-07, "loss": 0.6883, "step": 14165 }, { "epoch": 4.609629147690306, "grad_norm": 1.5012727975845337, "learning_rate": 7.992410274683615e-07, "loss": 0.6673, "step": 14170 }, { "epoch": 4.611255692908263, "grad_norm": 1.6917630434036255, "learning_rate": 7.927803494113761e-07, "loss": 0.6705, "step": 14175 }, { "epoch": 4.61288223812622, "grad_norm": 1.7774066925048828, "learning_rate": 7.863454692111583e-07, "loss": 0.6906, "step": 14180 }, { "epoch": 4.614508783344177, "grad_norm": 1.6379450559616089, "learning_rate": 7.799363937254195e-07, "loss": 0.6727, "step": 14185 }, { "epoch": 4.616135328562134, "grad_norm": 2.1022017002105713, "learning_rate": 7.735531297843713e-07, "loss": 0.6819, "step": 14190 }, { "epoch": 4.617761873780091, "grad_norm": 1.9289857149124146, "learning_rate": 7.671956841907218e-07, "loss": 0.6955, "step": 14195 }, { "epoch": 4.6193884189980485, "grad_norm": 1.7522467374801636, "learning_rate": 7.60864063719649e-07, "loss": 0.6867, "step": 14200 }, { "epoch": 4.621014964216005, "grad_norm": 1.8458904027938843, "learning_rate": 7.545582751188274e-07, "loss": 0.6703, "step": 14205 }, { "epoch": 4.622641509433962, "grad_norm": 1.6652015447616577, "learning_rate": 7.482783251083869e-07, "loss": 0.6705, "step": 14210 }, { "epoch": 4.624268054651919, "grad_norm": 1.6297311782836914, "learning_rate": 7.420242203809325e-07, "loss": 0.6747, "step": 14215 }, { "epoch": 4.625894599869876, "grad_norm": 1.6660754680633545, "learning_rate": 7.357959676015214e-07, "loss": 0.672, "step": 14220 }, { "epoch": 4.627521145087833, "grad_norm": 1.6898998022079468, "learning_rate": 7.295935734076609e-07, "loss": 0.6934, "step": 14225 }, { "epoch": 4.629147690305791, "grad_norm": 1.513210415840149, "learning_rate": 7.234170444092942e-07, "loss": 0.6635, "step": 14230 }, { "epoch": 4.630774235523748, "grad_norm": 2.018749237060547, "learning_rate": 7.172663871888113e-07, "loss": 0.692, "step": 14235 }, { "epoch": 4.632400780741705, "grad_norm": 1.886714220046997, "learning_rate": 7.123644934982576e-07, "loss": 0.6862, "step": 14240 }, { "epoch": 4.634027325959662, "grad_norm": 1.622294545173645, "learning_rate": 7.062604219775531e-07, "loss": 0.6715, "step": 14245 }, { "epoch": 4.6356538711776185, "grad_norm": 1.710085391998291, "learning_rate": 7.001822405187014e-07, "loss": 0.7028, "step": 14250 }, { "epoch": 4.637280416395575, "grad_norm": 1.6468864679336548, "learning_rate": 6.941299555992737e-07, "loss": 0.6622, "step": 14255 }, { "epoch": 4.638906961613533, "grad_norm": 1.7270046472549438, "learning_rate": 6.881035736692493e-07, "loss": 0.7071, "step": 14260 }, { "epoch": 4.64053350683149, "grad_norm": 1.6101285219192505, "learning_rate": 6.821031011509937e-07, "loss": 0.6706, "step": 14265 }, { "epoch": 4.642160052049447, "grad_norm": 1.7665863037109375, "learning_rate": 6.761285444392696e-07, "loss": 0.6607, "step": 14270 }, { "epoch": 4.643786597267404, "grad_norm": 1.5395805835723877, "learning_rate": 6.701799099012141e-07, "loss": 0.6674, "step": 14275 }, { "epoch": 4.645413142485361, "grad_norm": 1.651487112045288, "learning_rate": 6.642572038763506e-07, "loss": 0.6716, "step": 14280 }, { "epoch": 4.647039687703318, "grad_norm": 1.715843677520752, "learning_rate": 6.583604326765496e-07, "loss": 0.7325, "step": 14285 }, { "epoch": 4.648666232921276, "grad_norm": 1.602758526802063, "learning_rate": 6.524896025860594e-07, "loss": 0.6697, "step": 14290 }, { "epoch": 4.6502927781392325, "grad_norm": 1.5656150579452515, "learning_rate": 6.466447198614806e-07, "loss": 0.6939, "step": 14295 }, { "epoch": 4.651919323357189, "grad_norm": 1.6969166994094849, "learning_rate": 6.408257907317527e-07, "loss": 0.666, "step": 14300 }, { "epoch": 4.653545868575146, "grad_norm": 1.6942557096481323, "learning_rate": 6.350328213981654e-07, "loss": 0.6791, "step": 14305 }, { "epoch": 4.655172413793103, "grad_norm": 1.4901227951049805, "learning_rate": 6.292658180343414e-07, "loss": 0.6323, "step": 14310 }, { "epoch": 4.656798959011061, "grad_norm": 1.5597556829452515, "learning_rate": 6.235247867862226e-07, "loss": 0.6557, "step": 14315 }, { "epoch": 4.658425504229018, "grad_norm": 1.626906156539917, "learning_rate": 6.178097337720872e-07, "loss": 0.6675, "step": 14320 }, { "epoch": 4.660052049446975, "grad_norm": 1.4907982349395752, "learning_rate": 6.121206650825162e-07, "loss": 0.6647, "step": 14325 }, { "epoch": 4.661678594664932, "grad_norm": 1.5998609066009521, "learning_rate": 6.064575867803985e-07, "loss": 0.6517, "step": 14330 }, { "epoch": 4.663305139882889, "grad_norm": 1.5094411373138428, "learning_rate": 6.008205049009341e-07, "loss": 0.6939, "step": 14335 }, { "epoch": 4.6649316851008455, "grad_norm": 1.6829676628112793, "learning_rate": 5.952094254516094e-07, "loss": 0.6631, "step": 14340 }, { "epoch": 4.6665582303188025, "grad_norm": 1.664178490638733, "learning_rate": 5.896243544122076e-07, "loss": 0.6736, "step": 14345 }, { "epoch": 4.66818477553676, "grad_norm": 1.8332068920135498, "learning_rate": 5.840652977347872e-07, "loss": 0.7112, "step": 14350 }, { "epoch": 4.669811320754717, "grad_norm": 1.821580410003662, "learning_rate": 5.785322613436894e-07, "loss": 0.6843, "step": 14355 }, { "epoch": 4.671437865972674, "grad_norm": 1.5196067094802856, "learning_rate": 5.730252511355172e-07, "loss": 0.6575, "step": 14360 }, { "epoch": 4.673064411190631, "grad_norm": 1.9047868251800537, "learning_rate": 5.675442729791425e-07, "loss": 0.6867, "step": 14365 }, { "epoch": 4.674690956408588, "grad_norm": 1.5554510354995728, "learning_rate": 5.620893327156957e-07, "loss": 0.7028, "step": 14370 }, { "epoch": 4.676317501626546, "grad_norm": 1.5679271221160889, "learning_rate": 5.566604361585626e-07, "loss": 0.705, "step": 14375 }, { "epoch": 4.677944046844503, "grad_norm": 1.5326664447784424, "learning_rate": 5.512575890933569e-07, "loss": 0.6394, "step": 14380 }, { "epoch": 4.6795705920624595, "grad_norm": 1.836523413658142, "learning_rate": 5.458807972779534e-07, "loss": 0.6852, "step": 14385 }, { "epoch": 4.681197137280416, "grad_norm": 1.7149012088775635, "learning_rate": 5.405300664424379e-07, "loss": 0.6553, "step": 14390 }, { "epoch": 4.682823682498373, "grad_norm": 1.6908270120620728, "learning_rate": 5.352054022891406e-07, "loss": 0.648, "step": 14395 }, { "epoch": 4.68445022771633, "grad_norm": 1.5774027109146118, "learning_rate": 5.299068104926058e-07, "loss": 0.7161, "step": 14400 }, { "epoch": 4.686076772934287, "grad_norm": 1.7259037494659424, "learning_rate": 5.246342966995888e-07, "loss": 0.6771, "step": 14405 }, { "epoch": 4.687703318152245, "grad_norm": 1.6489514112472534, "learning_rate": 5.193878665290558e-07, "loss": 0.6921, "step": 14410 }, { "epoch": 4.689329863370202, "grad_norm": 1.5153777599334717, "learning_rate": 5.141675255721762e-07, "loss": 0.7097, "step": 14415 }, { "epoch": 4.690956408588159, "grad_norm": 1.5902612209320068, "learning_rate": 5.089732793923163e-07, "loss": 0.6648, "step": 14420 }, { "epoch": 4.692582953806116, "grad_norm": 1.688076376914978, "learning_rate": 5.038051335250316e-07, "loss": 0.6954, "step": 14425 }, { "epoch": 4.694209499024073, "grad_norm": 1.4784841537475586, "learning_rate": 4.986630934780606e-07, "loss": 0.6689, "step": 14430 }, { "epoch": 4.69583604424203, "grad_norm": 1.5338027477264404, "learning_rate": 4.935471647313284e-07, "loss": 0.65, "step": 14435 }, { "epoch": 4.697462589459987, "grad_norm": 1.8620774745941162, "learning_rate": 4.884573527369207e-07, "loss": 0.6943, "step": 14440 }, { "epoch": 4.699089134677944, "grad_norm": 1.7924411296844482, "learning_rate": 4.833936629191016e-07, "loss": 0.6772, "step": 14445 }, { "epoch": 4.700715679895901, "grad_norm": 1.6954618692398071, "learning_rate": 4.783561006742876e-07, "loss": 0.6691, "step": 14450 }, { "epoch": 4.702342225113858, "grad_norm": 1.5615060329437256, "learning_rate": 4.7334467137105933e-07, "loss": 0.6821, "step": 14455 }, { "epoch": 4.703968770331815, "grad_norm": 1.760363221168518, "learning_rate": 4.683593803501446e-07, "loss": 0.6869, "step": 14460 }, { "epoch": 4.705595315549772, "grad_norm": 1.7261453866958618, "learning_rate": 4.634002329244047e-07, "loss": 0.6991, "step": 14465 }, { "epoch": 4.70722186076773, "grad_norm": 1.5953327417373657, "learning_rate": 4.584672343788593e-07, "loss": 0.6907, "step": 14470 }, { "epoch": 4.7088484059856865, "grad_norm": 1.564792275428772, "learning_rate": 4.535603899706448e-07, "loss": 0.6547, "step": 14475 }, { "epoch": 4.7104749512036435, "grad_norm": 1.458240270614624, "learning_rate": 4.486797049290337e-07, "loss": 0.694, "step": 14480 }, { "epoch": 4.7121014964216, "grad_norm": 1.7077065706253052, "learning_rate": 4.438251844554098e-07, "loss": 0.6709, "step": 14485 }, { "epoch": 4.713728041639557, "grad_norm": 1.5657155513763428, "learning_rate": 4.389968337232903e-07, "loss": 0.6636, "step": 14490 }, { "epoch": 4.715354586857515, "grad_norm": 1.5335454940795898, "learning_rate": 4.341946578782868e-07, "loss": 0.6653, "step": 14495 }, { "epoch": 4.716981132075472, "grad_norm": 1.4735745191574097, "learning_rate": 4.294186620381224e-07, "loss": 0.6632, "step": 14500 }, { "epoch": 4.718607677293429, "grad_norm": 1.7237752676010132, "learning_rate": 4.2466885129262004e-07, "loss": 0.6758, "step": 14505 }, { "epoch": 4.720234222511386, "grad_norm": 1.634718418121338, "learning_rate": 4.1994523070369994e-07, "loss": 0.6811, "step": 14510 }, { "epoch": 4.721860767729343, "grad_norm": 1.5017166137695312, "learning_rate": 4.152478053053632e-07, "loss": 0.6822, "step": 14515 }, { "epoch": 4.7234873129473, "grad_norm": 1.7236521244049072, "learning_rate": 4.1057658010370536e-07, "loss": 0.6568, "step": 14520 }, { "epoch": 4.7251138581652565, "grad_norm": 1.8503830432891846, "learning_rate": 4.059315600768887e-07, "loss": 0.7083, "step": 14525 }, { "epoch": 4.726740403383214, "grad_norm": 1.4786392450332642, "learning_rate": 4.0131275017515345e-07, "loss": 0.6727, "step": 14530 }, { "epoch": 4.728366948601171, "grad_norm": 1.6189405918121338, "learning_rate": 3.967201553208122e-07, "loss": 0.6667, "step": 14535 }, { "epoch": 4.729993493819128, "grad_norm": 1.5545790195465088, "learning_rate": 3.921537804082359e-07, "loss": 0.6903, "step": 14540 }, { "epoch": 4.731620039037085, "grad_norm": 1.7284637689590454, "learning_rate": 3.876136303038458e-07, "loss": 0.6747, "step": 14545 }, { "epoch": 4.733246584255042, "grad_norm": 1.68375825881958, "learning_rate": 3.83099709846127e-07, "loss": 0.6665, "step": 14550 }, { "epoch": 4.734873129473, "grad_norm": 1.624594807624817, "learning_rate": 3.7861202384560644e-07, "loss": 0.6798, "step": 14555 }, { "epoch": 4.736499674690957, "grad_norm": 1.7719902992248535, "learning_rate": 3.741505770848475e-07, "loss": 0.6865, "step": 14560 }, { "epoch": 4.738126219908914, "grad_norm": 1.7481844425201416, "learning_rate": 3.6971537431846057e-07, "loss": 0.6909, "step": 14565 }, { "epoch": 4.7397527651268705, "grad_norm": 1.769526720046997, "learning_rate": 3.6530642027307883e-07, "loss": 0.7087, "step": 14570 }, { "epoch": 4.741379310344827, "grad_norm": 1.6826857328414917, "learning_rate": 3.609237196473658e-07, "loss": 0.682, "step": 14575 }, { "epoch": 4.743005855562784, "grad_norm": 1.7040724754333496, "learning_rate": 3.565672771120104e-07, "loss": 0.6752, "step": 14580 }, { "epoch": 4.744632400780741, "grad_norm": 1.7584631443023682, "learning_rate": 3.5223709730970446e-07, "loss": 0.6565, "step": 14585 }, { "epoch": 4.746258945998699, "grad_norm": 1.738541603088379, "learning_rate": 3.479331848551648e-07, "loss": 0.6967, "step": 14590 }, { "epoch": 4.747885491216656, "grad_norm": 1.549530267715454, "learning_rate": 3.4365554433511416e-07, "loss": 0.6558, "step": 14595 }, { "epoch": 4.749512036434613, "grad_norm": 1.538201928138733, "learning_rate": 3.3940418030826706e-07, "loss": 0.6723, "step": 14600 }, { "epoch": 4.75113858165257, "grad_norm": 1.5931099653244019, "learning_rate": 3.3517909730534926e-07, "loss": 0.675, "step": 14605 }, { "epoch": 4.752765126870527, "grad_norm": 1.7805777788162231, "learning_rate": 3.3098029982906176e-07, "loss": 0.6716, "step": 14610 }, { "epoch": 4.7543916720884845, "grad_norm": 1.9395880699157715, "learning_rate": 3.268077923541085e-07, "loss": 0.6792, "step": 14615 }, { "epoch": 4.756018217306441, "grad_norm": 1.4551812410354614, "learning_rate": 3.226615793271631e-07, "loss": 0.6597, "step": 14620 }, { "epoch": 4.757644762524398, "grad_norm": 1.621719479560852, "learning_rate": 3.185416651668882e-07, "loss": 0.6615, "step": 14625 }, { "epoch": 4.759271307742355, "grad_norm": 1.7234394550323486, "learning_rate": 3.1444805426391333e-07, "loss": 0.6727, "step": 14630 }, { "epoch": 4.760897852960312, "grad_norm": 1.6716097593307495, "learning_rate": 3.1038075098083485e-07, "loss": 0.6916, "step": 14635 }, { "epoch": 4.762524398178269, "grad_norm": 1.663861870765686, "learning_rate": 3.0633975965221606e-07, "loss": 0.708, "step": 14640 }, { "epoch": 4.764150943396227, "grad_norm": 1.649070382118225, "learning_rate": 3.023250845845815e-07, "loss": 0.6442, "step": 14645 }, { "epoch": 4.765777488614184, "grad_norm": 1.600273847579956, "learning_rate": 2.983367300564033e-07, "loss": 0.6687, "step": 14650 }, { "epoch": 4.767404033832141, "grad_norm": 1.795655608177185, "learning_rate": 2.943747003181091e-07, "loss": 0.6574, "step": 14655 }, { "epoch": 4.7690305790500975, "grad_norm": 1.6123244762420654, "learning_rate": 2.9043899959206864e-07, "loss": 0.6795, "step": 14660 }, { "epoch": 4.770657124268054, "grad_norm": 1.5920459032058716, "learning_rate": 2.8652963207260184e-07, "loss": 0.7007, "step": 14665 }, { "epoch": 4.772283669486011, "grad_norm": 1.7603579759597778, "learning_rate": 2.8264660192594827e-07, "loss": 0.6864, "step": 14670 }, { "epoch": 4.773910214703969, "grad_norm": 1.4247936010360718, "learning_rate": 2.787899132902949e-07, "loss": 0.6561, "step": 14675 }, { "epoch": 4.775536759921926, "grad_norm": 1.5400625467300415, "learning_rate": 2.749595702757485e-07, "loss": 0.6797, "step": 14680 }, { "epoch": 4.777163305139883, "grad_norm": 1.572620153427124, "learning_rate": 2.711555769643381e-07, "loss": 0.6576, "step": 14685 }, { "epoch": 4.77878985035784, "grad_norm": 1.7807002067565918, "learning_rate": 2.6737793741001825e-07, "loss": 0.6285, "step": 14690 }, { "epoch": 4.780416395575797, "grad_norm": 1.6995484828948975, "learning_rate": 2.636266556386546e-07, "loss": 0.6568, "step": 14695 }, { "epoch": 4.782042940793754, "grad_norm": 1.7155641317367554, "learning_rate": 2.599017356480188e-07, "loss": 0.6904, "step": 14700 }, { "epoch": 4.7836694860117115, "grad_norm": 1.6149102449417114, "learning_rate": 2.562031814077964e-07, "loss": 0.6861, "step": 14705 }, { "epoch": 4.785296031229668, "grad_norm": 1.6770730018615723, "learning_rate": 2.525309968595652e-07, "loss": 0.6784, "step": 14710 }, { "epoch": 4.786922576447625, "grad_norm": 1.7513384819030762, "learning_rate": 2.488851859168112e-07, "loss": 0.6554, "step": 14715 }, { "epoch": 4.788549121665582, "grad_norm": 1.8393046855926514, "learning_rate": 2.4526575246490713e-07, "loss": 0.6796, "step": 14720 }, { "epoch": 4.790175666883539, "grad_norm": 1.7560538053512573, "learning_rate": 2.4167270036111743e-07, "loss": 0.7077, "step": 14725 }, { "epoch": 4.791802212101496, "grad_norm": 1.6581623554229736, "learning_rate": 2.3810603343459025e-07, "loss": 0.6796, "step": 14730 }, { "epoch": 4.793428757319454, "grad_norm": 1.5466569662094116, "learning_rate": 2.345657554863545e-07, "loss": 0.6485, "step": 14735 }, { "epoch": 4.795055302537411, "grad_norm": 1.6848571300506592, "learning_rate": 2.3105187028931996e-07, "loss": 0.6682, "step": 14740 }, { "epoch": 4.796681847755368, "grad_norm": 1.7092764377593994, "learning_rate": 2.2756438158826053e-07, "loss": 0.667, "step": 14745 }, { "epoch": 4.798308392973325, "grad_norm": 1.7653117179870605, "learning_rate": 2.2410329309982824e-07, "loss": 0.687, "step": 14750 }, { "epoch": 4.7999349381912815, "grad_norm": 1.8479626178741455, "learning_rate": 2.2066860851253922e-07, "loss": 0.6775, "step": 14755 }, { "epoch": 4.801561483409239, "grad_norm": 1.5833468437194824, "learning_rate": 2.172603314867655e-07, "loss": 0.7052, "step": 14760 }, { "epoch": 4.803188028627196, "grad_norm": 1.5006370544433594, "learning_rate": 2.1387846565474045e-07, "loss": 0.6751, "step": 14765 }, { "epoch": 4.804814573845153, "grad_norm": 1.6979249715805054, "learning_rate": 2.105230146205478e-07, "loss": 0.6868, "step": 14770 }, { "epoch": 4.80644111906311, "grad_norm": 1.4989633560180664, "learning_rate": 2.0719398196012707e-07, "loss": 0.6773, "step": 14775 }, { "epoch": 4.808067664281067, "grad_norm": 1.617918610572815, "learning_rate": 2.0389137122125422e-07, "loss": 0.6774, "step": 14780 }, { "epoch": 4.809694209499024, "grad_norm": 1.9048950672149658, "learning_rate": 2.0061518592355277e-07, "loss": 0.6875, "step": 14785 }, { "epoch": 4.811320754716981, "grad_norm": 1.6286317110061646, "learning_rate": 1.9736542955848537e-07, "loss": 0.7065, "step": 14790 }, { "epoch": 4.8129472999349385, "grad_norm": 1.723865270614624, "learning_rate": 1.9414210558934554e-07, "loss": 0.6878, "step": 14795 }, { "epoch": 4.814573845152895, "grad_norm": 1.453617811203003, "learning_rate": 1.9094521745126325e-07, "loss": 0.6423, "step": 14800 }, { "epoch": 4.816200390370852, "grad_norm": 1.6138180494308472, "learning_rate": 1.8777476855118547e-07, "loss": 0.6759, "step": 14805 }, { "epoch": 4.817826935588809, "grad_norm": 1.491004228591919, "learning_rate": 1.8463076226789277e-07, "loss": 0.6685, "step": 14810 }, { "epoch": 4.819453480806766, "grad_norm": 1.6978496313095093, "learning_rate": 1.8151320195197997e-07, "loss": 0.7444, "step": 14815 }, { "epoch": 4.821080026024724, "grad_norm": 1.4545952081680298, "learning_rate": 1.784220909258616e-07, "loss": 0.6548, "step": 14820 }, { "epoch": 4.822706571242681, "grad_norm": 1.7182471752166748, "learning_rate": 1.753574324837609e-07, "loss": 0.7052, "step": 14825 }, { "epoch": 4.824333116460638, "grad_norm": 1.5585763454437256, "learning_rate": 1.7231922989171534e-07, "loss": 0.6869, "step": 14830 }, { "epoch": 4.825959661678595, "grad_norm": 1.795316457748413, "learning_rate": 1.6930748638756266e-07, "loss": 0.6806, "step": 14835 }, { "epoch": 4.827586206896552, "grad_norm": 1.6641993522644043, "learning_rate": 1.6632220518094932e-07, "loss": 0.671, "step": 14840 }, { "epoch": 4.8292127521145085, "grad_norm": 1.7143789529800415, "learning_rate": 1.6336338945331098e-07, "loss": 0.6689, "step": 14845 }, { "epoch": 4.830839297332465, "grad_norm": 1.4872487783432007, "learning_rate": 1.604310423578892e-07, "loss": 0.6655, "step": 14850 }, { "epoch": 4.832465842550423, "grad_norm": 1.601290225982666, "learning_rate": 1.57525167019712e-07, "loss": 0.669, "step": 14855 }, { "epoch": 4.83409238776838, "grad_norm": 1.7025182247161865, "learning_rate": 1.5464576653559658e-07, "loss": 0.6901, "step": 14860 }, { "epoch": 4.835718932986337, "grad_norm": 1.5925567150115967, "learning_rate": 1.517928439741495e-07, "loss": 0.686, "step": 14865 }, { "epoch": 4.837345478204294, "grad_norm": 1.712281584739685, "learning_rate": 1.4896640237575254e-07, "loss": 0.6701, "step": 14870 }, { "epoch": 4.838972023422251, "grad_norm": 1.7019681930541992, "learning_rate": 1.461664447525768e-07, "loss": 0.7088, "step": 14875 }, { "epoch": 4.840598568640209, "grad_norm": 1.6846308708190918, "learning_rate": 1.4339297408855478e-07, "loss": 0.6922, "step": 14880 }, { "epoch": 4.842225113858166, "grad_norm": 1.5838193893432617, "learning_rate": 1.4064599333940555e-07, "loss": 0.653, "step": 14885 }, { "epoch": 4.8438516590761225, "grad_norm": 1.7012208700180054, "learning_rate": 1.3792550543260952e-07, "loss": 0.6891, "step": 14890 }, { "epoch": 4.845478204294079, "grad_norm": 1.6707468032836914, "learning_rate": 1.3523151326741702e-07, "loss": 0.6402, "step": 14895 }, { "epoch": 4.847104749512036, "grad_norm": 1.5512348413467407, "learning_rate": 1.32564019714837e-07, "loss": 0.65, "step": 14900 }, { "epoch": 4.848731294729993, "grad_norm": 1.6866685152053833, "learning_rate": 1.299230276176483e-07, "loss": 0.6741, "step": 14905 }, { "epoch": 4.85035783994795, "grad_norm": 1.4258533716201782, "learning_rate": 1.2730853979037172e-07, "loss": 0.6627, "step": 14910 }, { "epoch": 4.851984385165908, "grad_norm": 1.6666285991668701, "learning_rate": 1.247205590192979e-07, "loss": 0.6701, "step": 14915 }, { "epoch": 4.853610930383865, "grad_norm": 1.6783100366592407, "learning_rate": 1.2215908806245958e-07, "loss": 0.6776, "step": 14920 }, { "epoch": 4.855237475601822, "grad_norm": 1.6972566843032837, "learning_rate": 1.1962412964964254e-07, "loss": 0.6666, "step": 14925 }, { "epoch": 4.856864020819779, "grad_norm": 1.6129467487335205, "learning_rate": 1.1711568648236914e-07, "loss": 0.6661, "step": 14930 }, { "epoch": 4.8584905660377355, "grad_norm": 1.4761837720870972, "learning_rate": 1.1463376123391766e-07, "loss": 0.685, "step": 14935 }, { "epoch": 4.860117111255693, "grad_norm": 1.5007270574569702, "learning_rate": 1.1217835654929177e-07, "loss": 0.6813, "step": 14940 }, { "epoch": 4.86174365647365, "grad_norm": 1.795398235321045, "learning_rate": 1.0974947504524269e-07, "loss": 0.6135, "step": 14945 }, { "epoch": 4.863370201691607, "grad_norm": 1.6310721635818481, "learning_rate": 1.0734711931024987e-07, "loss": 0.675, "step": 14950 }, { "epoch": 4.864996746909564, "grad_norm": 1.7355406284332275, "learning_rate": 1.0497129190452926e-07, "loss": 0.6681, "step": 14955 }, { "epoch": 4.866623292127521, "grad_norm": 1.702034592628479, "learning_rate": 1.0262199536001938e-07, "loss": 0.6771, "step": 14960 }, { "epoch": 4.868249837345478, "grad_norm": 1.6706565618515015, "learning_rate": 1.0029923218038972e-07, "loss": 0.6826, "step": 14965 }, { "epoch": 4.869876382563435, "grad_norm": 1.5301884412765503, "learning_rate": 9.800300484102409e-08, "loss": 0.69, "step": 14970 }, { "epoch": 4.871502927781393, "grad_norm": 1.4553338289260864, "learning_rate": 9.573331578904e-08, "loss": 0.6957, "step": 14975 }, { "epoch": 4.8731294729993495, "grad_norm": 1.6955540180206299, "learning_rate": 9.349016744326367e-08, "loss": 0.6894, "step": 14980 }, { "epoch": 4.874756018217306, "grad_norm": 1.6802115440368652, "learning_rate": 9.127356219423843e-08, "loss": 0.6694, "step": 14985 }, { "epoch": 4.876382563435263, "grad_norm": 1.7426230907440186, "learning_rate": 8.908350240421914e-08, "loss": 0.698, "step": 14990 }, { "epoch": 4.87800910865322, "grad_norm": 1.5391120910644531, "learning_rate": 8.691999040717491e-08, "loss": 0.6799, "step": 14995 }, { "epoch": 4.879635653871178, "grad_norm": 1.4747508764266968, "learning_rate": 8.478302850878361e-08, "loss": 0.6663, "step": 15000 }, { "epoch": 4.881262199089135, "grad_norm": 1.7132428884506226, "learning_rate": 8.267261898641798e-08, "loss": 0.6593, "step": 15005 }, { "epoch": 4.882888744307092, "grad_norm": 1.4830818176269531, "learning_rate": 8.058876408916505e-08, "loss": 0.6809, "step": 15010 }, { "epoch": 4.884515289525049, "grad_norm": 1.535070538520813, "learning_rate": 7.853146603780947e-08, "loss": 0.6889, "step": 15015 }, { "epoch": 4.886141834743006, "grad_norm": 1.990239143371582, "learning_rate": 7.65007270248308e-08, "loss": 0.6759, "step": 15020 }, { "epoch": 4.887768379960963, "grad_norm": 1.5111820697784424, "learning_rate": 7.449654921440618e-08, "loss": 0.6701, "step": 15025 }, { "epoch": 4.8893949251789195, "grad_norm": 2.4163124561309814, "learning_rate": 7.25189347424049e-08, "loss": 0.6837, "step": 15030 }, { "epoch": 4.891021470396877, "grad_norm": 1.5814790725708008, "learning_rate": 7.056788571639105e-08, "loss": 0.6628, "step": 15035 }, { "epoch": 4.892648015614834, "grad_norm": 1.7669132947921753, "learning_rate": 6.86434042156181e-08, "loss": 0.6635, "step": 15040 }, { "epoch": 4.894274560832791, "grad_norm": 1.4718823432922363, "learning_rate": 6.674549229101767e-08, "loss": 0.6685, "step": 15045 }, { "epoch": 4.895901106050748, "grad_norm": 1.641991376876831, "learning_rate": 6.487415196521629e-08, "loss": 0.6623, "step": 15050 }, { "epoch": 4.897527651268705, "grad_norm": 1.6729685068130493, "learning_rate": 6.302938523251589e-08, "loss": 0.6798, "step": 15055 }, { "epoch": 4.899154196486663, "grad_norm": 1.5874401330947876, "learning_rate": 6.121119405890497e-08, "loss": 0.6982, "step": 15060 }, { "epoch": 4.90078074170462, "grad_norm": 1.5314335823059082, "learning_rate": 5.941958038204187e-08, "loss": 0.6549, "step": 15065 }, { "epoch": 4.9024072869225765, "grad_norm": 1.745509386062622, "learning_rate": 5.7654546111268746e-08, "loss": 0.6861, "step": 15070 }, { "epoch": 4.9040338321405335, "grad_norm": 1.6883145570755005, "learning_rate": 5.59160931275976e-08, "loss": 0.7008, "step": 15075 }, { "epoch": 4.90566037735849, "grad_norm": 1.8249529600143433, "learning_rate": 5.4204223283715884e-08, "loss": 0.6991, "step": 15080 }, { "epoch": 4.907286922576447, "grad_norm": 1.7526054382324219, "learning_rate": 5.2518938403978145e-08, "loss": 0.6424, "step": 15085 }, { "epoch": 4.908913467794405, "grad_norm": 1.655096411705017, "learning_rate": 5.086024028440883e-08, "loss": 0.6912, "step": 15090 }, { "epoch": 4.910540013012362, "grad_norm": 1.5978666543960571, "learning_rate": 4.922813069269394e-08, "loss": 0.6885, "step": 15095 }, { "epoch": 4.912166558230319, "grad_norm": 1.6107183694839478, "learning_rate": 4.7622611368189354e-08, "loss": 0.6689, "step": 15100 }, { "epoch": 4.913793103448276, "grad_norm": 1.8669627904891968, "learning_rate": 4.604368402191528e-08, "loss": 0.6947, "step": 15105 }, { "epoch": 4.915419648666233, "grad_norm": 1.5616167783737183, "learning_rate": 4.4491350336545166e-08, "loss": 0.6621, "step": 15110 }, { "epoch": 4.91704619388419, "grad_norm": 1.680681586265564, "learning_rate": 4.2965611966416796e-08, "loss": 0.6813, "step": 15115 }, { "epoch": 4.918672739102147, "grad_norm": 1.645995855331421, "learning_rate": 4.146647053752395e-08, "loss": 0.6562, "step": 15120 }, { "epoch": 4.920299284320104, "grad_norm": 1.5914673805236816, "learning_rate": 3.9993927647516415e-08, "loss": 0.6905, "step": 15125 }, { "epoch": 4.921925829538061, "grad_norm": 1.7676544189453125, "learning_rate": 3.8547984865691664e-08, "loss": 0.6923, "step": 15130 }, { "epoch": 4.923552374756018, "grad_norm": 1.7147361040115356, "learning_rate": 3.71286437330115e-08, "loss": 0.6894, "step": 15135 }, { "epoch": 4.925178919973975, "grad_norm": 1.447590947151184, "learning_rate": 3.573590576207431e-08, "loss": 0.6609, "step": 15140 }, { "epoch": 4.926805465191932, "grad_norm": 1.9367969036102295, "learning_rate": 3.4369772437137236e-08, "loss": 0.6886, "step": 15145 }, { "epoch": 4.92843201040989, "grad_norm": 1.6576991081237793, "learning_rate": 3.303024521410236e-08, "loss": 0.6972, "step": 15150 }, { "epoch": 4.930058555627847, "grad_norm": 1.6981548070907593, "learning_rate": 3.1717325520513876e-08, "loss": 0.6839, "step": 15155 }, { "epoch": 4.931685100845804, "grad_norm": 1.7181931734085083, "learning_rate": 3.0431014755560874e-08, "loss": 0.6982, "step": 15160 }, { "epoch": 4.9333116460637605, "grad_norm": 1.5971299409866333, "learning_rate": 2.9171314290080132e-08, "loss": 0.6646, "step": 15165 }, { "epoch": 4.934938191281717, "grad_norm": 1.5667660236358643, "learning_rate": 2.7938225466542234e-08, "loss": 0.6716, "step": 15170 }, { "epoch": 4.936564736499674, "grad_norm": 1.5804226398468018, "learning_rate": 2.6731749599065435e-08, "loss": 0.6919, "step": 15175 }, { "epoch": 4.938191281717632, "grad_norm": 1.7205936908721924, "learning_rate": 2.55518879734018e-08, "loss": 0.6852, "step": 15180 }, { "epoch": 4.939817826935589, "grad_norm": 1.7147464752197266, "learning_rate": 2.4398641846937187e-08, "loss": 0.6944, "step": 15185 }, { "epoch": 4.941444372153546, "grad_norm": 1.5842211246490479, "learning_rate": 2.3272012448696813e-08, "loss": 0.6901, "step": 15190 }, { "epoch": 4.943070917371503, "grad_norm": 1.6862308979034424, "learning_rate": 2.2172000979345242e-08, "loss": 0.6886, "step": 15195 }, { "epoch": 4.94469746258946, "grad_norm": 1.6940590143203735, "learning_rate": 2.1098608611169744e-08, "loss": 0.6746, "step": 15200 }, { "epoch": 4.9463240078074175, "grad_norm": 1.7121295928955078, "learning_rate": 2.0051836488094167e-08, "loss": 0.6913, "step": 15205 }, { "epoch": 4.9479505530253745, "grad_norm": 1.5824449062347412, "learning_rate": 1.9031685725678927e-08, "loss": 0.6846, "step": 15210 }, { "epoch": 4.949577098243331, "grad_norm": 1.7550363540649414, "learning_rate": 1.8038157411101597e-08, "loss": 0.6732, "step": 15215 }, { "epoch": 4.951203643461288, "grad_norm": 1.7401025295257568, "learning_rate": 1.7071252603176323e-08, "loss": 0.6761, "step": 15220 }, { "epoch": 4.952830188679245, "grad_norm": 1.7839574813842773, "learning_rate": 1.6130972332345505e-08, "loss": 0.6721, "step": 15225 }, { "epoch": 4.954456733897202, "grad_norm": 1.6651092767715454, "learning_rate": 1.5217317600671467e-08, "loss": 0.6807, "step": 15230 }, { "epoch": 4.956083279115159, "grad_norm": 1.482200264930725, "learning_rate": 1.4330289381844775e-08, "loss": 0.6365, "step": 15235 }, { "epoch": 4.957709824333117, "grad_norm": 1.5187036991119385, "learning_rate": 1.3639838532236716e-08, "loss": 0.6896, "step": 15240 }, { "epoch": 4.959336369551074, "grad_norm": 1.5044300556182861, "learning_rate": 1.2800740399657173e-08, "loss": 0.6782, "step": 15245 }, { "epoch": 4.960962914769031, "grad_norm": 1.6710021495819092, "learning_rate": 1.1988271355295456e-08, "loss": 0.6892, "step": 15250 }, { "epoch": 4.9625894599869875, "grad_norm": 1.9449098110198975, "learning_rate": 1.120243226500617e-08, "loss": 0.6674, "step": 15255 }, { "epoch": 4.964216005204944, "grad_norm": 1.5615839958190918, "learning_rate": 1.0443223966263849e-08, "loss": 0.7005, "step": 15260 }, { "epoch": 4.965842550422902, "grad_norm": 2.059441566467285, "learning_rate": 9.71064726816573e-09, "loss": 0.6934, "step": 15265 }, { "epoch": 4.967469095640859, "grad_norm": 1.6521652936935425, "learning_rate": 9.004702951423416e-09, "loss": 0.6583, "step": 15270 }, { "epoch": 4.969095640858816, "grad_norm": 1.7705581188201904, "learning_rate": 8.32539176836844e-09, "loss": 0.673, "step": 15275 }, { "epoch": 4.970722186076773, "grad_norm": 1.6346153020858765, "learning_rate": 7.672714442952255e-09, "loss": 0.7019, "step": 15280 }, { "epoch": 4.97234873129473, "grad_norm": 1.7785457372665405, "learning_rate": 7.046671670735139e-09, "loss": 0.7038, "step": 15285 }, { "epoch": 4.973975276512687, "grad_norm": 1.717362642288208, "learning_rate": 6.447264118900065e-09, "loss": 0.6931, "step": 15290 }, { "epoch": 4.975601821730644, "grad_norm": 1.881536841392517, "learning_rate": 5.8744924262360556e-09, "loss": 0.6786, "step": 15295 }, { "epoch": 4.9772283669486015, "grad_norm": 1.656629204750061, "learning_rate": 5.328357203157608e-09, "loss": 0.6872, "step": 15300 }, { "epoch": 4.978854912166558, "grad_norm": 1.5284388065338135, "learning_rate": 4.808859031682489e-09, "loss": 0.6721, "step": 15305 }, { "epoch": 4.980481457384515, "grad_norm": 1.5851751565933228, "learning_rate": 4.315998465445614e-09, "loss": 0.6571, "step": 15310 }, { "epoch": 4.982108002602472, "grad_norm": 1.6401901245117188, "learning_rate": 3.849776029690721e-09, "loss": 0.6684, "step": 15315 }, { "epoch": 4.983734547820429, "grad_norm": 1.5574690103530884, "learning_rate": 3.4101922212786965e-09, "loss": 0.6197, "step": 15320 }, { "epoch": 4.985361093038387, "grad_norm": 1.8060216903686523, "learning_rate": 2.9972475086764705e-09, "loss": 0.6743, "step": 15325 }, { "epoch": 4.986987638256344, "grad_norm": 1.485886812210083, "learning_rate": 2.6109423319625736e-09, "loss": 0.6638, "step": 15330 }, { "epoch": 4.988614183474301, "grad_norm": 1.6018400192260742, "learning_rate": 2.2512771028271317e-09, "loss": 0.7085, "step": 15335 }, { "epoch": 4.990240728692258, "grad_norm": 1.5039092302322388, "learning_rate": 1.9182522045690934e-09, "loss": 0.6698, "step": 15340 }, { "epoch": 4.991867273910215, "grad_norm": 1.5566973686218262, "learning_rate": 1.6118679920934521e-09, "loss": 0.6912, "step": 15345 }, { "epoch": 4.9934938191281715, "grad_norm": 1.7856866121292114, "learning_rate": 1.3321247919195756e-09, "loss": 0.6769, "step": 15350 }, { "epoch": 4.995120364346128, "grad_norm": 1.4790265560150146, "learning_rate": 1.0790229021701015e-09, "loss": 0.7025, "step": 15355 }, { "epoch": 4.996746909564086, "grad_norm": 1.7348213195800781, "learning_rate": 8.525625925792646e-10, "loss": 0.6806, "step": 15360 }, { "epoch": 4.998373454782043, "grad_norm": 1.5219783782958984, "learning_rate": 6.527441044873462e-10, "loss": 0.678, "step": 15365 }, { "epoch": 5.0, "grad_norm": 2.1034603118896484, "learning_rate": 4.795676508434488e-10, "loss": 0.6729, "step": 15370 }, { "epoch": 5.0, "eval_f1": 0.8168653004456122, "eval_loss": 0.43408203125, "eval_precision": 0.8175998099908078, "eval_recall": 0.8163082686068494, "eval_runtime": 386.8317, "eval_samples_per_second": 1017.07, "eval_steps_per_second": 1.988, "step": 15370 }, { "epoch": 5.0, "step": 15370, "total_flos": 4.1404949424494346e+18, "train_loss": 0.7952802232433311, "train_runtime": 71244.959, "train_samples_per_second": 220.891, "train_steps_per_second": 0.216 } ], "logging_steps": 5, "max_steps": 15370, "num_input_tokens_seen": 0, "num_train_epochs": 5, "save_steps": 5.0, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 4.1404949424494346e+18, "train_batch_size": 512, "trial_name": null, "trial_params": null }