{
  "best_metric": 0.03396870195865631,
  "best_model_checkpoint": "text-performance-longformer/checkpoint-7374",
  "epoch": 3.0,
  "eval_steps": 500,
  "global_step": 7374,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.01017087062652563,
      "grad_norm": 13.638067245483398,
      "learning_rate": 1.6260162601626018e-06,
      "loss": 0.3817,
      "step": 25
    },
    {
      "epoch": 0.02034174125305126,
      "grad_norm": 6.143240928649902,
      "learning_rate": 3.3197831978319785e-06,
      "loss": 0.2287,
      "step": 50
    },
    {
      "epoch": 0.030512611879576892,
      "grad_norm": 1.5986428260803223,
      "learning_rate": 5.013550135501355e-06,
      "loss": 0.0691,
      "step": 75
    },
    {
      "epoch": 0.04068348250610252,
      "grad_norm": 0.9884291887283325,
      "learning_rate": 6.707317073170733e-06,
      "loss": 0.0543,
      "step": 100
    },
    {
      "epoch": 0.050854353132628156,
      "grad_norm": 3.724142074584961,
      "learning_rate": 8.401084010840109e-06,
      "loss": 0.0619,
      "step": 125
    },
    {
      "epoch": 0.061025223759153785,
      "grad_norm": 2.291118860244751,
      "learning_rate": 1.0094850948509485e-05,
      "loss": 0.0523,
      "step": 150
    },
    {
      "epoch": 0.07119609438567942,
      "grad_norm": 2.3093833923339844,
      "learning_rate": 1.1788617886178862e-05,
      "loss": 0.0541,
      "step": 175
    },
    {
      "epoch": 0.08136696501220504,
      "grad_norm": 3.3519299030303955,
      "learning_rate": 1.348238482384824e-05,
      "loss": 0.0515,
      "step": 200
    },
    {
      "epoch": 0.09153783563873068,
      "grad_norm": 2.0906357765197754,
      "learning_rate": 1.5176151761517615e-05,
      "loss": 0.0529,
      "step": 225
    },
    {
      "epoch": 0.10170870626525631,
      "grad_norm": 1.1683790683746338,
      "learning_rate": 1.6802168021680217e-05,
      "loss": 0.0573,
      "step": 250
    },
    {
      "epoch": 0.11187957689178193,
      "grad_norm": 1.7971270084381104,
      "learning_rate": 1.8495934959349594e-05,
      "loss": 0.0495,
      "step": 275
    },
    {
      "epoch": 0.12205044751830757,
      "grad_norm": 3.8092596530914307,
      "learning_rate": 2.018970189701897e-05,
      "loss": 0.0462,
      "step": 300
    },
    {
      "epoch": 0.1322213181448332,
      "grad_norm": 2.3589837551116943,
      "learning_rate": 2.1883468834688347e-05,
      "loss": 0.044,
      "step": 325
    },
    {
      "epoch": 0.14239218877135884,
      "grad_norm": 1.4069643020629883,
      "learning_rate": 2.3577235772357724e-05,
      "loss": 0.0458,
      "step": 350
    },
    {
      "epoch": 0.15256305939788445,
      "grad_norm": 0.8279024362564087,
      "learning_rate": 2.5271002710027104e-05,
      "loss": 0.0457,
      "step": 375
    },
    {
      "epoch": 0.16273393002441008,
      "grad_norm": 0.7554256916046143,
      "learning_rate": 2.696476964769648e-05,
      "loss": 0.0431,
      "step": 400
    },
    {
      "epoch": 0.17290480065093572,
      "grad_norm": 0.7221837043762207,
      "learning_rate": 2.8658536585365854e-05,
      "loss": 0.0455,
      "step": 425
    },
    {
      "epoch": 0.18307567127746135,
      "grad_norm": 1.551680088043213,
      "learning_rate": 3.035230352303523e-05,
      "loss": 0.0443,
      "step": 450
    },
    {
      "epoch": 0.193246541903987,
      "grad_norm": 1.2256766557693481,
      "learning_rate": 3.204607046070461e-05,
      "loss": 0.0423,
      "step": 475
    },
    {
      "epoch": 0.20341741253051263,
      "grad_norm": 1.9404680728912354,
      "learning_rate": 3.373983739837399e-05,
      "loss": 0.0454,
      "step": 500
    },
    {
      "epoch": 0.21358828315703823,
      "grad_norm": 1.481037974357605,
      "learning_rate": 3.5433604336043364e-05,
      "loss": 0.0549,
      "step": 525
    },
    {
      "epoch": 0.22375915378356387,
      "grad_norm": 1.0439728498458862,
      "learning_rate": 3.712737127371274e-05,
      "loss": 0.0474,
      "step": 550
    },
    {
      "epoch": 0.2339300244100895,
      "grad_norm": 2.524623394012451,
      "learning_rate": 3.882113821138211e-05,
      "loss": 0.0442,
      "step": 575
    },
    {
      "epoch": 0.24410089503661514,
      "grad_norm": 0.46617817878723145,
      "learning_rate": 4.051490514905149e-05,
      "loss": 0.0416,
      "step": 600
    },
    {
      "epoch": 0.25427176566314075,
      "grad_norm": 0.9644585251808167,
      "learning_rate": 4.220867208672087e-05,
      "loss": 0.0459,
      "step": 625
    },
    {
      "epoch": 0.2644426362896664,
      "grad_norm": 0.7916778922080994,
      "learning_rate": 4.390243902439025e-05,
      "loss": 0.0453,
      "step": 650
    },
    {
      "epoch": 0.274613506916192,
      "grad_norm": 0.41167861223220825,
      "learning_rate": 4.5596205962059624e-05,
      "loss": 0.0447,
      "step": 675
    },
    {
      "epoch": 0.2847843775427177,
      "grad_norm": 2.670891046524048,
      "learning_rate": 4.7289972899729e-05,
      "loss": 0.0436,
      "step": 700
    },
    {
      "epoch": 0.2949552481692433,
      "grad_norm": 0.5455201268196106,
      "learning_rate": 4.898373983739837e-05,
      "loss": 0.046,
      "step": 725
    },
    {
      "epoch": 0.3051261187957689,
      "grad_norm": 0.477638304233551,
      "learning_rate": 4.9924653405666065e-05,
      "loss": 0.0461,
      "step": 750
    },
    {
      "epoch": 0.31529698942229456,
      "grad_norm": 1.8583427667617798,
      "learning_rate": 4.973628691983123e-05,
      "loss": 0.046,
      "step": 775
    },
    {
      "epoch": 0.32546786004882017,
      "grad_norm": 1.4675369262695312,
      "learning_rate": 4.954792043399639e-05,
      "loss": 0.043,
      "step": 800
    },
    {
      "epoch": 0.33563873067534583,
      "grad_norm": 0.5187409520149231,
      "learning_rate": 4.9359553948161545e-05,
      "loss": 0.0465,
      "step": 825
    },
    {
      "epoch": 0.34580960130187144,
      "grad_norm": 1.1287797689437866,
      "learning_rate": 4.917118746232671e-05,
      "loss": 0.0395,
      "step": 850
    },
    {
      "epoch": 0.35598047192839705,
      "grad_norm": 1.6059693098068237,
      "learning_rate": 4.8982820976491866e-05,
      "loss": 0.0463,
      "step": 875
    },
    {
      "epoch": 0.3661513425549227,
      "grad_norm": 0.5075823664665222,
      "learning_rate": 4.8794454490657024e-05,
      "loss": 0.0431,
      "step": 900
    },
    {
      "epoch": 0.3763222131814483,
      "grad_norm": 0.4540039896965027,
      "learning_rate": 4.860608800482219e-05,
      "loss": 0.0444,
      "step": 925
    },
    {
      "epoch": 0.386493083807974,
      "grad_norm": 0.3790999948978424,
      "learning_rate": 4.8417721518987346e-05,
      "loss": 0.0408,
      "step": 950
    },
    {
      "epoch": 0.3966639544344996,
      "grad_norm": 0.35743555426597595,
      "learning_rate": 4.82293550331525e-05,
      "loss": 0.0412,
      "step": 975
    },
    {
      "epoch": 0.40683482506102525,
      "grad_norm": 0.9319044351577759,
      "learning_rate": 4.804098854731766e-05,
      "loss": 0.0383,
      "step": 1000
    },
    {
      "epoch": 0.41700569568755086,
      "grad_norm": 0.9398106336593628,
      "learning_rate": 4.785262206148282e-05,
      "loss": 0.0439,
      "step": 1025
    },
    {
      "epoch": 0.42717656631407647,
      "grad_norm": 0.5996136665344238,
      "learning_rate": 4.766425557564798e-05,
      "loss": 0.0455,
      "step": 1050
    },
    {
      "epoch": 0.43734743694060213,
      "grad_norm": 2.0928783416748047,
      "learning_rate": 4.747588908981314e-05,
      "loss": 0.0451,
      "step": 1075
    },
    {
      "epoch": 0.44751830756712774,
      "grad_norm": 0.42079484462738037,
      "learning_rate": 4.7287522603978304e-05,
      "loss": 0.0476,
      "step": 1100
    },
    {
      "epoch": 0.4576891781936534,
      "grad_norm": 1.7878057956695557,
      "learning_rate": 4.709915611814346e-05,
      "loss": 0.0434,
      "step": 1125
    },
    {
      "epoch": 0.467860048820179,
      "grad_norm": 0.3113216459751129,
      "learning_rate": 4.691078963230862e-05,
      "loss": 0.042,
      "step": 1150
    },
    {
      "epoch": 0.4780309194467046,
      "grad_norm": 0.319442480802536,
      "learning_rate": 4.6722423146473784e-05,
      "loss": 0.0409,
      "step": 1175
    },
    {
      "epoch": 0.4882017900732303,
      "grad_norm": 0.9614000916481018,
      "learning_rate": 4.653405666063894e-05,
      "loss": 0.0391,
      "step": 1200
    },
    {
      "epoch": 0.4983726606997559,
      "grad_norm": 0.6299770474433899,
      "learning_rate": 4.63456901748041e-05,
      "loss": 0.0414,
      "step": 1225
    },
    {
      "epoch": 0.5085435313262815,
      "grad_norm": 0.8669236898422241,
      "learning_rate": 4.615732368896926e-05,
      "loss": 0.0388,
      "step": 1250
    },
    {
      "epoch": 0.5187144019528072,
      "grad_norm": 0.6755848526954651,
      "learning_rate": 4.596895720313442e-05,
      "loss": 0.0421,
      "step": 1275
    },
    {
      "epoch": 0.5288852725793328,
      "grad_norm": 0.5094274282455444,
      "learning_rate": 4.5780590717299585e-05,
      "loss": 0.0408,
      "step": 1300
    },
    {
      "epoch": 0.5390561432058584,
      "grad_norm": 1.5034645795822144,
      "learning_rate": 4.559222423146474e-05,
      "loss": 0.0467,
      "step": 1325
    },
    {
      "epoch": 0.549227013832384,
      "grad_norm": 1.6210927963256836,
      "learning_rate": 4.54038577456299e-05,
      "loss": 0.0443,
      "step": 1350
    },
    {
      "epoch": 0.5593978844589097,
      "grad_norm": 0.4528130292892456,
      "learning_rate": 4.5215491259795064e-05,
      "loss": 0.041,
      "step": 1375
    },
    {
      "epoch": 0.5695687550854354,
      "grad_norm": 0.6066830158233643,
      "learning_rate": 4.5027124773960215e-05,
      "loss": 0.0427,
      "step": 1400
    },
    {
      "epoch": 0.5797396257119609,
      "grad_norm": 0.2950328588485718,
      "learning_rate": 4.483875828812538e-05,
      "loss": 0.0417,
      "step": 1425
    },
    {
      "epoch": 0.5899104963384866,
      "grad_norm": 0.425102561712265,
      "learning_rate": 4.4650391802290537e-05,
      "loss": 0.0391,
      "step": 1450
    },
    {
      "epoch": 0.6000813669650122,
      "grad_norm": 0.5589340329170227,
      "learning_rate": 4.4462025316455694e-05,
      "loss": 0.0406,
      "step": 1475
    },
    {
      "epoch": 0.6102522375915378,
      "grad_norm": 3.0561866760253906,
      "learning_rate": 4.427365883062086e-05,
      "loss": 0.0403,
      "step": 1500
    },
    {
      "epoch": 0.6204231082180635,
      "grad_norm": 0.6851157546043396,
      "learning_rate": 4.4085292344786016e-05,
      "loss": 0.0405,
      "step": 1525
    },
    {
      "epoch": 0.6305939788445891,
      "grad_norm": 0.5816906690597534,
      "learning_rate": 4.389692585895118e-05,
      "loss": 0.0465,
      "step": 1550
    },
    {
      "epoch": 0.6407648494711147,
      "grad_norm": 1.0224462747573853,
      "learning_rate": 4.370855937311634e-05,
      "loss": 0.0419,
      "step": 1575
    },
    {
      "epoch": 0.6509357200976403,
      "grad_norm": 0.4752540588378906,
      "learning_rate": 4.3520192887281495e-05,
      "loss": 0.0405,
      "step": 1600
    },
    {
      "epoch": 0.661106590724166,
      "grad_norm": 1.1999990940093994,
      "learning_rate": 4.333182640144666e-05,
      "loss": 0.041,
      "step": 1625
    },
    {
      "epoch": 0.6712774613506917,
      "grad_norm": 0.40787383913993835,
      "learning_rate": 4.314345991561182e-05,
      "loss": 0.0411,
      "step": 1650
    },
    {
      "epoch": 0.6814483319772172,
      "grad_norm": 0.26120448112487793,
      "learning_rate": 4.2955093429776974e-05,
      "loss": 0.0401,
      "step": 1675
    },
    {
      "epoch": 0.6916192026037429,
      "grad_norm": 0.5405380725860596,
      "learning_rate": 4.276672694394214e-05,
      "loss": 0.0387,
      "step": 1700
    },
    {
      "epoch": 0.7017900732302685,
      "grad_norm": 0.2800443768501282,
      "learning_rate": 4.2578360458107296e-05,
      "loss": 0.0411,
      "step": 1725
    },
    {
      "epoch": 0.7119609438567941,
      "grad_norm": 0.5752384662628174,
      "learning_rate": 4.2389993972272454e-05,
      "loss": 0.0404,
      "step": 1750
    },
    {
      "epoch": 0.7221318144833198,
      "grad_norm": 0.778127133846283,
      "learning_rate": 4.220162748643762e-05,
      "loss": 0.0428,
      "step": 1775
    },
    {
      "epoch": 0.7323026851098454,
      "grad_norm": 1.7695764303207397,
      "learning_rate": 4.2013261000602775e-05,
      "loss": 0.0353,
      "step": 1800
    },
    {
      "epoch": 0.7424735557363711,
      "grad_norm": 0.2690475285053253,
      "learning_rate": 4.182489451476794e-05,
      "loss": 0.0404,
      "step": 1825
    },
    {
      "epoch": 0.7526444263628966,
      "grad_norm": 0.506496012210846,
      "learning_rate": 4.163652802893309e-05,
      "loss": 0.0402,
      "step": 1850
    },
    {
      "epoch": 0.7628152969894223,
      "grad_norm": 0.7220098972320557,
      "learning_rate": 4.1448161543098255e-05,
      "loss": 0.0372,
      "step": 1875
    },
    {
      "epoch": 0.772986167615948,
      "grad_norm": 1.1999934911727905,
      "learning_rate": 4.125979505726341e-05,
      "loss": 0.0429,
      "step": 1900
    },
    {
      "epoch": 0.7831570382424735,
      "grad_norm": 0.5055158734321594,
      "learning_rate": 4.107142857142857e-05,
      "loss": 0.0367,
      "step": 1925
    },
    {
      "epoch": 0.7933279088689992,
      "grad_norm": 0.5015272498130798,
      "learning_rate": 4.0883062085593734e-05,
      "loss": 0.0376,
      "step": 1950
    },
    {
      "epoch": 0.8034987794955248,
      "grad_norm": 0.6382879018783569,
      "learning_rate": 4.069469559975889e-05,
      "loss": 0.0387,
      "step": 1975
    },
    {
      "epoch": 0.8136696501220505,
      "grad_norm": 1.4380210638046265,
      "learning_rate": 4.050632911392405e-05,
      "loss": 0.0433,
      "step": 2000
    },
    {
      "epoch": 0.823840520748576,
      "grad_norm": 0.46868982911109924,
      "learning_rate": 4.031796262808921e-05,
      "loss": 0.0409,
      "step": 2025
    },
    {
      "epoch": 0.8340113913751017,
      "grad_norm": 0.47722935676574707,
      "learning_rate": 4.012959614225437e-05,
      "loss": 0.0367,
      "step": 2050
    },
    {
      "epoch": 0.8441822620016274,
      "grad_norm": 0.66898113489151,
      "learning_rate": 3.9941229656419535e-05,
      "loss": 0.0426,
      "step": 2075
    },
    {
      "epoch": 0.8543531326281529,
      "grad_norm": 0.640957772731781,
      "learning_rate": 3.975286317058469e-05,
      "loss": 0.0374,
      "step": 2100
    },
    {
      "epoch": 0.8645240032546786,
      "grad_norm": 2.0676088333129883,
      "learning_rate": 3.956449668474985e-05,
      "loss": 0.0391,
      "step": 2125
    },
    {
      "epoch": 0.8746948738812043,
      "grad_norm": 0.3028632402420044,
      "learning_rate": 3.9376130198915014e-05,
      "loss": 0.0379,
      "step": 2150
    },
    {
      "epoch": 0.8848657445077298,
      "grad_norm": 3.03582763671875,
      "learning_rate": 3.918776371308017e-05,
      "loss": 0.0396,
      "step": 2175
    },
    {
      "epoch": 0.8950366151342555,
      "grad_norm": 0.5792508721351624,
      "learning_rate": 3.899939722724533e-05,
      "loss": 0.0393,
      "step": 2200
    },
    {
      "epoch": 0.9052074857607811,
      "grad_norm": 0.6225530505180359,
      "learning_rate": 3.8811030741410494e-05,
      "loss": 0.0413,
      "step": 2225
    },
    {
      "epoch": 0.9153783563873068,
      "grad_norm": 1.2196191549301147,
      "learning_rate": 3.862266425557565e-05,
      "loss": 0.0411,
      "step": 2250
    },
    {
      "epoch": 0.9255492270138324,
      "grad_norm": 1.1619917154312134,
      "learning_rate": 3.843429776974081e-05,
      "loss": 0.0372,
      "step": 2275
    },
    {
      "epoch": 0.935720097640358,
      "grad_norm": 0.4266558289527893,
      "learning_rate": 3.8245931283905966e-05,
      "loss": 0.0382,
      "step": 2300
    },
    {
      "epoch": 0.9458909682668837,
      "grad_norm": 0.2716640830039978,
      "learning_rate": 3.8057564798071124e-05,
      "loss": 0.0364,
      "step": 2325
    },
    {
      "epoch": 0.9560618388934092,
      "grad_norm": 0.3579002618789673,
      "learning_rate": 3.787673297166968e-05,
      "loss": 0.0397,
      "step": 2350
    },
    {
      "epoch": 0.9662327095199349,
      "grad_norm": 0.3124788999557495,
      "learning_rate": 3.768836648583484e-05,
      "loss": 0.0367,
      "step": 2375
    },
    {
      "epoch": 0.9764035801464606,
      "grad_norm": 0.7987418174743652,
      "learning_rate": 3.7500000000000003e-05,
      "loss": 0.0425,
      "step": 2400
    },
    {
      "epoch": 0.9865744507729862,
      "grad_norm": 0.623573899269104,
      "learning_rate": 3.731163351416516e-05,
      "loss": 0.0407,
      "step": 2425
    },
    {
      "epoch": 0.9967453213995118,
      "grad_norm": 0.6313973665237427,
      "learning_rate": 3.712326702833032e-05,
      "loss": 0.0359,
      "step": 2450
    },
    {
      "epoch": 1.0,
      "eval_explained_variance": 0.22635483741760254,
      "eval_loss": 0.03618196025490761,
      "eval_mae": 0.1520702838897705,
      "eval_mse": 0.03618059679865837,
      "eval_r2": 0.22514164447784424,
      "eval_rmse": 0.19021197858877967,
      "eval_runtime": 39.05,
      "eval_samples_per_second": 503.405,
      "eval_steps_per_second": 7.887,
      "step": 2458
    },
    {
      "epoch": 1.0069161920260374,
      "grad_norm": 0.5591532588005066,
      "learning_rate": 3.693490054249548e-05,
      "loss": 0.0361,
      "step": 2475
    },
    {
      "epoch": 1.017087062652563,
      "grad_norm": 0.35737472772598267,
      "learning_rate": 3.674653405666064e-05,
      "loss": 0.035,
      "step": 2500
    },
    {
      "epoch": 1.0272579332790888,
      "grad_norm": 1.0595606565475464,
      "learning_rate": 3.6558167570825805e-05,
      "loss": 0.0374,
      "step": 2525
    },
    {
      "epoch": 1.0374288039056143,
      "grad_norm": 0.42890357971191406,
      "learning_rate": 3.636980108499096e-05,
      "loss": 0.0366,
      "step": 2550
    },
    {
      "epoch": 1.0475996745321399,
      "grad_norm": 0.41043972969055176,
      "learning_rate": 3.618143459915612e-05,
      "loss": 0.0377,
      "step": 2575
    },
    {
      "epoch": 1.0577705451586656,
      "grad_norm": 0.5887218117713928,
      "learning_rate": 3.5993068113321284e-05,
      "loss": 0.0391,
      "step": 2600
    },
    {
      "epoch": 1.0679414157851912,
      "grad_norm": 0.245023712515831,
      "learning_rate": 3.5804701627486435e-05,
      "loss": 0.032,
      "step": 2625
    },
    {
      "epoch": 1.0781122864117167,
      "grad_norm": 0.4018308222293854,
      "learning_rate": 3.56163351416516e-05,
      "loss": 0.0401,
      "step": 2650
    },
    {
      "epoch": 1.0882831570382425,
      "grad_norm": 1.287443995475769,
      "learning_rate": 3.5427968655816756e-05,
      "loss": 0.0366,
      "step": 2675
    },
    {
      "epoch": 1.098454027664768,
      "grad_norm": 0.9983925223350525,
      "learning_rate": 3.5239602169981914e-05,
      "loss": 0.0408,
      "step": 2700
    },
    {
      "epoch": 1.1086248982912936,
      "grad_norm": 1.045338749885559,
      "learning_rate": 3.505123568414708e-05,
      "loss": 0.0411,
      "step": 2725
    },
    {
      "epoch": 1.1187957689178194,
      "grad_norm": 0.2734503746032715,
      "learning_rate": 3.4862869198312236e-05,
      "loss": 0.0373,
      "step": 2750
    },
    {
      "epoch": 1.128966639544345,
      "grad_norm": 0.4091496169567108,
      "learning_rate": 3.46745027124774e-05,
      "loss": 0.0362,
      "step": 2775
    },
    {
      "epoch": 1.1391375101708707,
      "grad_norm": 1.026307225227356,
      "learning_rate": 3.448613622664256e-05,
      "loss": 0.036,
      "step": 2800
    },
    {
      "epoch": 1.1493083807973963,
      "grad_norm": 0.5424162149429321,
      "learning_rate": 3.4297769740807715e-05,
      "loss": 0.0362,
      "step": 2825
    },
    {
      "epoch": 1.1594792514239218,
      "grad_norm": 0.4070860743522644,
      "learning_rate": 3.410940325497288e-05,
      "loss": 0.0338,
      "step": 2850
    },
    {
      "epoch": 1.1696501220504476,
      "grad_norm": 0.861303448677063,
      "learning_rate": 3.392103676913804e-05,
      "loss": 0.0371,
      "step": 2875
    },
    {
      "epoch": 1.1798209926769732,
      "grad_norm": 0.6860642433166504,
      "learning_rate": 3.3732670283303194e-05,
      "loss": 0.0369,
      "step": 2900
    },
    {
      "epoch": 1.1899918633034987,
      "grad_norm": 0.6697980761528015,
      "learning_rate": 3.354430379746836e-05,
      "loss": 0.0379,
      "step": 2925
    },
    {
      "epoch": 1.2001627339300245,
      "grad_norm": 0.6728507876396179,
      "learning_rate": 3.3355937311633516e-05,
      "loss": 0.038,
      "step": 2950
    },
    {
      "epoch": 1.21033360455655,
      "grad_norm": 0.4306727349758148,
      "learning_rate": 3.316757082579868e-05,
      "loss": 0.0381,
      "step": 2975
    },
    {
      "epoch": 1.2205044751830756,
      "grad_norm": 0.8999012112617493,
      "learning_rate": 3.297920433996384e-05,
      "loss": 0.0393,
      "step": 3000
    },
    {
      "epoch": 1.2306753458096014,
      "grad_norm": 0.358962744474411,
      "learning_rate": 3.2790837854128995e-05,
      "loss": 0.0374,
      "step": 3025
    },
    {
      "epoch": 1.240846216436127,
      "grad_norm": 0.8197824954986572,
      "learning_rate": 3.260247136829416e-05,
      "loss": 0.0391,
      "step": 3050
    },
    {
      "epoch": 1.2510170870626527,
      "grad_norm": 0.6671149730682373,
      "learning_rate": 3.241410488245931e-05,
      "loss": 0.0357,
      "step": 3075
    },
    {
      "epoch": 1.2611879576891782,
      "grad_norm": 0.8932905197143555,
      "learning_rate": 3.2225738396624475e-05,
      "loss": 0.038,
      "step": 3100
    },
    {
      "epoch": 1.2713588283157038,
      "grad_norm": 0.3033260405063629,
      "learning_rate": 3.203737191078963e-05,
      "loss": 0.0364,
      "step": 3125
    },
    {
      "epoch": 1.2815296989422293,
      "grad_norm": 0.6071414351463318,
      "learning_rate": 3.184900542495479e-05,
      "loss": 0.0325,
      "step": 3150
    },
    {
      "epoch": 1.2917005695687551,
      "grad_norm": 0.28337907791137695,
      "learning_rate": 3.1660638939119954e-05,
      "loss": 0.0373,
      "step": 3175
    },
    {
      "epoch": 1.3018714401952807,
      "grad_norm": 0.5393190979957581,
      "learning_rate": 3.147227245328511e-05,
      "loss": 0.0362,
      "step": 3200
    },
    {
      "epoch": 1.3120423108218064,
      "grad_norm": 1.3214200735092163,
      "learning_rate": 3.128390596745027e-05,
      "loss": 0.0368,
      "step": 3225
    },
    {
      "epoch": 1.322213181448332,
      "grad_norm": 0.5053825974464417,
      "learning_rate": 3.109553948161543e-05,
      "loss": 0.0375,
      "step": 3250
    },
    {
      "epoch": 1.3323840520748575,
      "grad_norm": 0.4198523759841919,
      "learning_rate": 3.090717299578059e-05,
      "loss": 0.0404,
      "step": 3275
    },
    {
      "epoch": 1.342554922701383,
      "grad_norm": 0.9187168478965759,
      "learning_rate": 3.0718806509945755e-05,
      "loss": 0.0374,
      "step": 3300
    },
    {
      "epoch": 1.3527257933279089,
      "grad_norm": 0.3942495882511139,
      "learning_rate": 3.053044002411091e-05,
      "loss": 0.0362,
      "step": 3325
    },
    {
      "epoch": 1.3628966639544344,
      "grad_norm": 0.6447917819023132,
      "learning_rate": 3.0342073538276073e-05,
      "loss": 0.038,
      "step": 3350
    },
    {
      "epoch": 1.3730675345809602,
      "grad_norm": 0.3163827061653137,
      "learning_rate": 3.015370705244123e-05,
      "loss": 0.0369,
      "step": 3375
    },
    {
      "epoch": 1.3832384052074858,
      "grad_norm": 0.3072253465652466,
      "learning_rate": 2.9965340566606392e-05,
      "loss": 0.0385,
      "step": 3400
    },
    {
      "epoch": 1.3934092758340113,
      "grad_norm": 0.4808538556098938,
      "learning_rate": 2.9776974080771553e-05,
      "loss": 0.0378,
      "step": 3425
    },
    {
      "epoch": 1.403580146460537,
      "grad_norm": 0.2448228895664215,
      "learning_rate": 2.9588607594936714e-05,
      "loss": 0.0337,
      "step": 3450
    },
    {
      "epoch": 1.4137510170870626,
      "grad_norm": 0.42077022790908813,
      "learning_rate": 2.940024110910187e-05,
      "loss": 0.0359,
      "step": 3475
    },
    {
      "epoch": 1.4239218877135884,
      "grad_norm": 0.48862871527671814,
      "learning_rate": 2.921187462326703e-05,
      "loss": 0.0378,
      "step": 3500
    },
    {
      "epoch": 1.434092758340114,
      "grad_norm": 0.5912586450576782,
      "learning_rate": 2.9023508137432186e-05,
      "loss": 0.0384,
      "step": 3525
    },
    {
      "epoch": 1.4442636289666395,
      "grad_norm": 0.4059402644634247,
      "learning_rate": 2.8835141651597347e-05,
      "loss": 0.0381,
      "step": 3550
    },
    {
      "epoch": 1.454434499593165,
      "grad_norm": 0.3919837176799774,
      "learning_rate": 2.8646775165762508e-05,
      "loss": 0.036,
      "step": 3575
    },
    {
      "epoch": 1.4646053702196908,
      "grad_norm": 0.2935680150985718,
      "learning_rate": 2.845840867992767e-05,
      "loss": 0.0368,
      "step": 3600
    },
    {
      "epoch": 1.4747762408462164,
      "grad_norm": 0.7148743867874146,
      "learning_rate": 2.8270042194092826e-05,
      "loss": 0.0386,
      "step": 3625
    },
    {
      "epoch": 1.4849471114727422,
      "grad_norm": 1.314514398574829,
      "learning_rate": 2.8081675708257987e-05,
      "loss": 0.0375,
      "step": 3650
    },
    {
      "epoch": 1.4951179820992677,
      "grad_norm": 1.6261988878250122,
      "learning_rate": 2.7893309222423148e-05,
      "loss": 0.0376,
      "step": 3675
    },
    {
      "epoch": 1.5052888527257933,
      "grad_norm": 1.105427861213684,
      "learning_rate": 2.770494273658831e-05,
      "loss": 0.0354,
      "step": 3700
    },
    {
      "epoch": 1.5154597233523188,
      "grad_norm": 0.5577530264854431,
      "learning_rate": 2.7516576250753466e-05,
      "loss": 0.0362,
      "step": 3725
    },
    {
      "epoch": 1.5256305939788446,
      "grad_norm": 0.49557003378868103,
      "learning_rate": 2.7328209764918627e-05,
      "loss": 0.0389,
      "step": 3750
    },
    {
      "epoch": 1.5358014646053704,
      "grad_norm": 0.8892014622688293,
      "learning_rate": 2.7139843279083788e-05,
      "loss": 0.0379,
      "step": 3775
    },
    {
      "epoch": 1.545972335231896,
      "grad_norm": 0.5090736150741577,
      "learning_rate": 2.695147679324895e-05,
      "loss": 0.0359,
      "step": 3800
    },
    {
      "epoch": 1.5561432058584215,
      "grad_norm": 0.8963241577148438,
      "learning_rate": 2.6763110307414107e-05,
      "loss": 0.0421,
      "step": 3825
    },
    {
      "epoch": 1.566314076484947,
      "grad_norm": 0.3889683485031128,
      "learning_rate": 2.6574743821579268e-05,
      "loss": 0.0362,
      "step": 3850
    },
    {
      "epoch": 1.5764849471114726,
      "grad_norm": 0.6879289150238037,
      "learning_rate": 2.638637733574443e-05,
      "loss": 0.0367,
      "step": 3875
    },
    {
      "epoch": 1.5866558177379984,
      "grad_norm": 1.1574759483337402,
      "learning_rate": 2.619801084990959e-05,
      "loss": 0.0387,
      "step": 3900
    },
    {
      "epoch": 1.5968266883645241,
      "grad_norm": 1.3032798767089844,
      "learning_rate": 2.6009644364074747e-05,
      "loss": 0.0366,
      "step": 3925
    },
    {
      "epoch": 1.6069975589910497,
      "grad_norm": 1.0193997621536255,
      "learning_rate": 2.58212778782399e-05,
      "loss": 0.0325,
      "step": 3950
    },
    {
      "epoch": 1.6171684296175752,
      "grad_norm": 1.767223834991455,
      "learning_rate": 2.5632911392405062e-05,
      "loss": 0.0393,
      "step": 3975
    },
    {
      "epoch": 1.6273393002441008,
      "grad_norm": 1.016648530960083,
      "learning_rate": 2.5444544906570223e-05,
      "loss": 0.0333,
      "step": 4000
    },
    {
      "epoch": 1.6375101708706266,
      "grad_norm": 2.0735578536987305,
      "learning_rate": 2.5256178420735384e-05,
      "loss": 0.0355,
      "step": 4025
    },
    {
      "epoch": 1.647681041497152,
      "grad_norm": 0.8982949256896973,
      "learning_rate": 2.506781193490054e-05,
      "loss": 0.0369,
      "step": 4050
    },
    {
      "epoch": 1.6578519121236779,
      "grad_norm": 0.324400395154953,
      "learning_rate": 2.4879445449065702e-05,
      "loss": 0.0358,
      "step": 4075
    },
    {
      "epoch": 1.6680227827502034,
      "grad_norm": 0.32701972126960754,
      "learning_rate": 2.4691078963230863e-05,
      "loss": 0.0336,
      "step": 4100
    },
    {
      "epoch": 1.678193653376729,
      "grad_norm": 1.151262640953064,
      "learning_rate": 2.4502712477396024e-05,
      "loss": 0.0392,
      "step": 4125
    },
    {
      "epoch": 1.6883645240032545,
      "grad_norm": 0.5716719627380371,
      "learning_rate": 2.431434599156118e-05,
      "loss": 0.0383,
      "step": 4150
    },
    {
      "epoch": 1.6985353946297803,
      "grad_norm": 0.8748169541358948,
      "learning_rate": 2.4125979505726342e-05,
      "loss": 0.0344,
      "step": 4175
    },
    {
      "epoch": 1.708706265256306,
      "grad_norm": 0.25271666049957275,
      "learning_rate": 2.3937613019891503e-05,
      "loss": 0.0351,
      "step": 4200
    },
    {
      "epoch": 1.7188771358828316,
      "grad_norm": 0.23297059535980225,
      "learning_rate": 2.3749246534056664e-05,
      "loss": 0.0337,
      "step": 4225
    },
    {
      "epoch": 1.7290480065093572,
      "grad_norm": 0.3409133851528168,
      "learning_rate": 2.356088004822182e-05,
      "loss": 0.0346,
      "step": 4250
    },
    {
      "epoch": 1.7392188771358827,
      "grad_norm": 0.822523832321167,
      "learning_rate": 2.337251356238698e-05,
      "loss": 0.0374,
      "step": 4275
    },
    {
      "epoch": 1.7493897477624083,
      "grad_norm": 1.9754129648208618,
      "learning_rate": 2.318414707655214e-05,
      "loss": 0.0409,
      "step": 4300
    },
    {
      "epoch": 1.759560618388934,
      "grad_norm": 0.49358049035072327,
      "learning_rate": 2.29957805907173e-05,
      "loss": 0.0379,
      "step": 4325
    },
    {
      "epoch": 1.7697314890154598,
      "grad_norm": 0.6075097918510437,
      "learning_rate": 2.280741410488246e-05,
      "loss": 0.0358,
      "step": 4350
    },
    {
      "epoch": 1.7799023596419854,
      "grad_norm": 0.5666526556015015,
      "learning_rate": 2.261904761904762e-05,
      "loss": 0.0337,
      "step": 4375
    },
    {
      "epoch": 1.790073230268511,
      "grad_norm": 0.7485412955284119,
      "learning_rate": 2.243068113321278e-05,
      "loss": 0.037,
      "step": 4400
    },
    {
      "epoch": 1.8002441008950365,
      "grad_norm": 0.585403323173523,
      "learning_rate": 2.224231464737794e-05,
      "loss": 0.0379,
      "step": 4425
    },
    {
      "epoch": 1.8104149715215623,
      "grad_norm": 0.7822312712669373,
      "learning_rate": 2.2053948161543102e-05,
      "loss": 0.0368,
      "step": 4450
    },
    {
      "epoch": 1.8205858421480878,
      "grad_norm": 0.3547162413597107,
      "learning_rate": 2.186558167570826e-05,
      "loss": 0.038,
      "step": 4475
    },
    {
      "epoch": 1.8307567127746136,
      "grad_norm": 0.5509994626045227,
      "learning_rate": 2.1677215189873417e-05,
      "loss": 0.0371,
      "step": 4500
    },
    {
      "epoch": 1.8409275834011392,
      "grad_norm": 1.1359673738479614,
      "learning_rate": 2.1488848704038578e-05,
      "loss": 0.0321,
      "step": 4525
    },
    {
      "epoch": 1.8510984540276647,
      "grad_norm": 0.7981705069541931,
      "learning_rate": 2.130048221820374e-05,
      "loss": 0.0369,
      "step": 4550
    },
    {
      "epoch": 1.8612693246541903,
      "grad_norm": 0.3582057058811188,
      "learning_rate": 2.11121157323689e-05,
      "loss": 0.0412,
      "step": 4575
    },
    {
      "epoch": 1.871440195280716,
      "grad_norm": 0.9928992986679077,
      "learning_rate": 2.0923749246534057e-05,
      "loss": 0.0352,
      "step": 4600
    },
    {
      "epoch": 1.8816110659072418,
      "grad_norm": 0.48575785756111145,
      "learning_rate": 2.0735382760699218e-05,
      "loss": 0.035,
      "step": 4625
    },
    {
      "epoch": 1.8917819365337674,
      "grad_norm": 0.5365208387374878,
      "learning_rate": 2.054701627486438e-05,
      "loss": 0.0379,
      "step": 4650
    },
    {
      "epoch": 1.901952807160293,
      "grad_norm": 1.141358494758606,
      "learning_rate": 2.0358649789029536e-05,
      "loss": 0.0355,
      "step": 4675
    },
    {
      "epoch": 1.9121236777868185,
      "grad_norm": 0.43180742859840393,
      "learning_rate": 2.0170283303194694e-05,
      "loss": 0.0354,
      "step": 4700
    },
    {
      "epoch": 1.922294548413344,
      "grad_norm": 0.7140740752220154,
      "learning_rate": 1.9981916817359855e-05,
      "loss": 0.0355,
      "step": 4725
    },
    {
      "epoch": 1.9324654190398698,
      "grad_norm": 0.30647122859954834,
      "learning_rate": 1.9793550331525016e-05,
      "loss": 0.0371,
      "step": 4750
    },
    {
      "epoch": 1.9426362896663956,
      "grad_norm": 0.42196792364120483,
      "learning_rate": 1.9605183845690177e-05,
      "loss": 0.0356,
      "step": 4775
    },
    {
      "epoch": 1.9528071602929211,
      "grad_norm": 0.6331903338432312,
      "learning_rate": 1.9416817359855334e-05,
      "loss": 0.0352,
      "step": 4800
    },
    {
      "epoch": 1.9629780309194467,
      "grad_norm": 0.7057808637619019,
      "learning_rate": 1.9228450874020495e-05,
      "loss": 0.0364,
      "step": 4825
    },
    {
      "epoch": 1.9731489015459722,
      "grad_norm": 0.49434205889701843,
      "learning_rate": 1.9040084388185656e-05,
      "loss": 0.0347,
      "step": 4850
    },
    {
      "epoch": 1.983319772172498,
      "grad_norm": 0.3139288127422333,
      "learning_rate": 1.8851717902350817e-05,
      "loss": 0.0364,
      "step": 4875
    },
    {
      "epoch": 1.9934906427990235,
      "grad_norm": 0.3922992944717407,
      "learning_rate": 1.8663351416515974e-05,
      "loss": 0.036,
      "step": 4900
    },
    {
      "epoch": 2.0,
      "eval_explained_variance": 0.25461888313293457,
      "eval_loss": 0.03482421860098839,
      "eval_mae": 0.14410310983657837,
      "eval_mse": 0.0348237045109272,
      "eval_r2": 0.25420135259628296,
      "eval_rmse": 0.18661110500430353,
      "eval_runtime": 39.031,
      "eval_samples_per_second": 503.652,
      "eval_steps_per_second": 7.891,
      "step": 4916
    },
    {
      "epoch": 2.0036615134255493,
      "grad_norm": 0.5134842395782471,
      "learning_rate": 1.8474984930681132e-05,
      "loss": 0.0344,
      "step": 4925
    },
    {
      "epoch": 2.013832384052075,
      "grad_norm": 0.3377295136451721,
      "learning_rate": 1.8286618444846293e-05,
      "loss": 0.0336,
      "step": 4950
    },
    {
      "epoch": 2.0240032546786004,
      "grad_norm": 0.3855837285518646,
      "learning_rate": 1.8098251959011453e-05,
      "loss": 0.0316,
      "step": 4975
    },
    {
      "epoch": 2.034174125305126,
      "grad_norm": 0.4808228313922882,
      "learning_rate": 1.7909885473176614e-05,
      "loss": 0.0347,
      "step": 5000
    },
    {
      "epoch": 2.0443449959316515,
      "grad_norm": 0.6781342029571533,
      "learning_rate": 1.7721518987341772e-05,
      "loss": 0.0366,
      "step": 5025
    },
    {
      "epoch": 2.0545158665581775,
      "grad_norm": 0.5457364320755005,
      "learning_rate": 1.7533152501506933e-05,
      "loss": 0.0326,
      "step": 5050
    },
    {
      "epoch": 2.064686737184703,
      "grad_norm": 1.1539140939712524,
      "learning_rate": 1.7344786015672094e-05,
      "loss": 0.032,
      "step": 5075
    },
    {
      "epoch": 2.0748576078112286,
      "grad_norm": 0.7635537981987,
      "learning_rate": 1.7156419529837255e-05,
      "loss": 0.0318,
      "step": 5100
    },
    {
      "epoch": 2.085028478437754,
      "grad_norm": 0.8772742748260498,
      "learning_rate": 1.6968053044002412e-05,
      "loss": 0.0337,
      "step": 5125
    },
    {
      "epoch": 2.0951993490642797,
      "grad_norm": 0.45236992835998535,
      "learning_rate": 1.677968655816757e-05,
      "loss": 0.031,
      "step": 5150
    },
    {
      "epoch": 2.1053702196908057,
      "grad_norm": 0.5050310492515564,
      "learning_rate": 1.659132007233273e-05,
      "loss": 0.031,
      "step": 5175
    },
    {
      "epoch": 2.1155410903173313,
      "grad_norm": 0.442862331867218,
      "learning_rate": 1.640295358649789e-05,
      "loss": 0.0342,
      "step": 5200
    },
    {
      "epoch": 2.125711960943857,
      "grad_norm": 0.5236470103263855,
      "learning_rate": 1.6214587100663052e-05,
      "loss": 0.0372,
      "step": 5225
    },
    {
      "epoch": 2.1358828315703824,
      "grad_norm": 0.9813937544822693,
      "learning_rate": 1.602622061482821e-05,
      "loss": 0.0326,
      "step": 5250
    },
    {
      "epoch": 2.146053702196908,
      "grad_norm": 0.349025696516037,
      "learning_rate": 1.583785412899337e-05,
      "loss": 0.0346,
      "step": 5275
    },
    {
      "epoch": 2.1562245728234335,
      "grad_norm": 0.35612091422080994,
      "learning_rate": 1.564948764315853e-05,
      "loss": 0.0342,
      "step": 5300
    },
    {
      "epoch": 2.1663954434499595,
      "grad_norm": 0.5912727117538452,
      "learning_rate": 1.5461121157323692e-05,
      "loss": 0.0324,
      "step": 5325
    },
    {
      "epoch": 2.176566314076485,
      "grad_norm": 0.2870270609855652,
      "learning_rate": 1.5272754671488847e-05,
      "loss": 0.0342,
      "step": 5350
    },
    {
      "epoch": 2.1867371847030106,
      "grad_norm": 0.3680706322193146,
      "learning_rate": 1.5084388185654007e-05,
      "loss": 0.0329,
      "step": 5375
    },
    {
      "epoch": 2.196908055329536,
      "grad_norm": 0.9814783930778503,
      "learning_rate": 1.4896021699819168e-05,
      "loss": 0.0293,
      "step": 5400
    },
    {
      "epoch": 2.2070789259560617,
      "grad_norm": 0.7239277958869934,
      "learning_rate": 1.4707655213984328e-05,
      "loss": 0.0346,
      "step": 5425
    },
    {
      "epoch": 2.2172497965825873,
      "grad_norm": 0.44417452812194824,
      "learning_rate": 1.4519288728149488e-05,
      "loss": 0.0339,
      "step": 5450
    },
    {
      "epoch": 2.2274206672091132,
      "grad_norm": 0.3636336326599121,
      "learning_rate": 1.4330922242314648e-05,
      "loss": 0.0327,
      "step": 5475
    },
    {
      "epoch": 2.237591537835639,
      "grad_norm": 0.2732349634170532,
      "learning_rate": 1.4142555756479809e-05,
      "loss": 0.032,
      "step": 5500
    },
    {
      "epoch": 2.2477624084621644,
      "grad_norm": 0.820342481136322,
      "learning_rate": 1.3954189270644968e-05,
      "loss": 0.0318,
      "step": 5525
    },
    {
      "epoch": 2.25793327908869,
      "grad_norm": 0.31075552105903625,
      "learning_rate": 1.3765822784810129e-05,
      "loss": 0.0311,
      "step": 5550
    },
    {
      "epoch": 2.2681041497152155,
      "grad_norm": 0.8737571835517883,
      "learning_rate": 1.3577456298975286e-05,
      "loss": 0.0356,
      "step": 5575
    },
    {
      "epoch": 2.2782750203417415,
      "grad_norm": 0.9981245994567871,
      "learning_rate": 1.3389089813140445e-05,
      "loss": 0.0333,
      "step": 5600
    },
    {
      "epoch": 2.288445890968267,
      "grad_norm": 0.5384612679481506,
      "learning_rate": 1.3200723327305606e-05,
      "loss": 0.0316,
      "step": 5625
    },
    {
      "epoch": 2.2986167615947926,
      "grad_norm": 0.6893337965011597,
      "learning_rate": 1.3012356841470765e-05,
      "loss": 0.0324,
      "step": 5650
    },
    {
      "epoch": 2.308787632221318,
      "grad_norm": 0.449916273355484,
      "learning_rate": 1.2823990355635926e-05,
      "loss": 0.0325,
      "step": 5675
    },
    {
      "epoch": 2.3189585028478437,
      "grad_norm": 0.38824161887168884,
      "learning_rate": 1.2635623869801086e-05,
      "loss": 0.0339,
      "step": 5700
    },
    {
      "epoch": 2.329129373474369,
      "grad_norm": 0.7458836436271667,
      "learning_rate": 1.2447257383966246e-05,
      "loss": 0.0355,
      "step": 5725
    },
    {
      "epoch": 2.339300244100895,
      "grad_norm": 0.47954612970352173,
      "learning_rate": 1.2258890898131404e-05,
      "loss": 0.0323,
      "step": 5750
    },
    {
      "epoch": 2.3494711147274208,
      "grad_norm": 0.42400848865509033,
      "learning_rate": 1.2070524412296565e-05,
      "loss": 0.0302,
      "step": 5775
    },
    {
      "epoch": 2.3596419853539463,
      "grad_norm": 1.189965009689331,
      "learning_rate": 1.1882157926461724e-05,
      "loss": 0.0338,
      "step": 5800
    },
    {
      "epoch": 2.369812855980472,
      "grad_norm": 0.5762277841567993,
      "learning_rate": 1.1693791440626885e-05,
      "loss": 0.0332,
      "step": 5825
    },
    {
      "epoch": 2.3799837266069974,
      "grad_norm": 0.5994691848754883,
      "learning_rate": 1.1505424954792044e-05,
      "loss": 0.0364,
      "step": 5850
    },
    {
      "epoch": 2.390154597233523,
      "grad_norm": 0.9533575773239136,
      "learning_rate": 1.1317058468957203e-05,
      "loss": 0.0326,
      "step": 5875
    },
    {
      "epoch": 2.400325467860049,
      "grad_norm": 0.4238649308681488,
      "learning_rate": 1.1128691983122364e-05,
      "loss": 0.034,
      "step": 5900
    },
    {
      "epoch": 2.4104963384865745,
      "grad_norm": 0.8726415038108826,
      "learning_rate": 1.0940325497287523e-05,
      "loss": 0.0327,
      "step": 5925
    },
    {
      "epoch": 2.4206672091131,
      "grad_norm": 0.5922726988792419,
      "learning_rate": 1.0751959011452683e-05,
      "loss": 0.0337,
      "step": 5950
    },
    {
      "epoch": 2.4308380797396256,
      "grad_norm": 0.3707614839076996,
      "learning_rate": 1.0563592525617842e-05,
      "loss": 0.0338,
      "step": 5975
    },
    {
      "epoch": 2.441008950366151,
      "grad_norm": 0.4853639602661133,
      "learning_rate": 1.0375226039783003e-05,
      "loss": 0.0317,
      "step": 6000
    },
    {
      "epoch": 2.451179820992677,
      "grad_norm": 0.8022235631942749,
      "learning_rate": 1.0186859553948162e-05,
      "loss": 0.032,
      "step": 6025
    },
    {
      "epoch": 2.4613506916192027,
      "grad_norm": 0.8553130030632019,
      "learning_rate": 9.998493068113323e-06,
      "loss": 0.0312,
      "step": 6050
    },
    {
      "epoch": 2.4715215622457283,
      "grad_norm": 0.4112774431705475,
      "learning_rate": 9.81012658227848e-06,
      "loss": 0.0349,
      "step": 6075
    },
    {
      "epoch": 2.481692432872254,
      "grad_norm": 0.8546609282493591,
      "learning_rate": 9.621760096443641e-06,
      "loss": 0.0332,
      "step": 6100
    },
    {
      "epoch": 2.4918633034987794,
      "grad_norm": 0.8445001840591431,
      "learning_rate": 9.4333936106088e-06,
      "loss": 0.0324,
      "step": 6125
    },
    {
      "epoch": 2.5020341741253054,
      "grad_norm": 0.4580422043800354,
      "learning_rate": 9.245027124773961e-06,
      "loss": 0.0346,
      "step": 6150
    },
    {
      "epoch": 2.5122050447518305,
      "grad_norm": 0.6121585369110107,
      "learning_rate": 9.05666063893912e-06,
      "loss": 0.0345,
      "step": 6175
    },
    {
      "epoch": 2.5223759153783565,
      "grad_norm": 0.5637044906616211,
      "learning_rate": 8.86829415310428e-06,
      "loss": 0.0315,
      "step": 6200
    },
    {
      "epoch": 2.532546786004882,
      "grad_norm": 0.6579483151435852,
      "learning_rate": 8.67992766726944e-06,
      "loss": 0.0345,
      "step": 6225
    },
    {
      "epoch": 2.5427176566314076,
      "grad_norm": 0.30682843923568726,
      "learning_rate": 8.499095840867993e-06,
      "loss": 0.0334,
      "step": 6250
    },
    {
      "epoch": 2.552888527257933,
      "grad_norm": 1.7261478900909424,
      "learning_rate": 8.310729355033153e-06,
      "loss": 0.0337,
      "step": 6275
    },
    {
      "epoch": 2.5630593978844587,
      "grad_norm": 0.7609931826591492,
      "learning_rate": 8.122362869198312e-06,
      "loss": 0.0329,
      "step": 6300
    },
    {
      "epoch": 2.5732302685109847,
      "grad_norm": 1.1947487592697144,
      "learning_rate": 7.933996383363473e-06,
      "loss": 0.0338,
      "step": 6325
    },
    {
      "epoch": 2.5834011391375102,
      "grad_norm": 0.5045105814933777,
      "learning_rate": 7.745629897528632e-06,
      "loss": 0.0336,
      "step": 6350
    },
    {
      "epoch": 2.593572009764036,
      "grad_norm": 0.8998399972915649,
      "learning_rate": 7.557263411693792e-06,
      "loss": 0.0334,
      "step": 6375
    },
    {
      "epoch": 2.6037428803905613,
      "grad_norm": 0.3800385594367981,
      "learning_rate": 7.368896925858952e-06,
      "loss": 0.0306,
      "step": 6400
    },
    {
      "epoch": 2.613913751017087,
      "grad_norm": 0.35073891282081604,
      "learning_rate": 7.180530440024111e-06,
      "loss": 0.0342,
      "step": 6425
    },
    {
      "epoch": 2.624084621643613,
      "grad_norm": 0.35614126920700073,
      "learning_rate": 6.992163954189271e-06,
      "loss": 0.0317,
      "step": 6450
    },
    {
      "epoch": 2.6342554922701384,
      "grad_norm": 1.0959842205047607,
      "learning_rate": 6.8037974683544305e-06,
      "loss": 0.0328,
      "step": 6475
    },
    {
      "epoch": 2.644426362896664,
      "grad_norm": 0.9010970592498779,
      "learning_rate": 6.6154309825195905e-06,
      "loss": 0.0364,
      "step": 6500
    },
    {
      "epoch": 2.6545972335231895,
      "grad_norm": 0.8300909996032715,
      "learning_rate": 6.42706449668475e-06,
      "loss": 0.0312,
      "step": 6525
    },
    {
      "epoch": 2.664768104149715,
      "grad_norm": 0.7244754433631897,
      "learning_rate": 6.23869801084991e-06,
      "loss": 0.0319,
      "step": 6550
    },
    {
      "epoch": 2.674938974776241,
      "grad_norm": 1.3230552673339844,
      "learning_rate": 6.05033152501507e-06,
      "loss": 0.0328,
      "step": 6575
    },
    {
      "epoch": 2.685109845402766,
      "grad_norm": 0.437537282705307,
      "learning_rate": 5.861965039180229e-06,
      "loss": 0.0325,
      "step": 6600
    },
    {
      "epoch": 2.695280716029292,
      "grad_norm": 0.4210902154445648,
      "learning_rate": 5.673598553345389e-06,
      "loss": 0.0362,
      "step": 6625
    },
    {
      "epoch": 2.7054515866558178,
      "grad_norm": 0.3914755880832672,
      "learning_rate": 5.485232067510549e-06,
      "loss": 0.0329,
      "step": 6650
    },
    {
      "epoch": 2.7156224572823433,
      "grad_norm": 0.9759465456008911,
      "learning_rate": 5.296865581675708e-06,
      "loss": 0.0343,
      "step": 6675
    },
    {
      "epoch": 2.725793327908869,
      "grad_norm": 0.34633737802505493,
      "learning_rate": 5.108499095840868e-06,
      "loss": 0.0308,
      "step": 6700
    },
    {
      "epoch": 2.7359641985353944,
      "grad_norm": 0.5408746600151062,
      "learning_rate": 4.9201326100060275e-06,
      "loss": 0.0285,
      "step": 6725
    },
    {
      "epoch": 2.7461350691619204,
      "grad_norm": 0.3921310007572174,
      "learning_rate": 4.7317661241711876e-06,
      "loss": 0.0307,
      "step": 6750
    },
    {
      "epoch": 2.756305939788446,
      "grad_norm": 0.6094385981559753,
      "learning_rate": 4.543399638336348e-06,
      "loss": 0.0303,
      "step": 6775
    },
    {
      "epoch": 2.7664768104149715,
      "grad_norm": 0.5900077819824219,
      "learning_rate": 4.355033152501508e-06,
      "loss": 0.0355,
      "step": 6800
    },
    {
      "epoch": 2.776647681041497,
      "grad_norm": 0.4339945912361145,
      "learning_rate": 4.166666666666667e-06,
      "loss": 0.0351,
      "step": 6825
    },
    {
      "epoch": 2.7868185516680226,
      "grad_norm": 0.9042001962661743,
      "learning_rate": 3.978300180831827e-06,
      "loss": 0.033,
      "step": 6850
    },
    {
      "epoch": 2.7969894222945486,
      "grad_norm": 0.5715941190719604,
      "learning_rate": 3.789933694996986e-06,
      "loss": 0.0325,
      "step": 6875
    },
    {
      "epoch": 2.807160292921074,
      "grad_norm": 0.40120917558670044,
      "learning_rate": 3.601567209162146e-06,
      "loss": 0.0324,
      "step": 6900
    },
    {
      "epoch": 2.8173311635475997,
      "grad_norm": 0.636159360408783,
      "learning_rate": 3.413200723327306e-06,
      "loss": 0.0311,
      "step": 6925
    },
    {
      "epoch": 2.8275020341741253,
      "grad_norm": 0.79677414894104,
      "learning_rate": 3.2248342374924654e-06,
      "loss": 0.0298,
      "step": 6950
    },
    {
      "epoch": 2.837672904800651,
      "grad_norm": 0.6220082640647888,
      "learning_rate": 3.036467751657625e-06,
      "loss": 0.0315,
      "step": 6975
    },
    {
      "epoch": 2.847843775427177,
      "grad_norm": 0.4538786709308624,
      "learning_rate": 2.848101265822785e-06,
      "loss": 0.032,
      "step": 7000
    },
    {
      "epoch": 2.858014646053702,
      "grad_norm": 0.44975048303604126,
      "learning_rate": 2.6597347799879447e-06,
      "loss": 0.0314,
      "step": 7025
    },
    {
      "epoch": 2.868185516680228,
      "grad_norm": 0.2438650280237198,
      "learning_rate": 2.4713682941531043e-06,
      "loss": 0.0322,
      "step": 7050
    },
    {
      "epoch": 2.8783563873067535,
      "grad_norm": 0.9189873337745667,
      "learning_rate": 2.2830018083182644e-06,
      "loss": 0.0315,
      "step": 7075
    },
    {
      "epoch": 2.888527257933279,
      "grad_norm": 0.31788191199302673,
      "learning_rate": 2.094635322483424e-06,
      "loss": 0.0287,
      "step": 7100
    },
    {
      "epoch": 2.8986981285598046,
      "grad_norm": 0.7033805847167969,
      "learning_rate": 1.9062688366485836e-06,
      "loss": 0.0331,
      "step": 7125
    },
    {
      "epoch": 2.90886899918633,
      "grad_norm": 0.3187176287174225,
      "learning_rate": 1.7179023508137434e-06,
      "loss": 0.0349,
      "step": 7150
    },
    {
      "epoch": 2.919039869812856,
      "grad_norm": 0.3502849042415619,
      "learning_rate": 1.529535864978903e-06,
      "loss": 0.0314,
      "step": 7175
    },
    {
      "epoch": 2.9292107404393817,
      "grad_norm": 0.38132113218307495,
      "learning_rate": 1.3411693791440627e-06,
      "loss": 0.0307,
      "step": 7200
    },
    {
      "epoch": 2.9393816110659072,
      "grad_norm": 0.335792601108551,
      "learning_rate": 1.1528028933092225e-06,
      "loss": 0.0329,
      "step": 7225
    },
    {
      "epoch": 2.949552481692433,
      "grad_norm": 0.43150436878204346,
      "learning_rate": 9.644364074743821e-07,
      "loss": 0.0279,
      "step": 7250
    },
    {
      "epoch": 2.9597233523189583,
      "grad_norm": 0.43568554520606995,
      "learning_rate": 7.76069921639542e-07,
      "loss": 0.0297,
      "step": 7275
    },
    {
      "epoch": 2.9698942229454843,
      "grad_norm": 0.2997362017631531,
      "learning_rate": 5.877034358047017e-07,
      "loss": 0.0315,
      "step": 7300
    },
    {
      "epoch": 2.98006509357201,
      "grad_norm": 0.555476725101471,
      "learning_rate": 3.993369499698613e-07,
      "loss": 0.0296,
      "step": 7325
    },
    {
      "epoch": 2.9902359641985354,
      "grad_norm": 0.31480032205581665,
      "learning_rate": 2.1097046413502108e-07,
      "loss": 0.0334,
      "step": 7350
    },
    {
      "epoch": 3.0,
      "eval_explained_variance": 0.273209810256958,
      "eval_loss": 0.03396870195865631,
      "eval_mae": 0.14112502336502075,
      "eval_mse": 0.03396843746304512,
      "eval_r2": 0.2725181579589844,
      "eval_rmse": 0.18430528332916862,
      "eval_runtime": 39.0131,
      "eval_samples_per_second": 503.882,
      "eval_steps_per_second": 7.895,
      "step": 7374
    }
  ],
  "logging_steps": 25,
  "max_steps": 7374,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 3,
  "save_steps": 500,
  "stateful_callbacks": {
    "EarlyStoppingCallback": {
      "args": {
        "early_stopping_patience": 5,
        "early_stopping_threshold": 0.01
      },
      "attributes": {
        "early_stopping_patience_counter": 2
      }
    },
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 1.937435559513293e+16,
  "train_batch_size": 8,
  "trial_name": null,
  "trial_params": null
}