| { | |
| "epoch": 1, | |
| "n_tokens": 8003584, | |
| "global_step": 7816, | |
| "training_metrics": { | |
| "train/loss": 2.546875, | |
| "train/contrastive": 2.453125, | |
| "train/recons_loss": 0.5703125, | |
| "train/balance_loss": 3.84375, | |
| "train/balance_loss_contrastive": 2.84375, | |
| "train/balance_loss_recons": 1.0078125, | |
| "train/contrastive_std": 3.359375, | |
| "train/recons_std": 0.0703125, | |
| "train/contrastive_min": 0.083984375, | |
| "train/contrastive_max": 7.125, | |
| "train/recons_min": 0.48828125, | |
| "train/recons_max": 0.671875, | |
| "train/Qwen3_0.6B_layer_2": 0.671875, | |
| "train/Qwen3_0.6B_layer_4": 0.54296875, | |
| "train/Qwen3_1.7B_layer_2": 0.52734375, | |
| "train/Qwen3_1.7B_layer_4": 0.640625, | |
| "train/Qwen3_4B_layer_2": 0.48828125, | |
| "train/Qwen3_4B_layer_4": 0.5625, | |
| "train/contrastives": null, | |
| "train/epoch": 1, | |
| "train/n_tokens": 8003584, | |
| "train/step": 7816 | |
| }, | |
| "eval_metrics": { | |
| "global_step": 7816, | |
| "n_tokens": 8003584, | |
| "kl_divergence": { | |
| "Qwen3_0.6B_layer_2_to_Qwen3_0.6B_layer_2": 6.801623344421387, | |
| "Qwen3_0.6B_layer_2_to_Qwen3_0.6B_layer_4": 6.516300201416016, | |
| "Qwen3_0.6B_layer_2_to_Qwen3_1.7B_layer_2": 6.550345420837402, | |
| "Qwen3_0.6B_layer_2_to_Qwen3_1.7B_layer_4": 6.498440742492676, | |
| "Qwen3_0.6B_layer_2_to_Qwen3_4B_layer_2": 6.312735080718994, | |
| "Qwen3_0.6B_layer_2_to_Qwen3_4B_layer_4": 6.4551262855529785, | |
| "Qwen3_0.6B_layer_2_to_uniform": 9.070773124694824, | |
| "Qwen3_0.6B_layer_4_to_Qwen3_0.6B_layer_2": 2.2260851860046387, | |
| "Qwen3_0.6B_layer_4_to_Qwen3_0.6B_layer_4": 2.1856892108917236, | |
| "Qwen3_0.6B_layer_4_to_Qwen3_1.7B_layer_2": 2.254146099090576, | |
| "Qwen3_0.6B_layer_4_to_Qwen3_1.7B_layer_4": 2.229769468307495, | |
| "Qwen3_0.6B_layer_4_to_Qwen3_4B_layer_2": 2.2037243843078613, | |
| "Qwen3_0.6B_layer_4_to_Qwen3_4B_layer_4": 2.2896828651428223, | |
| "Qwen3_0.6B_layer_4_to_uniform": 9.070773124694824, | |
| "Qwen3_1.7B_layer_2_to_Qwen3_0.6B_layer_2": 5.400465965270996, | |
| "Qwen3_1.7B_layer_2_to_Qwen3_0.6B_layer_4": 5.9340386390686035, | |
| "Qwen3_1.7B_layer_2_to_Qwen3_1.7B_layer_2": 5.794930458068848, | |
| "Qwen3_1.7B_layer_2_to_Qwen3_1.7B_layer_4": 5.900982856750488, | |
| "Qwen3_1.7B_layer_2_to_Qwen3_4B_layer_2": 6.348906517028809, | |
| "Qwen3_1.7B_layer_2_to_Qwen3_4B_layer_4": 6.4423675537109375, | |
| "Qwen3_1.7B_layer_2_to_uniform": 9.88111686706543, | |
| "Qwen3_1.7B_layer_4_to_Qwen3_0.6B_layer_2": 2.5666661262512207, | |
| "Qwen3_1.7B_layer_4_to_Qwen3_0.6B_layer_4": 2.535998821258545, | |
| "Qwen3_1.7B_layer_4_to_Qwen3_1.7B_layer_2": 2.4926912784576416, | |
| "Qwen3_1.7B_layer_4_to_Qwen3_1.7B_layer_4": 2.476747989654541, | |
| "Qwen3_1.7B_layer_4_to_Qwen3_4B_layer_2": 2.408336877822876, | |
| "Qwen3_1.7B_layer_4_to_Qwen3_4B_layer_4": 2.6492466926574707, | |
| "Qwen3_1.7B_layer_4_to_uniform": 9.88111686706543, | |
| "Qwen3_4B_layer_2_to_Qwen3_0.6B_layer_2": 2.4851021766662598, | |
| "Qwen3_4B_layer_2_to_Qwen3_0.6B_layer_4": 2.303314685821533, | |
| "Qwen3_4B_layer_2_to_Qwen3_1.7B_layer_2": 2.0016140937805176, | |
| "Qwen3_4B_layer_2_to_Qwen3_1.7B_layer_4": 2.184553384780884, | |
| "Qwen3_4B_layer_2_to_Qwen3_4B_layer_2": 2.121729850769043, | |
| "Qwen3_4B_layer_2_to_Qwen3_4B_layer_4": 2.000966787338257, | |
| "Qwen3_4B_layer_2_to_uniform": 10.104096412658691, | |
| "Qwen3_4B_layer_4_to_Qwen3_0.6B_layer_2": 3.442514419555664, | |
| "Qwen3_4B_layer_4_to_Qwen3_0.6B_layer_4": 3.1136765480041504, | |
| "Qwen3_4B_layer_4_to_Qwen3_1.7B_layer_2": 2.937788486480713, | |
| "Qwen3_4B_layer_4_to_Qwen3_1.7B_layer_4": 3.0111327171325684, | |
| "Qwen3_4B_layer_4_to_Qwen3_4B_layer_2": 3.0196948051452637, | |
| "Qwen3_4B_layer_4_to_Qwen3_4B_layer_4": 2.7799510955810547, | |
| "Qwen3_4B_layer_4_to_uniform": 10.104096412658691 | |
| }, | |
| "mae_hidden_states": { | |
| "Qwen3_0.6B_layer_2_to_Qwen3_0.6B_layer_2": 1.2630091905593872, | |
| "Qwen3_0.6B_layer_4_to_Qwen3_0.6B_layer_2": 1.2069993019104004, | |
| "Qwen3_1.7B_layer_2_to_Qwen3_0.6B_layer_2": 1.2386506795883179, | |
| "Qwen3_1.7B_layer_4_to_Qwen3_0.6B_layer_2": 1.2585456371307373, | |
| "Qwen3_4B_layer_2_to_Qwen3_0.6B_layer_2": 1.212580919265747, | |
| "Qwen3_4B_layer_4_to_Qwen3_0.6B_layer_2": 1.2229262590408325, | |
| "Qwen3_0.6B_layer_2_to_Qwen3_0.6B_layer_4": 1.0233924388885498, | |
| "Qwen3_0.6B_layer_4_to_Qwen3_0.6B_layer_4": 0.9251772165298462, | |
| "Qwen3_1.7B_layer_2_to_Qwen3_0.6B_layer_4": 0.9622151255607605, | |
| "Qwen3_1.7B_layer_4_to_Qwen3_0.6B_layer_4": 0.9760592579841614, | |
| "Qwen3_4B_layer_2_to_Qwen3_0.6B_layer_4": 0.9428697824478149, | |
| "Qwen3_4B_layer_4_to_Qwen3_0.6B_layer_4": 0.9486178159713745, | |
| "Qwen3_0.6B_layer_2_to_Qwen3_1.7B_layer_2": 1.0079174041748047, | |
| "Qwen3_0.6B_layer_4_to_Qwen3_1.7B_layer_2": 0.9031265377998352, | |
| "Qwen3_1.7B_layer_2_to_Qwen3_1.7B_layer_2": 0.9057611227035522, | |
| "Qwen3_1.7B_layer_4_to_Qwen3_1.7B_layer_2": 0.9231780767440796, | |
| "Qwen3_4B_layer_2_to_Qwen3_1.7B_layer_2": 0.9179145097732544, | |
| "Qwen3_4B_layer_4_to_Qwen3_1.7B_layer_2": 0.9312993884086609, | |
| "Qwen3_0.6B_layer_2_to_Qwen3_1.7B_layer_4": 1.2595539093017578, | |
| "Qwen3_0.6B_layer_4_to_Qwen3_1.7B_layer_4": 1.169715166091919, | |
| "Qwen3_1.7B_layer_2_to_Qwen3_1.7B_layer_4": 1.1957802772521973, | |
| "Qwen3_1.7B_layer_4_to_Qwen3_1.7B_layer_4": 1.1877433061599731, | |
| "Qwen3_4B_layer_2_to_Qwen3_1.7B_layer_4": 1.179739236831665, | |
| "Qwen3_4B_layer_4_to_Qwen3_1.7B_layer_4": 1.1788952350616455, | |
| "Qwen3_0.6B_layer_2_to_Qwen3_4B_layer_2": 1.0426846742630005, | |
| "Qwen3_0.6B_layer_4_to_Qwen3_4B_layer_2": 0.9591526985168457, | |
| "Qwen3_1.7B_layer_2_to_Qwen3_4B_layer_2": 0.9619539380073547, | |
| "Qwen3_1.7B_layer_4_to_Qwen3_4B_layer_2": 0.9698508977890015, | |
| "Qwen3_4B_layer_2_to_Qwen3_4B_layer_2": 0.9279893636703491, | |
| "Qwen3_4B_layer_4_to_Qwen3_4B_layer_2": 0.9385145902633667, | |
| "Qwen3_0.6B_layer_2_to_Qwen3_4B_layer_4": 1.1462980508804321, | |
| "Qwen3_0.6B_layer_4_to_Qwen3_4B_layer_4": 1.0632051229476929, | |
| "Qwen3_1.7B_layer_2_to_Qwen3_4B_layer_4": 1.0799243450164795, | |
| "Qwen3_1.7B_layer_4_to_Qwen3_4B_layer_4": 1.0858067274093628, | |
| "Qwen3_4B_layer_2_to_Qwen3_4B_layer_4": 1.0611412525177002, | |
| "Qwen3_4B_layer_4_to_Qwen3_4B_layer_4": 1.0519263744354248 | |
| }, | |
| "alignment": { | |
| "Qwen3_0.6B_layer_2_to_Qwen3_0.6B_layer_4": { | |
| "mse": 0.388671875, | |
| "mean_cosine_similarity": 0.89453125, | |
| "std_cosine_similarity": 0.15625, | |
| "mean_l2_distance": 19.875, | |
| "std_l2_distance": 12.375, | |
| "mean_dimension_correlation": 0.890447998046875, | |
| "std_dimension_correlation": 0.03419125987740356, | |
| "linear_cka": 0.96484375 | |
| }, | |
| "Qwen3_0.6B_layer_2_to_Qwen3_1.7B_layer_2": { | |
| "mse": 0.39453125, | |
| "mean_cosine_similarity": 0.89453125, | |
| "std_cosine_similarity": 0.162109375, | |
| "mean_l2_distance": 20.125, | |
| "std_l2_distance": 12.625, | |
| "mean_dimension_correlation": 0.8867477416992188, | |
| "std_dimension_correlation": 0.035491939390515204, | |
| "linear_cka": 0.96484375 | |
| }, | |
| "Qwen3_0.6B_layer_2_to_Qwen3_1.7B_layer_4": { | |
| "mse": 0.39453125, | |
| "mean_cosine_similarity": 0.89453125, | |
| "std_cosine_similarity": 0.154296875, | |
| "mean_l2_distance": 20.125, | |
| "std_l2_distance": 12.1875, | |
| "mean_dimension_correlation": 0.889697265625, | |
| "std_dimension_correlation": 0.03374281347550432, | |
| "linear_cka": 0.96484375 | |
| }, | |
| "Qwen3_0.6B_layer_2_to_Qwen3_4B_layer_2": { | |
| "mse": 0.390625, | |
| "mean_cosine_similarity": 0.89453125, | |
| "std_cosine_similarity": 0.1591796875, | |
| "mean_l2_distance": 20.0, | |
| "std_l2_distance": 12.5, | |
| "mean_dimension_correlation": 0.8883514404296875, | |
| "std_dimension_correlation": 0.035164283126066044, | |
| "linear_cka": 0.96484375 | |
| }, | |
| "Qwen3_0.6B_layer_2_to_Qwen3_4B_layer_4": { | |
| "mse": 0.388671875, | |
| "mean_cosine_similarity": 0.89453125, | |
| "std_cosine_similarity": 0.1591796875, | |
| "mean_l2_distance": 20.0, | |
| "std_l2_distance": 12.4375, | |
| "mean_dimension_correlation": 0.8896194458007812, | |
| "std_dimension_correlation": 0.03421083254072828, | |
| "linear_cka": 0.96484375 | |
| }, | |
| "Qwen3_0.6B_layer_4_to_Qwen3_0.6B_layer_2": { | |
| "mse": 0.388671875, | |
| "mean_cosine_similarity": 0.89453125, | |
| "std_cosine_similarity": 0.15625, | |
| "mean_l2_distance": 19.875, | |
| "std_l2_distance": 12.375, | |
| "mean_dimension_correlation": 0.8904556274414063, | |
| "std_dimension_correlation": 0.034210556225841876, | |
| "linear_cka": 0.96484375 | |
| }, | |
| "Qwen3_0.6B_layer_4_to_Qwen3_1.7B_layer_2": { | |
| "mse": 0.26953125, | |
| "mean_cosine_similarity": 0.93359375, | |
| "std_cosine_similarity": 0.1513671875, | |
| "mean_l2_distance": 13.8125, | |
| "std_l2_distance": 12.625, | |
| "mean_dimension_correlation": 0.923016357421875, | |
| "std_dimension_correlation": 0.029236331051580345, | |
| "linear_cka": 0.984375 | |
| }, | |
| "Qwen3_0.6B_layer_4_to_Qwen3_1.7B_layer_4": { | |
| "mse": 0.263671875, | |
| "mean_cosine_similarity": 0.9375, | |
| "std_cosine_similarity": 0.150390625, | |
| "mean_l2_distance": 13.5625, | |
| "std_l2_distance": 12.625, | |
| "mean_dimension_correlation": 0.9244888305664063, | |
| "std_dimension_correlation": 0.02919239611161659, | |
| "linear_cka": 0.984375 | |
| }, | |
| "Qwen3_0.6B_layer_4_to_Qwen3_4B_layer_2": { | |
| "mse": 0.271484375, | |
| "mean_cosine_similarity": 0.93359375, | |
| "std_cosine_similarity": 0.16015625, | |
| "mean_l2_distance": 13.875, | |
| "std_l2_distance": 13.0625, | |
| "mean_dimension_correlation": 0.9205032348632812, | |
| "std_dimension_correlation": 0.029844860484086543, | |
| "linear_cka": 0.984375 | |
| }, | |
| "Qwen3_0.6B_layer_4_to_Qwen3_4B_layer_4": { | |
| "mse": 0.267578125, | |
| "mean_cosine_similarity": 0.93359375, | |
| "std_cosine_similarity": 0.158203125, | |
| "mean_l2_distance": 13.6875, | |
| "std_l2_distance": 13.0, | |
| "mean_dimension_correlation": 0.9218185424804688, | |
| "std_dimension_correlation": 0.030954341854338954, | |
| "linear_cka": 0.984375 | |
| }, | |
| "Qwen3_1.7B_layer_2_to_Qwen3_0.6B_layer_2": { | |
| "mse": 0.39453125, | |
| "mean_cosine_similarity": 0.89453125, | |
| "std_cosine_similarity": 0.162109375, | |
| "mean_l2_distance": 20.125, | |
| "std_l2_distance": 12.625, | |
| "mean_dimension_correlation": 0.8868682861328125, | |
| "std_dimension_correlation": 0.03559183889902671, | |
| "linear_cka": 0.96484375 | |
| }, | |
| "Qwen3_1.7B_layer_2_to_Qwen3_0.6B_layer_4": { | |
| "mse": 0.26953125, | |
| "mean_cosine_similarity": 0.93359375, | |
| "std_cosine_similarity": 0.1513671875, | |
| "mean_l2_distance": 13.8125, | |
| "std_l2_distance": 12.625, | |
| "mean_dimension_correlation": 0.9229568481445313, | |
| "std_dimension_correlation": 0.029229316660619842, | |
| "linear_cka": 0.984375 | |
| }, | |
| "Qwen3_1.7B_layer_2_to_Qwen3_1.7B_layer_4": { | |
| "mse": 0.2578125, | |
| "mean_cosine_similarity": 0.93359375, | |
| "std_cosine_similarity": 0.16015625, | |
| "mean_l2_distance": 13.25, | |
| "std_l2_distance": 13.25, | |
| "mean_dimension_correlation": 0.923333740234375, | |
| "std_dimension_correlation": 0.030134410337098863, | |
| "linear_cka": 0.984375 | |
| }, | |
| "Qwen3_1.7B_layer_2_to_Qwen3_4B_layer_2": { | |
| "mse": 0.26171875, | |
| "mean_cosine_similarity": 0.93359375, | |
| "std_cosine_similarity": 0.16015625, | |
| "mean_l2_distance": 13.4375, | |
| "std_l2_distance": 13.25, | |
| "mean_dimension_correlation": 0.9219314575195312, | |
| "std_dimension_correlation": 0.03136389625872561, | |
| "linear_cka": 0.984375 | |
| }, | |
| "Qwen3_1.7B_layer_2_to_Qwen3_4B_layer_4": { | |
| "mse": 0.25390625, | |
| "mean_cosine_similarity": 0.9375, | |
| "std_cosine_similarity": 0.1484375, | |
| "mean_l2_distance": 13.0625, | |
| "std_l2_distance": 12.625, | |
| "mean_dimension_correlation": 0.92755126953125, | |
| "std_dimension_correlation": 0.02898992593261031, | |
| "linear_cka": 0.984375 | |
| }, | |
| "Qwen3_1.7B_layer_4_to_Qwen3_0.6B_layer_2": { | |
| "mse": 0.39453125, | |
| "mean_cosine_similarity": 0.89453125, | |
| "std_cosine_similarity": 0.154296875, | |
| "mean_l2_distance": 20.125, | |
| "std_l2_distance": 12.1875, | |
| "mean_dimension_correlation": 0.8896469116210938, | |
| "std_dimension_correlation": 0.03377379140546021, | |
| "linear_cka": 0.96484375 | |
| }, | |
| "Qwen3_1.7B_layer_4_to_Qwen3_0.6B_layer_4": { | |
| "mse": 0.263671875, | |
| "mean_cosine_similarity": 0.9375, | |
| "std_cosine_similarity": 0.150390625, | |
| "mean_l2_distance": 13.5625, | |
| "std_l2_distance": 12.625, | |
| "mean_dimension_correlation": 0.9245574951171875, | |
| "std_dimension_correlation": 0.029099754782990043, | |
| "linear_cka": 0.984375 | |
| }, | |
| "Qwen3_1.7B_layer_4_to_Qwen3_1.7B_layer_2": { | |
| "mse": 0.2578125, | |
| "mean_cosine_similarity": 0.93359375, | |
| "std_cosine_similarity": 0.16015625, | |
| "mean_l2_distance": 13.25, | |
| "std_l2_distance": 13.25, | |
| "mean_dimension_correlation": 0.9233123779296875, | |
| "std_dimension_correlation": 0.030156395218800952, | |
| "linear_cka": 0.984375 | |
| }, | |
| "Qwen3_1.7B_layer_4_to_Qwen3_4B_layer_2": { | |
| "mse": 0.263671875, | |
| "mean_cosine_similarity": 0.93359375, | |
| "std_cosine_similarity": 0.1572265625, | |
| "mean_l2_distance": 13.5625, | |
| "std_l2_distance": 13.0625, | |
| "mean_dimension_correlation": 0.9226715087890625, | |
| "std_dimension_correlation": 0.02929662688468137, | |
| "linear_cka": 0.984375 | |
| }, | |
| "Qwen3_1.7B_layer_4_to_Qwen3_4B_layer_4": { | |
| "mse": 0.255859375, | |
| "mean_cosine_similarity": 0.9375, | |
| "std_cosine_similarity": 0.1533203125, | |
| "mean_l2_distance": 13.125, | |
| "std_l2_distance": 12.8125, | |
| "mean_dimension_correlation": 0.9262313842773438, | |
| "std_dimension_correlation": 0.029011160291782537, | |
| "linear_cka": 0.984375 | |
| }, | |
| "Qwen3_4B_layer_2_to_Qwen3_0.6B_layer_2": { | |
| "mse": 0.390625, | |
| "mean_cosine_similarity": 0.89453125, | |
| "std_cosine_similarity": 0.1591796875, | |
| "mean_l2_distance": 20.0, | |
| "std_l2_distance": 12.5, | |
| "mean_dimension_correlation": 0.8883377075195312, | |
| "std_dimension_correlation": 0.03512599620173197, | |
| "linear_cka": 0.96484375 | |
| }, | |
| "Qwen3_4B_layer_2_to_Qwen3_0.6B_layer_4": { | |
| "mse": 0.271484375, | |
| "mean_cosine_similarity": 0.93359375, | |
| "std_cosine_similarity": 0.16015625, | |
| "mean_l2_distance": 13.875, | |
| "std_l2_distance": 13.0625, | |
| "mean_dimension_correlation": 0.9205001831054688, | |
| "std_dimension_correlation": 0.02990616928878693, | |
| "linear_cka": 0.984375 | |
| }, | |
| "Qwen3_4B_layer_2_to_Qwen3_1.7B_layer_2": { | |
| "mse": 0.26171875, | |
| "mean_cosine_similarity": 0.93359375, | |
| "std_cosine_similarity": 0.16015625, | |
| "mean_l2_distance": 13.4375, | |
| "std_l2_distance": 13.25, | |
| "mean_dimension_correlation": 0.922039794921875, | |
| "std_dimension_correlation": 0.03143896607512693, | |
| "linear_cka": 0.984375 | |
| }, | |
| "Qwen3_4B_layer_2_to_Qwen3_1.7B_layer_4": { | |
| "mse": 0.263671875, | |
| "mean_cosine_similarity": 0.93359375, | |
| "std_cosine_similarity": 0.1572265625, | |
| "mean_l2_distance": 13.5625, | |
| "std_l2_distance": 13.0625, | |
| "mean_dimension_correlation": 0.9226806640625, | |
| "std_dimension_correlation": 0.029339070768690877, | |
| "linear_cka": 0.984375 | |
| }, | |
| "Qwen3_4B_layer_2_to_Qwen3_4B_layer_4": { | |
| "mse": 0.24609375, | |
| "mean_cosine_similarity": 0.9375, | |
| "std_cosine_similarity": 0.1591796875, | |
| "mean_l2_distance": 12.625, | |
| "std_l2_distance": 13.375, | |
| "mean_dimension_correlation": 0.9257278442382812, | |
| "std_dimension_correlation": 0.030489491126206747, | |
| "linear_cka": 0.984375 | |
| }, | |
| "Qwen3_4B_layer_4_to_Qwen3_0.6B_layer_2": { | |
| "mse": 0.388671875, | |
| "mean_cosine_similarity": 0.89453125, | |
| "std_cosine_similarity": 0.1591796875, | |
| "mean_l2_distance": 20.0, | |
| "std_l2_distance": 12.4375, | |
| "mean_dimension_correlation": 0.8896011352539063, | |
| "std_dimension_correlation": 0.034245117741804325, | |
| "linear_cka": 0.96484375 | |
| }, | |
| "Qwen3_4B_layer_4_to_Qwen3_0.6B_layer_4": { | |
| "mse": 0.267578125, | |
| "mean_cosine_similarity": 0.93359375, | |
| "std_cosine_similarity": 0.158203125, | |
| "mean_l2_distance": 13.6875, | |
| "std_l2_distance": 13.0, | |
| "mean_dimension_correlation": 0.9218338012695313, | |
| "std_dimension_correlation": 0.03096110466803191, | |
| "linear_cka": 0.984375 | |
| }, | |
| "Qwen3_4B_layer_4_to_Qwen3_1.7B_layer_2": { | |
| "mse": 0.25390625, | |
| "mean_cosine_similarity": 0.9375, | |
| "std_cosine_similarity": 0.1484375, | |
| "mean_l2_distance": 13.0625, | |
| "std_l2_distance": 12.625, | |
| "mean_dimension_correlation": 0.9275863647460938, | |
| "std_dimension_correlation": 0.029019101935420444, | |
| "linear_cka": 0.984375 | |
| }, | |
| "Qwen3_4B_layer_4_to_Qwen3_1.7B_layer_4": { | |
| "mse": 0.255859375, | |
| "mean_cosine_similarity": 0.9375, | |
| "std_cosine_similarity": 0.1533203125, | |
| "mean_l2_distance": 13.125, | |
| "std_l2_distance": 12.8125, | |
| "mean_dimension_correlation": 0.9262100219726562, | |
| "std_dimension_correlation": 0.029023808376502022, | |
| "linear_cka": 0.984375 | |
| }, | |
| "Qwen3_4B_layer_4_to_Qwen3_4B_layer_2": { | |
| "mse": 0.24609375, | |
| "mean_cosine_similarity": 0.9375, | |
| "std_cosine_similarity": 0.1591796875, | |
| "mean_l2_distance": 12.625, | |
| "std_l2_distance": 13.375, | |
| "mean_dimension_correlation": 0.9256805419921875, | |
| "std_dimension_correlation": 0.030472261122601613, | |
| "linear_cka": 0.984375 | |
| }, | |
| "avg_mse": 0.3045572916666667, | |
| "std_mse": 0.061728089131668586, | |
| "avg_mean_cosine_similarity": 0.9216145833333333, | |
| "std_mean_cosine_similarity": 0.01921444452676741, | |
| "avg_std_cosine_similarity": 0.156640625, | |
| "std_std_cosine_similarity": 0.003999537123283247, | |
| "avg_mean_l2_distance": 15.608333333333333, | |
| "std_mean_l2_distance": 3.137845819808374, | |
| "avg_std_l2_distance": 12.7875, | |
| "std_std_l2_distance": 0.34746102898982306, | |
| "avg_mean_dimension_correlation": 0.9121468607584635, | |
| "std_mean_dimension_correlation": 0.016489903679962933, | |
| "avg_std_dimension_correlation": 0.03143026060381273, | |
| "std_std_dimension_correlation": 0.0023529554482308356, | |
| "avg_linear_cka": 0.9778645833333334, | |
| "std_linear_cka": 0.009207119546699838 | |
| } | |
| } | |
| } |