| { | |
| "epoch": 1, | |
| "n_tokens": 4001792, | |
| "global_step": 3908, | |
| "training_metrics": { | |
| "train/loss": 2.5, | |
| "train/contrastive": 2.40625, | |
| "train/recons_loss": 0.671875, | |
| "train/balance_loss": 3.828125, | |
| "train/balance_loss_contrastive": 2.78125, | |
| "train/balance_loss_recons": 1.046875, | |
| "train/contrastive_std": 3.265625, | |
| "train/recons_std": 0.1513671875, | |
| "train/contrastive_min": 0.1162109375, | |
| "train/contrastive_max": 6.9375, | |
| "train/recons_min": 0.55859375, | |
| "train/recons_max": 0.96484375, | |
| "train/Qwen3_0.6B_layer_2": 0.96484375, | |
| "train/Qwen3_0.6B_layer_4": 0.6015625, | |
| "train/Qwen3_1.7B_layer_2": 0.58203125, | |
| "train/Qwen3_1.7B_layer_4": 0.69140625, | |
| "train/Qwen3_4B_layer_2": 0.55859375, | |
| "train/Qwen3_4B_layer_4": 0.625, | |
| "train/contrastives": null, | |
| "train/epoch": 1, | |
| "train/n_tokens": 4001792, | |
| "train/step": 3908 | |
| }, | |
| "eval_metrics": { | |
| "global_step": 3908, | |
| "n_tokens": 4001792, | |
| "kl_divergence": { | |
| "Qwen3_0.6B_layer_2_to_Qwen3_0.6B_layer_2": 10.677733421325684, | |
| "Qwen3_0.6B_layer_2_to_Qwen3_0.6B_layer_4": 10.070417404174805, | |
| "Qwen3_0.6B_layer_2_to_Qwen3_1.7B_layer_2": 10.500988960266113, | |
| "Qwen3_0.6B_layer_2_to_Qwen3_1.7B_layer_4": 10.254755973815918, | |
| "Qwen3_0.6B_layer_2_to_Qwen3_4B_layer_2": 10.141581535339355, | |
| "Qwen3_0.6B_layer_2_to_Qwen3_4B_layer_4": 10.209218978881836, | |
| "Qwen3_0.6B_layer_2_to_uniform": 9.070773124694824, | |
| "Qwen3_0.6B_layer_4_to_Qwen3_0.6B_layer_2": 6.944426536560059, | |
| "Qwen3_0.6B_layer_4_to_Qwen3_0.6B_layer_4": 2.526094675064087, | |
| "Qwen3_0.6B_layer_4_to_Qwen3_1.7B_layer_2": 2.448215961456299, | |
| "Qwen3_0.6B_layer_4_to_Qwen3_1.7B_layer_4": 2.5273706912994385, | |
| "Qwen3_0.6B_layer_4_to_Qwen3_4B_layer_2": 2.518568515777588, | |
| "Qwen3_0.6B_layer_4_to_Qwen3_4B_layer_4": 2.5949084758758545, | |
| "Qwen3_0.6B_layer_4_to_uniform": 9.070773124694824, | |
| "Qwen3_1.7B_layer_2_to_Qwen3_0.6B_layer_2": 10.105497360229492, | |
| "Qwen3_1.7B_layer_2_to_Qwen3_0.6B_layer_4": 6.14721155166626, | |
| "Qwen3_1.7B_layer_2_to_Qwen3_1.7B_layer_2": 6.3550543785095215, | |
| "Qwen3_1.7B_layer_2_to_Qwen3_1.7B_layer_4": 6.340244293212891, | |
| "Qwen3_1.7B_layer_2_to_Qwen3_4B_layer_2": 6.490333557128906, | |
| "Qwen3_1.7B_layer_2_to_Qwen3_4B_layer_4": 6.536875247955322, | |
| "Qwen3_1.7B_layer_2_to_uniform": 9.88111686706543, | |
| "Qwen3_1.7B_layer_4_to_Qwen3_0.6B_layer_2": 9.303935050964355, | |
| "Qwen3_1.7B_layer_4_to_Qwen3_0.6B_layer_4": 2.7055323123931885, | |
| "Qwen3_1.7B_layer_4_to_Qwen3_1.7B_layer_2": 2.6092498302459717, | |
| "Qwen3_1.7B_layer_4_to_Qwen3_1.7B_layer_4": 2.6337990760803223, | |
| "Qwen3_1.7B_layer_4_to_Qwen3_4B_layer_2": 2.66951322555542, | |
| "Qwen3_1.7B_layer_4_to_Qwen3_4B_layer_4": 2.742098569869995, | |
| "Qwen3_1.7B_layer_4_to_uniform": 9.88111686706543, | |
| "Qwen3_4B_layer_2_to_Qwen3_0.6B_layer_2": 8.335909843444824, | |
| "Qwen3_4B_layer_2_to_Qwen3_0.6B_layer_4": 2.7109298706054688, | |
| "Qwen3_4B_layer_2_to_Qwen3_1.7B_layer_2": 2.387141704559326, | |
| "Qwen3_4B_layer_2_to_Qwen3_1.7B_layer_4": 2.432076930999756, | |
| "Qwen3_4B_layer_2_to_Qwen3_4B_layer_2": 2.466850519180298, | |
| "Qwen3_4B_layer_2_to_Qwen3_4B_layer_4": 2.3831467628479004, | |
| "Qwen3_4B_layer_2_to_uniform": 10.104096412658691, | |
| "Qwen3_4B_layer_4_to_Qwen3_0.6B_layer_2": 8.078448295593262, | |
| "Qwen3_4B_layer_4_to_Qwen3_0.6B_layer_4": 3.5085513591766357, | |
| "Qwen3_4B_layer_4_to_Qwen3_1.7B_layer_2": 3.5161335468292236, | |
| "Qwen3_4B_layer_4_to_Qwen3_1.7B_layer_4": 3.7921173572540283, | |
| "Qwen3_4B_layer_4_to_Qwen3_4B_layer_2": 3.47990345954895, | |
| "Qwen3_4B_layer_4_to_Qwen3_4B_layer_4": 3.2069830894470215, | |
| "Qwen3_4B_layer_4_to_uniform": 10.104096412658691 | |
| }, | |
| "mae_hidden_states": { | |
| "Qwen3_0.6B_layer_2_to_Qwen3_0.6B_layer_2": 9.193017959594727, | |
| "Qwen3_0.6B_layer_4_to_Qwen3_0.6B_layer_2": 5.069667816162109, | |
| "Qwen3_1.7B_layer_2_to_Qwen3_0.6B_layer_2": 5.365467071533203, | |
| "Qwen3_1.7B_layer_4_to_Qwen3_0.6B_layer_2": 5.159329414367676, | |
| "Qwen3_4B_layer_2_to_Qwen3_0.6B_layer_2": 5.38785982131958, | |
| "Qwen3_4B_layer_4_to_Qwen3_0.6B_layer_2": 5.0614728927612305, | |
| "Qwen3_0.6B_layer_2_to_Qwen3_0.6B_layer_4": 5.8135833740234375, | |
| "Qwen3_0.6B_layer_4_to_Qwen3_0.6B_layer_4": 1.0534567832946777, | |
| "Qwen3_1.7B_layer_2_to_Qwen3_0.6B_layer_4": 1.0686990022659302, | |
| "Qwen3_1.7B_layer_4_to_Qwen3_0.6B_layer_4": 1.093401312828064, | |
| "Qwen3_4B_layer_2_to_Qwen3_0.6B_layer_4": 1.055011510848999, | |
| "Qwen3_4B_layer_4_to_Qwen3_0.6B_layer_4": 1.0553429126739502, | |
| "Qwen3_0.6B_layer_2_to_Qwen3_1.7B_layer_2": 8.750565528869629, | |
| "Qwen3_0.6B_layer_4_to_Qwen3_1.7B_layer_2": 1.014384150505066, | |
| "Qwen3_1.7B_layer_2_to_Qwen3_1.7B_layer_2": 1.0321710109710693, | |
| "Qwen3_1.7B_layer_4_to_Qwen3_1.7B_layer_2": 1.048608422279358, | |
| "Qwen3_4B_layer_2_to_Qwen3_1.7B_layer_2": 1.0492010116577148, | |
| "Qwen3_4B_layer_4_to_Qwen3_1.7B_layer_2": 1.053753137588501, | |
| "Qwen3_0.6B_layer_2_to_Qwen3_1.7B_layer_4": 5.545437812805176, | |
| "Qwen3_0.6B_layer_4_to_Qwen3_1.7B_layer_4": 1.3332059383392334, | |
| "Qwen3_1.7B_layer_2_to_Qwen3_1.7B_layer_4": 1.340571641921997, | |
| "Qwen3_1.7B_layer_4_to_Qwen3_1.7B_layer_4": 1.3302825689315796, | |
| "Qwen3_4B_layer_2_to_Qwen3_1.7B_layer_4": 1.3458808660507202, | |
| "Qwen3_4B_layer_4_to_Qwen3_1.7B_layer_4": 1.3539550304412842, | |
| "Qwen3_0.6B_layer_2_to_Qwen3_4B_layer_2": 7.418992042541504, | |
| "Qwen3_0.6B_layer_4_to_Qwen3_4B_layer_2": 1.1341036558151245, | |
| "Qwen3_1.7B_layer_2_to_Qwen3_4B_layer_2": 1.0986826419830322, | |
| "Qwen3_1.7B_layer_4_to_Qwen3_4B_layer_2": 1.1034446954727173, | |
| "Qwen3_4B_layer_2_to_Qwen3_4B_layer_2": 1.0604543685913086, | |
| "Qwen3_4B_layer_4_to_Qwen3_4B_layer_2": 1.0890880823135376, | |
| "Qwen3_0.6B_layer_2_to_Qwen3_4B_layer_4": 6.221832275390625, | |
| "Qwen3_0.6B_layer_4_to_Qwen3_4B_layer_4": 1.2409595251083374, | |
| "Qwen3_1.7B_layer_2_to_Qwen3_4B_layer_4": 1.2512269020080566, | |
| "Qwen3_1.7B_layer_4_to_Qwen3_4B_layer_4": 1.2450522184371948, | |
| "Qwen3_4B_layer_2_to_Qwen3_4B_layer_4": 1.2315609455108643, | |
| "Qwen3_4B_layer_4_to_Qwen3_4B_layer_4": 1.2183749675750732 | |
| }, | |
| "alignment": { | |
| "Qwen3_0.6B_layer_2_to_Qwen3_0.6B_layer_4": { | |
| "mse": 1.3515625, | |
| "mean_cosine_similarity": 0.052001953125, | |
| "std_cosine_similarity": 0.19140625, | |
| "mean_l2_distance": 69.5, | |
| "std_l2_distance": 7.3125, | |
| "mean_dimension_correlation": 0.46465563774108887, | |
| "std_dimension_correlation": 0.1362067287833464, | |
| "linear_cka": 0.5859375 | |
| }, | |
| "Qwen3_0.6B_layer_2_to_Qwen3_1.7B_layer_2": { | |
| "mse": 1.34375, | |
| "mean_cosine_similarity": 0.056396484375, | |
| "std_cosine_similarity": 0.19140625, | |
| "mean_l2_distance": 69.0, | |
| "std_l2_distance": 7.34375, | |
| "mean_dimension_correlation": 0.46726187616586684, | |
| "std_dimension_correlation": 0.13268670396178475, | |
| "linear_cka": 0.578125 | |
| }, | |
| "Qwen3_0.6B_layer_2_to_Qwen3_1.7B_layer_4": { | |
| "mse": 1.34375, | |
| "mean_cosine_similarity": 0.055908203125, | |
| "std_cosine_similarity": 0.1904296875, | |
| "mean_l2_distance": 69.0, | |
| "std_l2_distance": 7.34375, | |
| "mean_dimension_correlation": 0.4647917509078979, | |
| "std_dimension_correlation": 0.13471787446655337, | |
| "linear_cka": 0.578125 | |
| }, | |
| "Qwen3_0.6B_layer_2_to_Qwen3_4B_layer_2": { | |
| "mse": 1.3515625, | |
| "mean_cosine_similarity": 0.056640625, | |
| "std_cosine_similarity": 0.1884765625, | |
| "mean_l2_distance": 69.0, | |
| "std_l2_distance": 7.21875, | |
| "mean_dimension_correlation": 0.4657045602798462, | |
| "std_dimension_correlation": 0.13361017606712636, | |
| "linear_cka": 0.57421875 | |
| }, | |
| "Qwen3_0.6B_layer_2_to_Qwen3_4B_layer_4": { | |
| "mse": 1.34375, | |
| "mean_cosine_similarity": 0.05615234375, | |
| "std_cosine_similarity": 0.1904296875, | |
| "mean_l2_distance": 69.0, | |
| "std_l2_distance": 7.3125, | |
| "mean_dimension_correlation": 0.4670211374759674, | |
| "std_dimension_correlation": 0.13379253598505308, | |
| "linear_cka": 0.57421875 | |
| }, | |
| "Qwen3_0.6B_layer_4_to_Qwen3_0.6B_layer_2": { | |
| "mse": 1.359375, | |
| "mean_cosine_similarity": 0.052001953125, | |
| "std_cosine_similarity": 0.19140625, | |
| "mean_l2_distance": 69.5, | |
| "std_l2_distance": 7.3125, | |
| "mean_dimension_correlation": 0.4646653652191162, | |
| "std_dimension_correlation": 0.13617385784126732, | |
| "linear_cka": 0.5859375 | |
| }, | |
| "Qwen3_0.6B_layer_4_to_Qwen3_1.7B_layer_2": { | |
| "mse": 0.5, | |
| "mean_cosine_similarity": 0.8046875, | |
| "std_cosine_similarity": 0.255859375, | |
| "mean_l2_distance": 25.5, | |
| "std_l2_distance": 19.375, | |
| "mean_dimension_correlation": 0.7786048889160156, | |
| "std_dimension_correlation": 0.079747066045607, | |
| "linear_cka": 0.984375 | |
| }, | |
| "Qwen3_0.6B_layer_4_to_Qwen3_1.7B_layer_4": { | |
| "mse": 0.498046875, | |
| "mean_cosine_similarity": 0.8046875, | |
| "std_cosine_similarity": 0.25390625, | |
| "mean_l2_distance": 25.375, | |
| "std_l2_distance": 19.25, | |
| "mean_dimension_correlation": 0.7792343139648438, | |
| "std_dimension_correlation": 0.07860444177664169, | |
| "linear_cka": 0.98828125 | |
| }, | |
| "Qwen3_0.6B_layer_4_to_Qwen3_4B_layer_2": { | |
| "mse": 0.51953125, | |
| "mean_cosine_similarity": 0.78515625, | |
| "std_cosine_similarity": 0.275390625, | |
| "mean_l2_distance": 26.5, | |
| "std_l2_distance": 20.25, | |
| "mean_dimension_correlation": 0.7631843566894532, | |
| "std_dimension_correlation": 0.08458475161101357, | |
| "linear_cka": 0.98828125 | |
| }, | |
| "Qwen3_0.6B_layer_4_to_Qwen3_4B_layer_4": { | |
| "mse": 0.5078125, | |
| "mean_cosine_similarity": 0.79296875, | |
| "std_cosine_similarity": 0.2734375, | |
| "mean_l2_distance": 26.0, | |
| "std_l2_distance": 20.125, | |
| "mean_dimension_correlation": 0.7681190490722656, | |
| "std_dimension_correlation": 0.08387350384855204, | |
| "linear_cka": 0.984375 | |
| }, | |
| "Qwen3_1.7B_layer_2_to_Qwen3_0.6B_layer_2": { | |
| "mse": 1.3515625, | |
| "mean_cosine_similarity": 0.056396484375, | |
| "std_cosine_similarity": 0.19140625, | |
| "mean_l2_distance": 69.0, | |
| "std_l2_distance": 7.34375, | |
| "mean_dimension_correlation": 0.46729940325021746, | |
| "std_dimension_correlation": 0.13270665905666312, | |
| "linear_cka": 0.578125 | |
| }, | |
| "Qwen3_1.7B_layer_2_to_Qwen3_0.6B_layer_4": { | |
| "mse": 0.5, | |
| "mean_cosine_similarity": 0.8046875, | |
| "std_cosine_similarity": 0.255859375, | |
| "mean_l2_distance": 25.5, | |
| "std_l2_distance": 19.375, | |
| "mean_dimension_correlation": 0.7785774230957031, | |
| "std_dimension_correlation": 0.07977299719796638, | |
| "linear_cka": 0.984375 | |
| }, | |
| "Qwen3_1.7B_layer_2_to_Qwen3_1.7B_layer_4": { | |
| "mse": 0.49609375, | |
| "mean_cosine_similarity": 0.796875, | |
| "std_cosine_similarity": 0.2734375, | |
| "mean_l2_distance": 25.25, | |
| "std_l2_distance": 20.375, | |
| "mean_dimension_correlation": 0.7738082885742188, | |
| "std_dimension_correlation": 0.08244148252527034, | |
| "linear_cka": 0.98828125 | |
| }, | |
| "Qwen3_1.7B_layer_2_to_Qwen3_4B_layer_2": { | |
| "mse": 0.50390625, | |
| "mean_cosine_similarity": 0.79296875, | |
| "std_cosine_similarity": 0.275390625, | |
| "mean_l2_distance": 25.75, | |
| "std_l2_distance": 20.5, | |
| "mean_dimension_correlation": 0.7685455322265625, | |
| "std_dimension_correlation": 0.08636337823761847, | |
| "linear_cka": 0.98828125 | |
| }, | |
| "Qwen3_1.7B_layer_2_to_Qwen3_4B_layer_4": { | |
| "mse": 0.48046875, | |
| "mean_cosine_similarity": 0.8125, | |
| "std_cosine_similarity": 0.251953125, | |
| "mean_l2_distance": 24.5, | |
| "std_l2_distance": 19.375, | |
| "mean_dimension_correlation": 0.789703369140625, | |
| "std_dimension_correlation": 0.07704774466213117, | |
| "linear_cka": 0.984375 | |
| }, | |
| "Qwen3_1.7B_layer_4_to_Qwen3_0.6B_layer_2": { | |
| "mse": 1.34375, | |
| "mean_cosine_similarity": 0.055908203125, | |
| "std_cosine_similarity": 0.1904296875, | |
| "mean_l2_distance": 69.0, | |
| "std_l2_distance": 7.34375, | |
| "mean_dimension_correlation": 0.4647957801818848, | |
| "std_dimension_correlation": 0.1347461643666133, | |
| "linear_cka": 0.578125 | |
| }, | |
| "Qwen3_1.7B_layer_4_to_Qwen3_0.6B_layer_4": { | |
| "mse": 0.49609375, | |
| "mean_cosine_similarity": 0.8046875, | |
| "std_cosine_similarity": 0.25390625, | |
| "mean_l2_distance": 25.375, | |
| "std_l2_distance": 19.25, | |
| "mean_dimension_correlation": 0.779193115234375, | |
| "std_dimension_correlation": 0.07862977772942846, | |
| "linear_cka": 0.98828125 | |
| }, | |
| "Qwen3_1.7B_layer_4_to_Qwen3_1.7B_layer_2": { | |
| "mse": 0.4921875, | |
| "mean_cosine_similarity": 0.796875, | |
| "std_cosine_similarity": 0.2734375, | |
| "mean_l2_distance": 25.25, | |
| "std_l2_distance": 20.375, | |
| "mean_dimension_correlation": 0.773846435546875, | |
| "std_dimension_correlation": 0.08246401911605972, | |
| "linear_cka": 0.98828125 | |
| }, | |
| "Qwen3_1.7B_layer_4_to_Qwen3_4B_layer_2": { | |
| "mse": 0.5078125, | |
| "mean_cosine_similarity": 0.79296875, | |
| "std_cosine_similarity": 0.271484375, | |
| "mean_l2_distance": 26.0, | |
| "std_l2_distance": 20.125, | |
| "mean_dimension_correlation": 0.7682723999023438, | |
| "std_dimension_correlation": 0.08173679476643078, | |
| "linear_cka": 0.984375 | |
| }, | |
| "Qwen3_1.7B_layer_4_to_Qwen3_4B_layer_4": { | |
| "mse": 0.486328125, | |
| "mean_cosine_similarity": 0.80859375, | |
| "std_cosine_similarity": 0.255859375, | |
| "mean_l2_distance": 24.875, | |
| "std_l2_distance": 19.5, | |
| "mean_dimension_correlation": 0.7830284118652344, | |
| "std_dimension_correlation": 0.07756386958443834, | |
| "linear_cka": 0.98046875 | |
| }, | |
| "Qwen3_4B_layer_2_to_Qwen3_0.6B_layer_2": { | |
| "mse": 1.359375, | |
| "mean_cosine_similarity": 0.056640625, | |
| "std_cosine_similarity": 0.1884765625, | |
| "mean_l2_distance": 69.0, | |
| "std_l2_distance": 7.21875, | |
| "mean_dimension_correlation": 0.46567630767822266, | |
| "std_dimension_correlation": 0.13364195702919346, | |
| "linear_cka": 0.57421875 | |
| }, | |
| "Qwen3_4B_layer_2_to_Qwen3_0.6B_layer_4": { | |
| "mse": 0.51953125, | |
| "mean_cosine_similarity": 0.78515625, | |
| "std_cosine_similarity": 0.275390625, | |
| "mean_l2_distance": 26.5, | |
| "std_l2_distance": 20.25, | |
| "mean_dimension_correlation": 0.7631195068359375, | |
| "std_dimension_correlation": 0.08451723229099471, | |
| "linear_cka": 0.98828125 | |
| }, | |
| "Qwen3_4B_layer_2_to_Qwen3_1.7B_layer_2": { | |
| "mse": 0.50390625, | |
| "mean_cosine_similarity": 0.79296875, | |
| "std_cosine_similarity": 0.275390625, | |
| "mean_l2_distance": 25.75, | |
| "std_l2_distance": 20.5, | |
| "mean_dimension_correlation": 0.7685020446777344, | |
| "std_dimension_correlation": 0.08637723380547804, | |
| "linear_cka": 0.98828125 | |
| }, | |
| "Qwen3_4B_layer_2_to_Qwen3_1.7B_layer_4": { | |
| "mse": 0.5078125, | |
| "mean_cosine_similarity": 0.79296875, | |
| "std_cosine_similarity": 0.271484375, | |
| "mean_l2_distance": 26.0, | |
| "std_l2_distance": 20.125, | |
| "mean_dimension_correlation": 0.7681938171386719, | |
| "std_dimension_correlation": 0.08170402844520411, | |
| "linear_cka": 0.984375 | |
| }, | |
| "Qwen3_4B_layer_2_to_Qwen3_4B_layer_4": { | |
| "mse": 0.5, | |
| "mean_cosine_similarity": 0.7890625, | |
| "std_cosine_similarity": 0.27734375, | |
| "mean_l2_distance": 25.5, | |
| "std_l2_distance": 20.75, | |
| "mean_dimension_correlation": 0.7680191040039063, | |
| "std_dimension_correlation": 0.08532466419571123, | |
| "linear_cka": 0.98828125 | |
| }, | |
| "Qwen3_4B_layer_4_to_Qwen3_0.6B_layer_2": { | |
| "mse": 1.3515625, | |
| "mean_cosine_similarity": 0.05615234375, | |
| "std_cosine_similarity": 0.1904296875, | |
| "mean_l2_distance": 69.0, | |
| "std_l2_distance": 7.3125, | |
| "mean_dimension_correlation": 0.4670826017856598, | |
| "std_dimension_correlation": 0.13384197305399426, | |
| "linear_cka": 0.57421875 | |
| }, | |
| "Qwen3_4B_layer_4_to_Qwen3_0.6B_layer_4": { | |
| "mse": 0.5078125, | |
| "mean_cosine_similarity": 0.79296875, | |
| "std_cosine_similarity": 0.2734375, | |
| "mean_l2_distance": 26.0, | |
| "std_l2_distance": 20.125, | |
| "mean_dimension_correlation": 0.7681541442871094, | |
| "std_dimension_correlation": 0.08380469982339137, | |
| "linear_cka": 0.984375 | |
| }, | |
| "Qwen3_4B_layer_4_to_Qwen3_1.7B_layer_2": { | |
| "mse": 0.48046875, | |
| "mean_cosine_similarity": 0.8125, | |
| "std_cosine_similarity": 0.251953125, | |
| "mean_l2_distance": 24.5, | |
| "std_l2_distance": 19.375, | |
| "mean_dimension_correlation": 0.7896865844726563, | |
| "std_dimension_correlation": 0.07704049416139637, | |
| "linear_cka": 0.984375 | |
| }, | |
| "Qwen3_4B_layer_4_to_Qwen3_1.7B_layer_4": { | |
| "mse": 0.48828125, | |
| "mean_cosine_similarity": 0.80859375, | |
| "std_cosine_similarity": 0.255859375, | |
| "mean_l2_distance": 24.875, | |
| "std_l2_distance": 19.5, | |
| "mean_dimension_correlation": 0.7829521179199219, | |
| "std_dimension_correlation": 0.07745501980282286, | |
| "linear_cka": 0.98046875 | |
| }, | |
| "Qwen3_4B_layer_4_to_Qwen3_4B_layer_2": { | |
| "mse": 0.5, | |
| "mean_cosine_similarity": 0.7890625, | |
| "std_cosine_similarity": 0.27734375, | |
| "mean_l2_distance": 25.5, | |
| "std_l2_distance": 20.75, | |
| "mean_dimension_correlation": 0.7680793762207031, | |
| "std_dimension_correlation": 0.08533230341332358, | |
| "linear_cka": 0.98828125 | |
| }, | |
| "avg_mse": 0.783203125, | |
| "std_mse": 0.4008924066126565, | |
| "avg_mean_cosine_similarity": 0.5505045572916667, | |
| "std_mean_cosine_similarity": 0.3501489988188365, | |
| "avg_std_cosine_similarity": 0.24108072916666667, | |
| "std_std_cosine_similarity": 0.036733753214407784, | |
| "avg_mean_l2_distance": 40.05, | |
| "std_mean_l2_distance": 20.54668464740723, | |
| "avg_std_l2_distance": 15.74375, | |
| "std_std_l2_distance": 5.980928892460323, | |
| "avg_mean_dimension_correlation": 0.6713259566823642, | |
| "std_mean_dimension_correlation": 0.14540630815784578, | |
| "avg_std_dimension_correlation": 0.09921700445503585, | |
| "std_std_dimension_correlation": 0.024890230217464338, | |
| "avg_linear_cka": 0.85, | |
| "std_linear_cka": 0.19227216969593736 | |
| } | |
| } | |
| } |