| { | |
| "epoch": 1, | |
| "n_tokens": 3001344, | |
| "global_step": 2931, | |
| "training_metrics": { | |
| "train/loss": 2.515625, | |
| "train/contrastive": 2.421875, | |
| "train/recons_loss": 0.671875, | |
| "train/balance_loss": 3.75, | |
| "train/balance_loss_contrastive": 2.71875, | |
| "train/balance_loss_recons": 1.0390625, | |
| "train/contrastive_std": 3.25, | |
| "train/recons_std": 0.138671875, | |
| "train/contrastive_min": 0.146484375, | |
| "train/contrastive_max": 6.9375, | |
| "train/recons_min": 0.56640625, | |
| "train/recons_max": 0.9375, | |
| "train/Qwen3_0.6B_layer_2": 0.9375, | |
| "train/Qwen3_0.6B_layer_4": 0.59765625, | |
| "train/Qwen3_1.7B_layer_2": 0.59375, | |
| "train/Qwen3_1.7B_layer_4": 0.703125, | |
| "train/Qwen3_4B_layer_2": 0.56640625, | |
| "train/Qwen3_4B_layer_4": 0.6328125, | |
| "train/contrastives": null, | |
| "train/epoch": 1, | |
| "train/n_tokens": 3001344, | |
| "train/step": 2931 | |
| }, | |
| "eval_metrics": { | |
| "global_step": 2931, | |
| "n_tokens": 3001344, | |
| "kl_divergence": { | |
| "Qwen3_0.6B_layer_2_to_Qwen3_0.6B_layer_2": 11.318835258483887, | |
| "Qwen3_0.6B_layer_2_to_Qwen3_0.6B_layer_4": 9.138021469116211, | |
| "Qwen3_0.6B_layer_2_to_Qwen3_1.7B_layer_2": 9.61973762512207, | |
| "Qwen3_0.6B_layer_2_to_Qwen3_1.7B_layer_4": 9.007281303405762, | |
| "Qwen3_0.6B_layer_2_to_Qwen3_4B_layer_2": 8.960853576660156, | |
| "Qwen3_0.6B_layer_2_to_Qwen3_4B_layer_4": 8.789403915405273, | |
| "Qwen3_0.6B_layer_2_to_uniform": 9.070773124694824, | |
| "Qwen3_0.6B_layer_4_to_Qwen3_0.6B_layer_2": 7.3046698570251465, | |
| "Qwen3_0.6B_layer_4_to_Qwen3_0.6B_layer_4": 2.55082368850708, | |
| "Qwen3_0.6B_layer_4_to_Qwen3_1.7B_layer_2": 2.5602962970733643, | |
| "Qwen3_0.6B_layer_4_to_Qwen3_1.7B_layer_4": 2.592942714691162, | |
| "Qwen3_0.6B_layer_4_to_Qwen3_4B_layer_2": 2.588857650756836, | |
| "Qwen3_0.6B_layer_4_to_Qwen3_4B_layer_4": 2.6625943183898926, | |
| "Qwen3_0.6B_layer_4_to_uniform": 9.070773124694824, | |
| "Qwen3_1.7B_layer_2_to_Qwen3_0.6B_layer_2": 10.131369590759277, | |
| "Qwen3_1.7B_layer_2_to_Qwen3_0.6B_layer_4": 5.891963481903076, | |
| "Qwen3_1.7B_layer_2_to_Qwen3_1.7B_layer_2": 6.430274963378906, | |
| "Qwen3_1.7B_layer_2_to_Qwen3_1.7B_layer_4": 6.0684638023376465, | |
| "Qwen3_1.7B_layer_2_to_Qwen3_4B_layer_2": 5.9689507484436035, | |
| "Qwen3_1.7B_layer_2_to_Qwen3_4B_layer_4": 6.356847286224365, | |
| "Qwen3_1.7B_layer_2_to_uniform": 9.88111686706543, | |
| "Qwen3_1.7B_layer_4_to_Qwen3_0.6B_layer_2": 8.19615364074707, | |
| "Qwen3_1.7B_layer_4_to_Qwen3_0.6B_layer_4": 2.8310694694519043, | |
| "Qwen3_1.7B_layer_4_to_Qwen3_1.7B_layer_2": 2.7546491622924805, | |
| "Qwen3_1.7B_layer_4_to_Qwen3_1.7B_layer_4": 2.7474663257598877, | |
| "Qwen3_1.7B_layer_4_to_Qwen3_4B_layer_2": 2.857220411300659, | |
| "Qwen3_1.7B_layer_4_to_Qwen3_4B_layer_4": 2.925436019897461, | |
| "Qwen3_1.7B_layer_4_to_uniform": 9.88111686706543, | |
| "Qwen3_4B_layer_2_to_Qwen3_0.6B_layer_2": 7.565979957580566, | |
| "Qwen3_4B_layer_2_to_Qwen3_0.6B_layer_4": 2.9663586616516113, | |
| "Qwen3_4B_layer_2_to_Qwen3_1.7B_layer_2": 2.719478130340576, | |
| "Qwen3_4B_layer_2_to_Qwen3_1.7B_layer_4": 2.741952657699585, | |
| "Qwen3_4B_layer_2_to_Qwen3_4B_layer_2": 2.7755935192108154, | |
| "Qwen3_4B_layer_2_to_Qwen3_4B_layer_4": 2.7375831604003906, | |
| "Qwen3_4B_layer_2_to_uniform": 10.104096412658691, | |
| "Qwen3_4B_layer_4_to_Qwen3_0.6B_layer_2": 7.4653778076171875, | |
| "Qwen3_4B_layer_4_to_Qwen3_0.6B_layer_4": 3.67035174369812, | |
| "Qwen3_4B_layer_4_to_Qwen3_1.7B_layer_2": 3.566011905670166, | |
| "Qwen3_4B_layer_4_to_Qwen3_1.7B_layer_4": 3.7160496711730957, | |
| "Qwen3_4B_layer_4_to_Qwen3_4B_layer_2": 3.552424907684326, | |
| "Qwen3_4B_layer_4_to_Qwen3_4B_layer_4": 3.459855556488037, | |
| "Qwen3_4B_layer_4_to_uniform": 10.104096412658691 | |
| }, | |
| "mae_hidden_states": { | |
| "Qwen3_0.6B_layer_2_to_Qwen3_0.6B_layer_2": 15.485275268554688, | |
| "Qwen3_0.6B_layer_4_to_Qwen3_0.6B_layer_2": 22.359243392944336, | |
| "Qwen3_1.7B_layer_2_to_Qwen3_0.6B_layer_2": 21.841341018676758, | |
| "Qwen3_1.7B_layer_4_to_Qwen3_0.6B_layer_2": 20.851577758789062, | |
| "Qwen3_4B_layer_2_to_Qwen3_0.6B_layer_2": 23.41849136352539, | |
| "Qwen3_4B_layer_4_to_Qwen3_0.6B_layer_2": 22.13389015197754, | |
| "Qwen3_0.6B_layer_2_to_Qwen3_0.6B_layer_4": 8.68209457397461, | |
| "Qwen3_0.6B_layer_4_to_Qwen3_0.6B_layer_4": 1.0910885334014893, | |
| "Qwen3_1.7B_layer_2_to_Qwen3_0.6B_layer_4": 1.0663363933563232, | |
| "Qwen3_1.7B_layer_4_to_Qwen3_0.6B_layer_4": 1.1295608282089233, | |
| "Qwen3_4B_layer_2_to_Qwen3_0.6B_layer_4": 1.096497654914856, | |
| "Qwen3_4B_layer_4_to_Qwen3_0.6B_layer_4": 1.0976781845092773, | |
| "Qwen3_0.6B_layer_2_to_Qwen3_1.7B_layer_2": 9.745889663696289, | |
| "Qwen3_0.6B_layer_4_to_Qwen3_1.7B_layer_2": 1.073387622833252, | |
| "Qwen3_1.7B_layer_2_to_Qwen3_1.7B_layer_2": 1.0651912689208984, | |
| "Qwen3_1.7B_layer_4_to_Qwen3_1.7B_layer_2": 1.1097475290298462, | |
| "Qwen3_4B_layer_2_to_Qwen3_1.7B_layer_2": 1.102055311203003, | |
| "Qwen3_4B_layer_4_to_Qwen3_1.7B_layer_2": 1.1042507886886597, | |
| "Qwen3_0.6B_layer_2_to_Qwen3_1.7B_layer_4": 6.085488319396973, | |
| "Qwen3_0.6B_layer_4_to_Qwen3_1.7B_layer_4": 1.443469762802124, | |
| "Qwen3_1.7B_layer_2_to_Qwen3_1.7B_layer_4": 1.407573938369751, | |
| "Qwen3_1.7B_layer_4_to_Qwen3_1.7B_layer_4": 1.394163966178894, | |
| "Qwen3_4B_layer_2_to_Qwen3_1.7B_layer_4": 1.4274914264678955, | |
| "Qwen3_4B_layer_4_to_Qwen3_1.7B_layer_4": 1.423639178276062, | |
| "Qwen3_0.6B_layer_2_to_Qwen3_4B_layer_2": 6.723683834075928, | |
| "Qwen3_0.6B_layer_4_to_Qwen3_4B_layer_2": 1.2199777364730835, | |
| "Qwen3_1.7B_layer_2_to_Qwen3_4B_layer_2": 1.1646456718444824, | |
| "Qwen3_1.7B_layer_4_to_Qwen3_4B_layer_2": 1.1640838384628296, | |
| "Qwen3_4B_layer_2_to_Qwen3_4B_layer_2": 1.1155877113342285, | |
| "Qwen3_4B_layer_4_to_Qwen3_4B_layer_2": 1.1568272113800049, | |
| "Qwen3_0.6B_layer_2_to_Qwen3_4B_layer_4": 8.499415397644043, | |
| "Qwen3_0.6B_layer_4_to_Qwen3_4B_layer_4": 1.2985743284225464, | |
| "Qwen3_1.7B_layer_2_to_Qwen3_4B_layer_4": 1.2958557605743408, | |
| "Qwen3_1.7B_layer_4_to_Qwen3_4B_layer_4": 1.2903549671173096, | |
| "Qwen3_4B_layer_2_to_Qwen3_4B_layer_4": 1.2823046445846558, | |
| "Qwen3_4B_layer_4_to_Qwen3_4B_layer_4": 1.2616506814956665 | |
| }, | |
| "alignment": { | |
| "Qwen3_0.6B_layer_2_to_Qwen3_0.6B_layer_4": { | |
| "mse": 1.421875, | |
| "mean_cosine_similarity": -0.03369140625, | |
| "std_cosine_similarity": 0.109375, | |
| "mean_l2_distance": 72.5, | |
| "std_l2_distance": 3.90625, | |
| "mean_dimension_correlation": 0.254237837344408, | |
| "std_dimension_correlation": 0.16181929189675745, | |
| "linear_cka": 0.5859375 | |
| }, | |
| "Qwen3_0.6B_layer_2_to_Qwen3_1.7B_layer_2": { | |
| "mse": 1.421875, | |
| "mean_cosine_similarity": -0.0284423828125, | |
| "std_cosine_similarity": 0.10888671875, | |
| "mean_l2_distance": 72.5, | |
| "std_l2_distance": 3.890625, | |
| "mean_dimension_correlation": 0.25683254674077033, | |
| "std_dimension_correlation": 0.16029215327593901, | |
| "linear_cka": 0.57421875 | |
| }, | |
| "Qwen3_0.6B_layer_2_to_Qwen3_1.7B_layer_4": { | |
| "mse": 1.4140625, | |
| "mean_cosine_similarity": -0.0252685546875, | |
| "std_cosine_similarity": 0.1083984375, | |
| "mean_l2_distance": 72.5, | |
| "std_l2_distance": 3.875, | |
| "mean_dimension_correlation": 0.25395019352436066, | |
| "std_dimension_correlation": 0.15926056622745546, | |
| "linear_cka": 0.578125 | |
| }, | |
| "Qwen3_0.6B_layer_2_to_Qwen3_4B_layer_2": { | |
| "mse": 1.421875, | |
| "mean_cosine_similarity": -0.03271484375, | |
| "std_cosine_similarity": 0.1064453125, | |
| "mean_l2_distance": 72.5, | |
| "std_l2_distance": 3.796875, | |
| "mean_dimension_correlation": 0.24886183738708495, | |
| "std_dimension_correlation": 0.15849261736593726, | |
| "linear_cka": 0.55859375 | |
| }, | |
| "Qwen3_0.6B_layer_2_to_Qwen3_4B_layer_4": { | |
| "mse": 1.421875, | |
| "mean_cosine_similarity": -0.033203125, | |
| "std_cosine_similarity": 0.109375, | |
| "mean_l2_distance": 72.5, | |
| "std_l2_distance": 3.890625, | |
| "mean_dimension_correlation": 0.256584095954895, | |
| "std_dimension_correlation": 0.15873214025442897, | |
| "linear_cka": 0.57421875 | |
| }, | |
| "Qwen3_0.6B_layer_4_to_Qwen3_0.6B_layer_2": { | |
| "mse": 1.4296875, | |
| "mean_cosine_similarity": -0.03369140625, | |
| "std_cosine_similarity": 0.109375, | |
| "mean_l2_distance": 72.5, | |
| "std_l2_distance": 3.90625, | |
| "mean_dimension_correlation": 0.2542317323386669, | |
| "std_dimension_correlation": 0.16183266276519212, | |
| "linear_cka": 0.5859375 | |
| }, | |
| "Qwen3_0.6B_layer_4_to_Qwen3_1.7B_layer_2": { | |
| "mse": 0.734375, | |
| "mean_cosine_similarity": 0.65625, | |
| "std_cosine_similarity": 0.28515625, | |
| "mean_l2_distance": 37.25, | |
| "std_l2_distance": 19.5, | |
| "mean_dimension_correlation": 0.6187647342681885, | |
| "std_dimension_correlation": 0.11470426666838326, | |
| "linear_cka": 0.984375 | |
| }, | |
| "Qwen3_0.6B_layer_4_to_Qwen3_1.7B_layer_4": { | |
| "mse": 0.7265625, | |
| "mean_cosine_similarity": 0.66015625, | |
| "std_cosine_similarity": 0.279296875, | |
| "mean_l2_distance": 37.0, | |
| "std_l2_distance": 19.25, | |
| "mean_dimension_correlation": 0.6220208525657653, | |
| "std_dimension_correlation": 0.11040039509848326, | |
| "linear_cka": 0.984375 | |
| }, | |
| "Qwen3_0.6B_layer_4_to_Qwen3_4B_layer_2": { | |
| "mse": 0.76171875, | |
| "mean_cosine_similarity": 0.62890625, | |
| "std_cosine_similarity": 0.302734375, | |
| "mean_l2_distance": 38.75, | |
| "std_l2_distance": 20.125, | |
| "mean_dimension_correlation": 0.592758321762085, | |
| "std_dimension_correlation": 0.11886540241980308, | |
| "linear_cka": 0.98046875 | |
| }, | |
| "Qwen3_0.6B_layer_4_to_Qwen3_4B_layer_4": { | |
| "mse": 0.74609375, | |
| "mean_cosine_similarity": 0.63671875, | |
| "std_cosine_similarity": 0.302734375, | |
| "mean_l2_distance": 38.25, | |
| "std_l2_distance": 20.25, | |
| "mean_dimension_correlation": 0.6037769317626953, | |
| "std_dimension_correlation": 0.11647753822253991, | |
| "linear_cka": 0.984375 | |
| }, | |
| "Qwen3_1.7B_layer_2_to_Qwen3_0.6B_layer_2": { | |
| "mse": 1.4296875, | |
| "mean_cosine_similarity": -0.0284423828125, | |
| "std_cosine_similarity": 0.10888671875, | |
| "mean_l2_distance": 72.5, | |
| "std_l2_distance": 3.890625, | |
| "mean_dimension_correlation": 0.25684744566679, | |
| "std_dimension_correlation": 0.16032274573798164, | |
| "linear_cka": 0.57421875 | |
| }, | |
| "Qwen3_1.7B_layer_2_to_Qwen3_0.6B_layer_4": { | |
| "mse": 0.734375, | |
| "mean_cosine_similarity": 0.65625, | |
| "std_cosine_similarity": 0.28515625, | |
| "mean_l2_distance": 37.25, | |
| "std_l2_distance": 19.5, | |
| "mean_dimension_correlation": 0.6187384128570557, | |
| "std_dimension_correlation": 0.11471572089316741, | |
| "linear_cka": 0.984375 | |
| }, | |
| "Qwen3_1.7B_layer_2_to_Qwen3_1.7B_layer_4": { | |
| "mse": 0.734375, | |
| "mean_cosine_similarity": 0.6484375, | |
| "std_cosine_similarity": 0.30078125, | |
| "mean_l2_distance": 37.5, | |
| "std_l2_distance": 20.375, | |
| "mean_dimension_correlation": 0.6119367599487304, | |
| "std_dimension_correlation": 0.1157440646478159, | |
| "linear_cka": 0.99609375 | |
| }, | |
| "Qwen3_1.7B_layer_2_to_Qwen3_4B_layer_2": { | |
| "mse": 0.75390625, | |
| "mean_cosine_similarity": 0.63671875, | |
| "std_cosine_similarity": 0.30078125, | |
| "mean_l2_distance": 38.25, | |
| "std_l2_distance": 20.375, | |
| "mean_dimension_correlation": 0.5996460914611816, | |
| "std_dimension_correlation": 0.11944124129277625, | |
| "linear_cka": 0.98046875 | |
| }, | |
| "Qwen3_1.7B_layer_2_to_Qwen3_4B_layer_4": { | |
| "mse": 0.70703125, | |
| "mean_cosine_similarity": 0.67578125, | |
| "std_cosine_similarity": 0.27734375, | |
| "mean_l2_distance": 36.0, | |
| "std_l2_distance": 19.5, | |
| "mean_dimension_correlation": 0.638215160369873, | |
| "std_dimension_correlation": 0.10975611081697591, | |
| "linear_cka": 0.984375 | |
| }, | |
| "Qwen3_1.7B_layer_4_to_Qwen3_0.6B_layer_2": { | |
| "mse": 1.4140625, | |
| "mean_cosine_similarity": -0.0252685546875, | |
| "std_cosine_similarity": 0.1083984375, | |
| "mean_l2_distance": 72.5, | |
| "std_l2_distance": 3.875, | |
| "mean_dimension_correlation": 0.25395837128162385, | |
| "std_dimension_correlation": 0.15926372552177567, | |
| "linear_cka": 0.578125 | |
| }, | |
| "Qwen3_1.7B_layer_4_to_Qwen3_0.6B_layer_4": { | |
| "mse": 0.7265625, | |
| "mean_cosine_similarity": 0.66015625, | |
| "std_cosine_similarity": 0.279296875, | |
| "mean_l2_distance": 37.0, | |
| "std_l2_distance": 19.25, | |
| "mean_dimension_correlation": 0.6219659209251404, | |
| "std_dimension_correlation": 0.11032863879333923, | |
| "linear_cka": 0.984375 | |
| }, | |
| "Qwen3_1.7B_layer_4_to_Qwen3_1.7B_layer_2": { | |
| "mse": 0.734375, | |
| "mean_cosine_similarity": 0.6484375, | |
| "std_cosine_similarity": 0.30078125, | |
| "mean_l2_distance": 37.5, | |
| "std_l2_distance": 20.375, | |
| "mean_dimension_correlation": 0.6119108200073242, | |
| "std_dimension_correlation": 0.1157383378132106, | |
| "linear_cka": 0.99609375 | |
| }, | |
| "Qwen3_1.7B_layer_4_to_Qwen3_4B_layer_2": { | |
| "mse": 0.7578125, | |
| "mean_cosine_similarity": 0.6328125, | |
| "std_cosine_similarity": 0.298828125, | |
| "mean_l2_distance": 38.5, | |
| "std_l2_distance": 20.125, | |
| "mean_dimension_correlation": 0.5979020118713378, | |
| "std_dimension_correlation": 0.1151705814719715, | |
| "linear_cka": 0.984375 | |
| }, | |
| "Qwen3_1.7B_layer_4_to_Qwen3_4B_layer_4": { | |
| "mse": 0.71875, | |
| "mean_cosine_similarity": 0.6640625, | |
| "std_cosine_similarity": 0.28125, | |
| "mean_l2_distance": 36.75, | |
| "std_l2_distance": 19.5, | |
| "mean_dimension_correlation": 0.6274345874786377, | |
| "std_dimension_correlation": 0.11253210388812478, | |
| "linear_cka": 0.98046875 | |
| }, | |
| "Qwen3_4B_layer_2_to_Qwen3_0.6B_layer_2": { | |
| "mse": 1.4296875, | |
| "mean_cosine_similarity": -0.03271484375, | |
| "std_cosine_similarity": 0.1064453125, | |
| "mean_l2_distance": 72.5, | |
| "std_l2_distance": 3.796875, | |
| "mean_dimension_correlation": 0.24887723177671434, | |
| "std_dimension_correlation": 0.15850834600861563, | |
| "linear_cka": 0.55859375 | |
| }, | |
| "Qwen3_4B_layer_2_to_Qwen3_0.6B_layer_4": { | |
| "mse": 0.76171875, | |
| "mean_cosine_similarity": 0.62890625, | |
| "std_cosine_similarity": 0.302734375, | |
| "mean_l2_distance": 38.75, | |
| "std_l2_distance": 20.125, | |
| "mean_dimension_correlation": 0.5927883148193359, | |
| "std_dimension_correlation": 0.11887853166661289, | |
| "linear_cka": 0.98046875 | |
| }, | |
| "Qwen3_4B_layer_2_to_Qwen3_1.7B_layer_2": { | |
| "mse": 0.75390625, | |
| "mean_cosine_similarity": 0.63671875, | |
| "std_cosine_similarity": 0.30078125, | |
| "mean_l2_distance": 38.25, | |
| "std_l2_distance": 20.375, | |
| "mean_dimension_correlation": 0.5995779991149902, | |
| "std_dimension_correlation": 0.1193691675179003, | |
| "linear_cka": 0.98046875 | |
| }, | |
| "Qwen3_4B_layer_2_to_Qwen3_1.7B_layer_4": { | |
| "mse": 0.7578125, | |
| "mean_cosine_similarity": 0.6328125, | |
| "std_cosine_similarity": 0.298828125, | |
| "mean_l2_distance": 38.5, | |
| "std_l2_distance": 20.125, | |
| "mean_dimension_correlation": 0.5978128433227539, | |
| "std_dimension_correlation": 0.11512506347641102, | |
| "linear_cka": 0.984375 | |
| }, | |
| "Qwen3_4B_layer_2_to_Qwen3_4B_layer_4": { | |
| "mse": 0.75390625, | |
| "mean_cosine_similarity": 0.62890625, | |
| "std_cosine_similarity": 0.3046875, | |
| "mean_l2_distance": 38.5, | |
| "std_l2_distance": 20.625, | |
| "mean_dimension_correlation": 0.5955796241760254, | |
| "std_dimension_correlation": 0.11906185378925987, | |
| "linear_cka": 0.98828125 | |
| }, | |
| "Qwen3_4B_layer_4_to_Qwen3_0.6B_layer_2": { | |
| "mse": 1.4296875, | |
| "mean_cosine_similarity": -0.033203125, | |
| "std_cosine_similarity": 0.109375, | |
| "mean_l2_distance": 72.5, | |
| "std_l2_distance": 3.890625, | |
| "mean_dimension_correlation": 0.2565764158964157, | |
| "std_dimension_correlation": 0.1587071816624074, | |
| "linear_cka": 0.57421875 | |
| }, | |
| "Qwen3_4B_layer_4_to_Qwen3_0.6B_layer_4": { | |
| "mse": 0.74609375, | |
| "mean_cosine_similarity": 0.63671875, | |
| "std_cosine_similarity": 0.302734375, | |
| "mean_l2_distance": 38.25, | |
| "std_l2_distance": 20.25, | |
| "mean_dimension_correlation": 0.6037120819091797, | |
| "std_dimension_correlation": 0.11639985412027169, | |
| "linear_cka": 0.984375 | |
| }, | |
| "Qwen3_4B_layer_4_to_Qwen3_1.7B_layer_2": { | |
| "mse": 0.70703125, | |
| "mean_cosine_similarity": 0.67578125, | |
| "std_cosine_similarity": 0.27734375, | |
| "mean_l2_distance": 36.0, | |
| "std_l2_distance": 19.5, | |
| "mean_dimension_correlation": 0.6382188320159912, | |
| "std_dimension_correlation": 0.10972459865917429, | |
| "linear_cka": 0.984375 | |
| }, | |
| "Qwen3_4B_layer_4_to_Qwen3_1.7B_layer_4": { | |
| "mse": 0.71875, | |
| "mean_cosine_similarity": 0.6640625, | |
| "std_cosine_similarity": 0.28125, | |
| "mean_l2_distance": 36.75, | |
| "std_l2_distance": 19.5, | |
| "mean_dimension_correlation": 0.6273346900939941, | |
| "std_dimension_correlation": 0.1124933006393999, | |
| "linear_cka": 0.98046875 | |
| }, | |
| "Qwen3_4B_layer_4_to_Qwen3_4B_layer_2": { | |
| "mse": 0.75390625, | |
| "mean_cosine_similarity": 0.62890625, | |
| "std_cosine_similarity": 0.3046875, | |
| "mean_l2_distance": 38.5, | |
| "std_l2_distance": 20.625, | |
| "mean_dimension_correlation": 0.5955384254455567, | |
| "std_dimension_correlation": 0.11905433194805992, | |
| "linear_cka": 0.98828125 | |
| }, | |
| "avg_mse": 0.9674479166666666, | |
| "std_mse": 0.32276016873074226, | |
| "avg_mean_cosine_similarity": 0.4210286458333333, | |
| "std_mean_cosine_similarity": 0.31965592389258923, | |
| "avg_std_cosine_similarity": 0.23173828125, | |
| "std_std_cosine_similarity": 0.08757208624967457, | |
| "avg_mean_l2_distance": 49.28333333333333, | |
| "std_mean_l2_distance": 16.43189648890907, | |
| "avg_std_l2_distance": 14.598958333333334, | |
| "std_std_l2_distance": 7.594291533045653, | |
| "avg_mean_dimension_correlation": 0.49188637080291897, | |
| "std_mean_dimension_correlation": 0.16857047305704442, | |
| "avg_std_dimension_correlation": 0.1300404178186724, | |
| "std_std_dimension_correlation": 0.021172306029780062, | |
| "avg_linear_cka": 0.8479166666666667, | |
| "std_linear_cka": 0.19363585907609904 | |
| } | |
| } | |
| } |