| { | |
| "train/loss": 2.515625, | |
| "train/contrastive": 2.421875, | |
| "train/recons_loss": 0.65234375, | |
| "train/balance_loss": 3.84375, | |
| "train/balance_loss_contrastive": 2.796875, | |
| "train/balance_loss_recons": 1.0390625, | |
| "train/contrastive_std": 3.296875, | |
| "train/recons_std": 0.1279296875, | |
| "train/contrastive_min": 0.10791015625, | |
| "train/contrastive_max": 7.0, | |
| "train/recons_min": 0.546875, | |
| "train/recons_max": 0.89453125, | |
| "train/Qwen3_0.6B_layer_2": 0.89453125, | |
| "train/Qwen3_0.6B_layer_4": 0.58984375, | |
| "train/Qwen3_1.7B_layer_2": 0.578125, | |
| "train/Qwen3_1.7B_layer_4": 0.69140625, | |
| "train/Qwen3_4B_layer_2": 0.546875, | |
| "train/Qwen3_4B_layer_4": 0.61328125, | |
| "train/contrastives": null, | |
| "train/epoch": 1, | |
| "train/n_tokens": 5002240, | |
| "train/step": 4885 | |
| } |