{
  "attn_eq": true,
  "attn_type": "Trittention",
  "autopad": true,
  "causal_attn": true,
  "d_head": 64,
  "d_mlp": 2048,
  "d_model": 512,
  "d_vocab": 50304,
  "debug": true,
  "dropout": 0.1,
  "dt_head": 64,
  "has_mlp": true,
  "init_range": 0.01,
  "is_gated": false,
  "layer_norm_eps": 1e-05,
  "look_backward": 1,
  "mlp_type": "all",
  "n_ctx": 65,
  "n_heads": 8,
  "n_layers": 1,
  "nt_heads": 2,
  "order_attn": false,
  "pad_value": 0,
  "share_input_output_embed": true,
  "use_rotary": false,
  "window_size": 16,
  "with_ln": true
}