Image-Text-to-Text
Transformers
Safetensors
English
Helium1_VL_2B
custom_code
Helium1-VL-2B / config.json
ameroyer's picture
Super-squash branch 'main' using huggingface_hub
1126ea7 verified
{
"attention_bias": false,
"attention_dropout": 0.0,
"auto_map": {
"AutoConfig": "configuration_helium1_casa.Helium1CASAConfig",
"AutoModel": "modeling_helium1_casa.V2Helium1"
},
"bos_token_id": 1,
"casa_attention": false,
"casa_delta_w": true,
"casa_use_asymetric_qkv": true,
"casa_windows": "images",
"eos_token_id": null,
"head_dim": 128,
"hidden_act": "silu",
"hidden_size": 2048,
"initializer_range": 0.02,
"intermediate_size": 8192,
"mask_squash_blockwise": false,
"max_position_embeddings": 4096,
"mlp_bias": false,
"model_type": "Helium1_VL_2B",
"num_attention_heads": 16,
"num_hidden_layers": 28,
"num_key_value_heads": 8,
"pad_token_id": 3,
"post_image_tokens": [],
"pre_image_tokens": [],
"pretraining_tp": 1,
"rms_norm_eps": 1e-08,
"rope_scaling": null,
"rope_theta": 20000.0,
"tie_word_embeddings": false,
"torch_dtype": "bfloat16",
"transformers_version": "4.51.3",
"use_cache": true,
"vision_config": {
"depth": 32,
"fullatt_block_indexes": [
7,
15,
23,
31
],
"hidden_act": "silu",
"hidden_size": 1280,
"image_mean": [
0.48145466,
0.4578275,
0.40821073
],
"image_std": [
0.26862954,
0.26130258,
0.27577711
],
"in_channels": 3,
"in_chans": 3,
"intermediate_size": 3420,
"model_type": "qwen2_5_vl",
"num_heads": 16,
"out_dim": 2048,
"out_hidden_size": 2048,
"patch_size": 14,
"spatial_merge_size": 2,
"spatial_patch_size": 14,
"temporal_patch_size": 1,
"tokens_per_second": 2,
"window_size": 112
},
"vocab_size": 64000,
"xa_custom_norm": false,
"xa_layers": [],
"xa_norm_on_images": false,
"xa_order": "ca_first",
"xa_update_image_embeds": false
}