{ "architectures": [ "Qwen3ForCausalLM" ], "model_type": "qwen3", "torch_dtype": "bfloat16", "transformers_version": "4.44.2", "vocab_size": 151936, "use_cache": true, "rope_theta": 1000000.0, "max_position_embeddings": 131072, "thinking_tokens": 512000, "_name_or_path": "zenlm/zen-coder-480b-instruct", "hidden_size": 5120, "num_hidden_layers": 64, "num_attention_heads": 40, "num_key_value_heads": 8, "intermediate_size": 27648, "num_experts": 16, "num_experts_per_tok": 2, "expert_interval": 1, "_architecture_type": "moe", "_total_params": "480B", "_active_params": "30B" }