diff --git a/.gitattributes b/.gitattributes index a6344aac8c09253b3b630fb776ae94478aa0275b..52373fe24473b1aa44333d318f578ae6bf04b49b 100644 --- a/.gitattributes +++ b/.gitattributes @@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text *.zip filter=lfs diff=lfs merge=lfs -text *.zst filter=lfs diff=lfs merge=lfs -text *tfevents* filter=lfs diff=lfs merge=lfs -text +tokenizer.json filter=lfs diff=lfs merge=lfs -text diff --git a/mlc-chat-config.json b/mlc-chat-config.json new file mode 100644 index 0000000000000000000000000000000000000000..c2c85cfcf670518dacb1626ca9ccd567b0325e74 --- /dev/null +++ b/mlc-chat-config.json @@ -0,0 +1,84 @@ +{ + "version": "0.1.0", + "model_type": "gemma2", + "quantization": "q4f32_1", + "model_config": { + "hidden_size": 2304, + "intermediate_size": 9216, + "attention_bias": false, + "num_attention_heads": 8, + "num_key_value_heads": 4, + "head_dim": 256, + "num_hidden_layers": 26, + "rms_norm_eps": 1e-06, + "vocab_size": 256000, + "hidden_activation": "gelu_pytorch_tanh", + "position_embedding_base": 10000.0, + "context_window_size": 4096, + "prefill_chunk_size": 4096, + "tensor_parallel_shards": 1, + "max_batch_size": 128, + "attn_logit_softcapping": 50.0, + "final_logit_softcapping": 30.0, + "query_pre_attn_scalar": 224, + "sliding_window": 4096 + }, + "vocab_size": 256000, + "context_window_size": 4096, + "sliding_window_size": -1, + "prefill_chunk_size": 4096, + "attention_sink_size": -1, + "tensor_parallel_shards": 1, + "pipeline_parallel_stages": 1, + "temperature": 1.0, + "presence_penalty": 0.0, + "frequency_penalty": 0.0, + "repetition_penalty": 1.0, + "top_p": 1.0, + "tokenizer_files": [ + "tokenizer.model", + "tokenizer.json", + "tokenizer_config.json" + ], + "tokenizer_info": { + "token_postproc_method": "byte_fallback", + "prepend_space_in_encode": false, + "strip_space_in_decode": false + }, + "conv_template": { + "name": "gemma_instruction", + "system_template": "{system_message}", + "system_message": "", + "system_prefix_token_ids": [ + 2 + ], + "add_role_after_system_message": true, + "roles": { + "user": "user", + "assistant": "model" + }, + "role_templates": { + "user": "{user_message}", + "assistant": "{assistant_message}", + "tool": "{tool_message}" + }, + "messages": [], + "seps": [ + "\n" + ], + "role_content_sep": "\n", + "role_empty_sep": "\n", + "stop_str": [ + "" + ], + "stop_token_ids": [ + 1, + 107 + ], + "function_string": "", + "use_function_calling": false + }, + "pad_token_id": 0, + "bos_token_id": 2, + "eos_token_id": 1 +} \ No newline at end of file diff --git a/ndarray-cache-b16.json b/ndarray-cache-b16.json new file mode 100644 index 0000000000000000000000000000000000000000..c86885dce452b6230f33a5d8c6d2304e9aa9a946 --- /dev/null +++ b/ndarray-cache-b16.json @@ -0,0 +1,3705 @@ +{ + "metadata": { + "ParamSize": 315, + "ParamBytes": 1634780160.0, + "BitsPerParam": 5.002498464347751 + }, + "records": [ + { + "dataPath": "params_shard_0.bin", + "format": "raw-shard", + "nbytes": 294912000, + "records": [ + { + "name": "model.embed_tokens.q_weight", + "shape": [ + 256000, + 288 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 294912000, + "byteOffset": 0 + } + ], + "md5sum": "1056a334963736d59422e83e31cb241d" + }, + { + "dataPath": "params_shard_1.bin", + "format": "raw-shard", + "nbytes": 36864000, + "records": [ + { + "name": "model.embed_tokens.q_scale", + "shape": [ + 256000, + 72 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 36864000, + "byteOffset": 0 + } + ], + "md5sum": "625a34c9eab976edd9064fcd0bcd959b" + }, + { + "dataPath": "params_shard_2.bin", + "format": "raw-shard", + "nbytes": 33182208, + "records": [ + { + "name": "model.layers.0.input_layernorm.weight", + "shape": [ + 2304 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4608, + "byteOffset": 0 + }, + { + "name": "model.layers.0.mlp.down_proj.q_weight", + "shape": [ + 2304, + 1152 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 10616832, + "byteOffset": 4608 + }, + { + "name": "model.layers.0.mlp.down_proj.q_scale", + "shape": [ + 2304, + 288 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1327104, + "byteOffset": 10621440 + }, + { + "name": "model.layers.0.mlp.gate_up_proj.q_weight", + "shape": [ + 18432, + 288 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 21233664, + "byteOffset": 11948544 + } + ], + "md5sum": "6acbc70fe032f619ec0d7bf36a54a33c" + }, + { + "dataPath": "params_shard_3.bin", + "format": "raw-shard", + "nbytes": 21233664, + "records": [ + { + "name": "model.layers.1.mlp.gate_up_proj.q_weight", + "shape": [ + 18432, + 288 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 21233664, + "byteOffset": 0 + } + ], + "md5sum": "175a78fbf7424be2f32f50fbee026857" + }, + { + "dataPath": "params_shard_4.bin", + "format": "raw-shard", + "nbytes": 33214464, + "records": [ + { + "name": "model.layers.0.mlp.gate_up_proj.q_scale", + "shape": [ + 18432, + 72 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2654208, + "byteOffset": 0 + }, + { + "name": "model.layers.0.post_attention_layernorm.weight", + "shape": [ + 2304 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4608, + "byteOffset": 2654208 + }, + { + "name": "model.layers.0.post_feedforward_layernorm.weight", + "shape": [ + 2304 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4608, + "byteOffset": 2658816 + }, + { + "name": "model.layers.0.pre_feedforward_layernorm.weight", + "shape": [ + 2304 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4608, + "byteOffset": 2663424 + }, + { + "name": "model.layers.0.self_attn.qkv_proj.q_weight", + "shape": [ + 4096, + 288 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4718592, + "byteOffset": 2668032 + }, + { + "name": "model.layers.0.self_attn.qkv_proj.q_scale", + "shape": [ + 4096, + 72 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 589824, + "byteOffset": 7386624 + }, + { + "name": "model.layers.0.self_attn.o_proj.q_weight", + "shape": [ + 2304, + 256 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 7976448 + }, + { + "name": "model.layers.0.self_attn.o_proj.q_scale", + "shape": [ + 2304, + 64 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 294912, + "byteOffset": 10335744 + }, + { + "name": "model.layers.1.input_layernorm.weight", + "shape": [ + 2304 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4608, + "byteOffset": 10630656 + }, + { + "name": "model.layers.1.mlp.down_proj.q_weight", + "shape": [ + 2304, + 1152 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 10616832, + "byteOffset": 10635264 + }, + { + "name": "model.layers.1.mlp.down_proj.q_scale", + "shape": [ + 2304, + 288 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1327104, + "byteOffset": 21252096 + }, + { + "name": "model.layers.1.mlp.gate_up_proj.q_scale", + "shape": [ + 18432, + 72 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2654208, + "byteOffset": 22579200 + }, + { + "name": "model.layers.1.post_attention_layernorm.weight", + "shape": [ + 2304 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4608, + "byteOffset": 25233408 + }, + { + "name": "model.layers.1.post_feedforward_layernorm.weight", + "shape": [ + 2304 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4608, + "byteOffset": 25238016 + }, + { + "name": "model.layers.1.pre_feedforward_layernorm.weight", + "shape": [ + 2304 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4608, + "byteOffset": 25242624 + }, + { + "name": "model.layers.1.self_attn.qkv_proj.q_weight", + "shape": [ + 4096, + 288 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4718592, + "byteOffset": 25247232 + }, + { + "name": "model.layers.1.self_attn.qkv_proj.q_scale", + "shape": [ + 4096, + 72 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 589824, + "byteOffset": 29965824 + }, + { + "name": "model.layers.1.self_attn.o_proj.q_weight", + "shape": [ + 2304, + 256 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 30555648 + }, + { + "name": "model.layers.1.self_attn.o_proj.q_scale", + "shape": [ + 2304, + 64 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 294912, + "byteOffset": 32914944 + }, + { + "name": "model.layers.10.input_layernorm.weight", + "shape": [ + 2304 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4608, + "byteOffset": 33209856 + } + ], + "md5sum": "7046dd6a28b8387479009365eae5ff4c" + }, + { + "dataPath": "params_shard_5.bin", + "format": "raw-shard", + "nbytes": 33177600, + "records": [ + { + "name": "model.layers.10.mlp.down_proj.q_weight", + "shape": [ + 2304, + 1152 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 10616832, + "byteOffset": 0 + }, + { + "name": "model.layers.10.mlp.down_proj.q_scale", + "shape": [ + 2304, + 288 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1327104, + "byteOffset": 10616832 + }, + { + "name": "model.layers.10.mlp.gate_up_proj.q_weight", + "shape": [ + 18432, + 288 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 21233664, + "byteOffset": 11943936 + } + ], + "md5sum": "d40768de670da0b84ec61b5931183d43" + }, + { + "dataPath": "params_shard_6.bin", + "format": "raw-shard", + "nbytes": 21233664, + "records": [ + { + "name": "model.layers.11.mlp.gate_up_proj.q_weight", + "shape": [ + 18432, + 288 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 21233664, + "byteOffset": 0 + } + ], + "md5sum": "de00bdc0805a54eb5e45ed0909b7ed87" + }, + { + "dataPath": "params_shard_7.bin", + "format": "raw-shard", + "nbytes": 33214464, + "records": [ + { + "name": "model.layers.10.mlp.gate_up_proj.q_scale", + "shape": [ + 18432, + 72 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2654208, + "byteOffset": 0 + }, + { + "name": "model.layers.10.post_attention_layernorm.weight", + "shape": [ + 2304 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4608, + "byteOffset": 2654208 + }, + { + "name": "model.layers.10.post_feedforward_layernorm.weight", + "shape": [ + 2304 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4608, + "byteOffset": 2658816 + }, + { + "name": "model.layers.10.pre_feedforward_layernorm.weight", + "shape": [ + 2304 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4608, + "byteOffset": 2663424 + }, + { + "name": "model.layers.10.self_attn.qkv_proj.q_weight", + "shape": [ + 4096, + 288 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4718592, + "byteOffset": 2668032 + }, + { + "name": "model.layers.10.self_attn.qkv_proj.q_scale", + "shape": [ + 4096, + 72 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 589824, + "byteOffset": 7386624 + }, + { + "name": "model.layers.10.self_attn.o_proj.q_weight", + "shape": [ + 2304, + 256 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 7976448 + }, + { + "name": "model.layers.10.self_attn.o_proj.q_scale", + "shape": [ + 2304, + 64 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 294912, + "byteOffset": 10335744 + }, + { + "name": "model.layers.11.input_layernorm.weight", + "shape": [ + 2304 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4608, + "byteOffset": 10630656 + }, + { + "name": "model.layers.11.mlp.down_proj.q_weight", + "shape": [ + 2304, + 1152 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 10616832, + "byteOffset": 10635264 + }, + { + "name": "model.layers.11.mlp.down_proj.q_scale", + "shape": [ + 2304, + 288 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1327104, + "byteOffset": 21252096 + }, + { + "name": "model.layers.11.mlp.gate_up_proj.q_scale", + "shape": [ + 18432, + 72 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2654208, + "byteOffset": 22579200 + }, + { + "name": "model.layers.11.post_attention_layernorm.weight", + "shape": [ + 2304 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4608, + "byteOffset": 25233408 + }, + { + "name": "model.layers.11.post_feedforward_layernorm.weight", + "shape": [ + 2304 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4608, + "byteOffset": 25238016 + }, + { + "name": "model.layers.11.pre_feedforward_layernorm.weight", + "shape": [ + 2304 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4608, + "byteOffset": 25242624 + }, + { + "name": "model.layers.11.self_attn.qkv_proj.q_weight", + "shape": [ + 4096, + 288 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4718592, + "byteOffset": 25247232 + }, + { + "name": "model.layers.11.self_attn.qkv_proj.q_scale", + "shape": [ + 4096, + 72 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 589824, + "byteOffset": 29965824 + }, + { + "name": "model.layers.11.self_attn.o_proj.q_weight", + "shape": [ + 2304, + 256 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 30555648 + }, + { + "name": "model.layers.11.self_attn.o_proj.q_scale", + "shape": [ + 2304, + 64 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 294912, + "byteOffset": 32914944 + }, + { + "name": "model.layers.12.input_layernorm.weight", + "shape": [ + 2304 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4608, + "byteOffset": 33209856 + } + ], + "md5sum": "80b8a5cdb189dfe18e2238a896d52429" + }, + { + "dataPath": "params_shard_8.bin", + "format": "raw-shard", + "nbytes": 33177600, + "records": [ + { + "name": "model.layers.12.mlp.down_proj.q_weight", + "shape": [ + 2304, + 1152 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 10616832, + "byteOffset": 0 + }, + { + "name": "model.layers.12.mlp.down_proj.q_scale", + "shape": [ + 2304, + 288 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1327104, + "byteOffset": 10616832 + }, + { + "name": "model.layers.12.mlp.gate_up_proj.q_weight", + "shape": [ + 18432, + 288 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 21233664, + "byteOffset": 11943936 + } + ], + "md5sum": "499c6f1c3955b7ac48e1baf54f03bc75" + }, + { + "dataPath": "params_shard_9.bin", + "format": "raw-shard", + "nbytes": 21233664, + "records": [ + { + "name": "model.layers.13.mlp.gate_up_proj.q_weight", + "shape": [ + 18432, + 288 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 21233664, + "byteOffset": 0 + } + ], + "md5sum": "d31be670584c3046d1e37cfdb9d9ddd6" + }, + { + "dataPath": "params_shard_10.bin", + "format": "raw-shard", + "nbytes": 33214464, + "records": [ + { + "name": "model.layers.12.mlp.gate_up_proj.q_scale", + "shape": [ + 18432, + 72 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2654208, + "byteOffset": 0 + }, + { + "name": "model.layers.12.post_attention_layernorm.weight", + "shape": [ + 2304 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4608, + "byteOffset": 2654208 + }, + { + "name": "model.layers.12.post_feedforward_layernorm.weight", + "shape": [ + 2304 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4608, + "byteOffset": 2658816 + }, + { + "name": "model.layers.12.pre_feedforward_layernorm.weight", + "shape": [ + 2304 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4608, + "byteOffset": 2663424 + }, + { + "name": "model.layers.12.self_attn.qkv_proj.q_weight", + "shape": [ + 4096, + 288 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4718592, + "byteOffset": 2668032 + }, + { + "name": "model.layers.12.self_attn.qkv_proj.q_scale", + "shape": [ + 4096, + 72 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 589824, + "byteOffset": 7386624 + }, + { + "name": "model.layers.12.self_attn.o_proj.q_weight", + "shape": [ + 2304, + 256 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 7976448 + }, + { + "name": "model.layers.12.self_attn.o_proj.q_scale", + "shape": [ + 2304, + 64 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 294912, + "byteOffset": 10335744 + }, + { + "name": "model.layers.13.input_layernorm.weight", + "shape": [ + 2304 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4608, + "byteOffset": 10630656 + }, + { + "name": "model.layers.13.mlp.down_proj.q_weight", + "shape": [ + 2304, + 1152 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 10616832, + "byteOffset": 10635264 + }, + { + "name": "model.layers.13.mlp.down_proj.q_scale", + "shape": [ + 2304, + 288 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1327104, + "byteOffset": 21252096 + }, + { + "name": "model.layers.13.mlp.gate_up_proj.q_scale", + "shape": [ + 18432, + 72 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2654208, + "byteOffset": 22579200 + }, + { + "name": "model.layers.13.post_attention_layernorm.weight", + "shape": [ + 2304 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4608, + "byteOffset": 25233408 + }, + { + "name": "model.layers.13.post_feedforward_layernorm.weight", + "shape": [ + 2304 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4608, + "byteOffset": 25238016 + }, + { + "name": "model.layers.13.pre_feedforward_layernorm.weight", + "shape": [ + 2304 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4608, + "byteOffset": 25242624 + }, + { + "name": "model.layers.13.self_attn.qkv_proj.q_weight", + "shape": [ + 4096, + 288 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4718592, + "byteOffset": 25247232 + }, + { + "name": "model.layers.13.self_attn.qkv_proj.q_scale", + "shape": [ + 4096, + 72 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 589824, + "byteOffset": 29965824 + }, + { + "name": "model.layers.13.self_attn.o_proj.q_weight", + "shape": [ + 2304, + 256 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 30555648 + }, + { + "name": "model.layers.13.self_attn.o_proj.q_scale", + "shape": [ + 2304, + 64 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 294912, + "byteOffset": 32914944 + }, + { + "name": "model.layers.14.input_layernorm.weight", + "shape": [ + 2304 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4608, + "byteOffset": 33209856 + } + ], + "md5sum": "d4c11e91b9585fb3780730c909a30752" + }, + { + "dataPath": "params_shard_11.bin", + "format": "raw-shard", + "nbytes": 33177600, + "records": [ + { + "name": "model.layers.14.mlp.down_proj.q_weight", + "shape": [ + 2304, + 1152 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 10616832, + "byteOffset": 0 + }, + { + "name": "model.layers.14.mlp.down_proj.q_scale", + "shape": [ + 2304, + 288 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1327104, + "byteOffset": 10616832 + }, + { + "name": "model.layers.14.mlp.gate_up_proj.q_weight", + "shape": [ + 18432, + 288 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 21233664, + "byteOffset": 11943936 + } + ], + "md5sum": "57a6b07fdb8aa49529a7b1b4959757ea" + }, + { + "dataPath": "params_shard_12.bin", + "format": "raw-shard", + "nbytes": 21233664, + "records": [ + { + "name": "model.layers.15.mlp.gate_up_proj.q_weight", + "shape": [ + 18432, + 288 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 21233664, + "byteOffset": 0 + } + ], + "md5sum": "67614580b3f2a8a38e85aa9b0267e77a" + }, + { + "dataPath": "params_shard_13.bin", + "format": "raw-shard", + "nbytes": 33214464, + "records": [ + { + "name": "model.layers.14.mlp.gate_up_proj.q_scale", + "shape": [ + 18432, + 72 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2654208, + "byteOffset": 0 + }, + { + "name": "model.layers.14.post_attention_layernorm.weight", + "shape": [ + 2304 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4608, + "byteOffset": 2654208 + }, + { + "name": "model.layers.14.post_feedforward_layernorm.weight", + "shape": [ + 2304 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4608, + "byteOffset": 2658816 + }, + { + "name": "model.layers.14.pre_feedforward_layernorm.weight", + "shape": [ + 2304 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4608, + "byteOffset": 2663424 + }, + { + "name": "model.layers.14.self_attn.qkv_proj.q_weight", + "shape": [ + 4096, + 288 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4718592, + "byteOffset": 2668032 + }, + { + "name": "model.layers.14.self_attn.qkv_proj.q_scale", + "shape": [ + 4096, + 72 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 589824, + "byteOffset": 7386624 + }, + { + "name": "model.layers.14.self_attn.o_proj.q_weight", + "shape": [ + 2304, + 256 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 7976448 + }, + { + "name": "model.layers.14.self_attn.o_proj.q_scale", + "shape": [ + 2304, + 64 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 294912, + "byteOffset": 10335744 + }, + { + "name": "model.layers.15.input_layernorm.weight", + "shape": [ + 2304 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4608, + "byteOffset": 10630656 + }, + { + "name": "model.layers.15.mlp.down_proj.q_weight", + "shape": [ + 2304, + 1152 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 10616832, + "byteOffset": 10635264 + }, + { + "name": "model.layers.15.mlp.down_proj.q_scale", + "shape": [ + 2304, + 288 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1327104, + "byteOffset": 21252096 + }, + { + "name": "model.layers.15.mlp.gate_up_proj.q_scale", + "shape": [ + 18432, + 72 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2654208, + "byteOffset": 22579200 + }, + { + "name": "model.layers.15.post_attention_layernorm.weight", + "shape": [ + 2304 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4608, + "byteOffset": 25233408 + }, + { + "name": "model.layers.15.post_feedforward_layernorm.weight", + "shape": [ + 2304 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4608, + "byteOffset": 25238016 + }, + { + "name": "model.layers.15.pre_feedforward_layernorm.weight", + "shape": [ + 2304 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4608, + "byteOffset": 25242624 + }, + { + "name": "model.layers.15.self_attn.qkv_proj.q_weight", + "shape": [ + 4096, + 288 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4718592, + "byteOffset": 25247232 + }, + { + "name": "model.layers.15.self_attn.qkv_proj.q_scale", + "shape": [ + 4096, + 72 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 589824, + "byteOffset": 29965824 + }, + { + "name": "model.layers.15.self_attn.o_proj.q_weight", + "shape": [ + 2304, + 256 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 30555648 + }, + { + "name": "model.layers.15.self_attn.o_proj.q_scale", + "shape": [ + 2304, + 64 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 294912, + "byteOffset": 32914944 + }, + { + "name": "model.layers.16.input_layernorm.weight", + "shape": [ + 2304 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4608, + "byteOffset": 33209856 + } + ], + "md5sum": "3a8f3b58cadd6366dd53d8378d3f8af1" + }, + { + "dataPath": "params_shard_14.bin", + "format": "raw-shard", + "nbytes": 33177600, + "records": [ + { + "name": "model.layers.16.mlp.down_proj.q_weight", + "shape": [ + 2304, + 1152 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 10616832, + "byteOffset": 0 + }, + { + "name": "model.layers.16.mlp.down_proj.q_scale", + "shape": [ + 2304, + 288 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1327104, + "byteOffset": 10616832 + }, + { + "name": "model.layers.16.mlp.gate_up_proj.q_weight", + "shape": [ + 18432, + 288 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 21233664, + "byteOffset": 11943936 + } + ], + "md5sum": "2f0476c0a82d1270be330f2b11b0095e" + }, + { + "dataPath": "params_shard_15.bin", + "format": "raw-shard", + "nbytes": 21233664, + "records": [ + { + "name": "model.layers.17.mlp.gate_up_proj.q_weight", + "shape": [ + 18432, + 288 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 21233664, + "byteOffset": 0 + } + ], + "md5sum": "b0a8a0bf26d878221850484946ce030a" + }, + { + "dataPath": "params_shard_16.bin", + "format": "raw-shard", + "nbytes": 33214464, + "records": [ + { + "name": "model.layers.16.mlp.gate_up_proj.q_scale", + "shape": [ + 18432, + 72 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2654208, + "byteOffset": 0 + }, + { + "name": "model.layers.16.post_attention_layernorm.weight", + "shape": [ + 2304 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4608, + "byteOffset": 2654208 + }, + { + "name": "model.layers.16.post_feedforward_layernorm.weight", + "shape": [ + 2304 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4608, + "byteOffset": 2658816 + }, + { + "name": "model.layers.16.pre_feedforward_layernorm.weight", + "shape": [ + 2304 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4608, + "byteOffset": 2663424 + }, + { + "name": "model.layers.16.self_attn.qkv_proj.q_weight", + "shape": [ + 4096, + 288 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4718592, + "byteOffset": 2668032 + }, + { + "name": "model.layers.16.self_attn.qkv_proj.q_scale", + "shape": [ + 4096, + 72 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 589824, + "byteOffset": 7386624 + }, + { + "name": "model.layers.16.self_attn.o_proj.q_weight", + "shape": [ + 2304, + 256 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 7976448 + }, + { + "name": "model.layers.16.self_attn.o_proj.q_scale", + "shape": [ + 2304, + 64 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 294912, + "byteOffset": 10335744 + }, + { + "name": "model.layers.17.input_layernorm.weight", + "shape": [ + 2304 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4608, + "byteOffset": 10630656 + }, + { + "name": "model.layers.17.mlp.down_proj.q_weight", + "shape": [ + 2304, + 1152 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 10616832, + "byteOffset": 10635264 + }, + { + "name": "model.layers.17.mlp.down_proj.q_scale", + "shape": [ + 2304, + 288 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1327104, + "byteOffset": 21252096 + }, + { + "name": "model.layers.17.mlp.gate_up_proj.q_scale", + "shape": [ + 18432, + 72 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2654208, + "byteOffset": 22579200 + }, + { + "name": "model.layers.17.post_attention_layernorm.weight", + "shape": [ + 2304 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4608, + "byteOffset": 25233408 + }, + { + "name": "model.layers.17.post_feedforward_layernorm.weight", + "shape": [ + 2304 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4608, + "byteOffset": 25238016 + }, + { + "name": "model.layers.17.pre_feedforward_layernorm.weight", + "shape": [ + 2304 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4608, + "byteOffset": 25242624 + }, + { + "name": "model.layers.17.self_attn.qkv_proj.q_weight", + "shape": [ + 4096, + 288 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4718592, + "byteOffset": 25247232 + }, + { + "name": "model.layers.17.self_attn.qkv_proj.q_scale", + "shape": [ + 4096, + 72 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 589824, + "byteOffset": 29965824 + }, + { + "name": "model.layers.17.self_attn.o_proj.q_weight", + "shape": [ + 2304, + 256 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 30555648 + }, + { + "name": "model.layers.17.self_attn.o_proj.q_scale", + "shape": [ + 2304, + 64 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 294912, + "byteOffset": 32914944 + }, + { + "name": "model.layers.18.input_layernorm.weight", + "shape": [ + 2304 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4608, + "byteOffset": 33209856 + } + ], + "md5sum": "f1a76e763b1dc0e9c67df48528bc0604" + }, + { + "dataPath": "params_shard_17.bin", + "format": "raw-shard", + "nbytes": 33177600, + "records": [ + { + "name": "model.layers.18.mlp.down_proj.q_weight", + "shape": [ + 2304, + 1152 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 10616832, + "byteOffset": 0 + }, + { + "name": "model.layers.18.mlp.down_proj.q_scale", + "shape": [ + 2304, + 288 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1327104, + "byteOffset": 10616832 + }, + { + "name": "model.layers.18.mlp.gate_up_proj.q_weight", + "shape": [ + 18432, + 288 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 21233664, + "byteOffset": 11943936 + } + ], + "md5sum": "c9afc0e296625eb4651b8faf2a0fb6b0" + }, + { + "dataPath": "params_shard_18.bin", + "format": "raw-shard", + "nbytes": 21233664, + "records": [ + { + "name": "model.layers.19.mlp.gate_up_proj.q_weight", + "shape": [ + 18432, + 288 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 21233664, + "byteOffset": 0 + } + ], + "md5sum": "95b6cee3c0471d1dbb0fea2e1baf4abd" + }, + { + "dataPath": "params_shard_19.bin", + "format": "raw-shard", + "nbytes": 33214464, + "records": [ + { + "name": "model.layers.18.mlp.gate_up_proj.q_scale", + "shape": [ + 18432, + 72 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2654208, + "byteOffset": 0 + }, + { + "name": "model.layers.18.post_attention_layernorm.weight", + "shape": [ + 2304 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4608, + "byteOffset": 2654208 + }, + { + "name": "model.layers.18.post_feedforward_layernorm.weight", + "shape": [ + 2304 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4608, + "byteOffset": 2658816 + }, + { + "name": "model.layers.18.pre_feedforward_layernorm.weight", + "shape": [ + 2304 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4608, + "byteOffset": 2663424 + }, + { + "name": "model.layers.18.self_attn.qkv_proj.q_weight", + "shape": [ + 4096, + 288 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4718592, + "byteOffset": 2668032 + }, + { + "name": "model.layers.18.self_attn.qkv_proj.q_scale", + "shape": [ + 4096, + 72 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 589824, + "byteOffset": 7386624 + }, + { + "name": "model.layers.18.self_attn.o_proj.q_weight", + "shape": [ + 2304, + 256 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 7976448 + }, + { + "name": "model.layers.18.self_attn.o_proj.q_scale", + "shape": [ + 2304, + 64 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 294912, + "byteOffset": 10335744 + }, + { + "name": "model.layers.19.input_layernorm.weight", + "shape": [ + 2304 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4608, + "byteOffset": 10630656 + }, + { + "name": "model.layers.19.mlp.down_proj.q_weight", + "shape": [ + 2304, + 1152 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 10616832, + "byteOffset": 10635264 + }, + { + "name": "model.layers.19.mlp.down_proj.q_scale", + "shape": [ + 2304, + 288 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1327104, + "byteOffset": 21252096 + }, + { + "name": "model.layers.19.mlp.gate_up_proj.q_scale", + "shape": [ + 18432, + 72 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2654208, + "byteOffset": 22579200 + }, + { + "name": "model.layers.19.post_attention_layernorm.weight", + "shape": [ + 2304 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4608, + "byteOffset": 25233408 + }, + { + "name": "model.layers.19.post_feedforward_layernorm.weight", + "shape": [ + 2304 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4608, + "byteOffset": 25238016 + }, + { + "name": "model.layers.19.pre_feedforward_layernorm.weight", + "shape": [ + 2304 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4608, + "byteOffset": 25242624 + }, + { + "name": "model.layers.19.self_attn.qkv_proj.q_weight", + "shape": [ + 4096, + 288 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4718592, + "byteOffset": 25247232 + }, + { + "name": "model.layers.19.self_attn.qkv_proj.q_scale", + "shape": [ + 4096, + 72 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 589824, + "byteOffset": 29965824 + }, + { + "name": "model.layers.19.self_attn.o_proj.q_weight", + "shape": [ + 2304, + 256 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 30555648 + }, + { + "name": "model.layers.19.self_attn.o_proj.q_scale", + "shape": [ + 2304, + 64 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 294912, + "byteOffset": 32914944 + }, + { + "name": "model.layers.2.input_layernorm.weight", + "shape": [ + 2304 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4608, + "byteOffset": 33209856 + } + ], + "md5sum": "f67c0fd6ce5403528b801dcad10d1816" + }, + { + "dataPath": "params_shard_20.bin", + "format": "raw-shard", + "nbytes": 33177600, + "records": [ + { + "name": "model.layers.2.mlp.down_proj.q_weight", + "shape": [ + 2304, + 1152 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 10616832, + "byteOffset": 0 + }, + { + "name": "model.layers.2.mlp.down_proj.q_scale", + "shape": [ + 2304, + 288 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1327104, + "byteOffset": 10616832 + }, + { + "name": "model.layers.2.mlp.gate_up_proj.q_weight", + "shape": [ + 18432, + 288 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 21233664, + "byteOffset": 11943936 + } + ], + "md5sum": "584e8d4ef70fb14e3772af0979bd3f73" + }, + { + "dataPath": "params_shard_21.bin", + "format": "raw-shard", + "nbytes": 21233664, + "records": [ + { + "name": "model.layers.20.mlp.gate_up_proj.q_weight", + "shape": [ + 18432, + 288 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 21233664, + "byteOffset": 0 + } + ], + "md5sum": "0adcc615d1d9695b4ef590c89de35db7" + }, + { + "dataPath": "params_shard_22.bin", + "format": "raw-shard", + "nbytes": 33214464, + "records": [ + { + "name": "model.layers.2.mlp.gate_up_proj.q_scale", + "shape": [ + 18432, + 72 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2654208, + "byteOffset": 0 + }, + { + "name": "model.layers.2.post_attention_layernorm.weight", + "shape": [ + 2304 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4608, + "byteOffset": 2654208 + }, + { + "name": "model.layers.2.post_feedforward_layernorm.weight", + "shape": [ + 2304 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4608, + "byteOffset": 2658816 + }, + { + "name": "model.layers.2.pre_feedforward_layernorm.weight", + "shape": [ + 2304 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4608, + "byteOffset": 2663424 + }, + { + "name": "model.layers.2.self_attn.qkv_proj.q_weight", + "shape": [ + 4096, + 288 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4718592, + "byteOffset": 2668032 + }, + { + "name": "model.layers.2.self_attn.qkv_proj.q_scale", + "shape": [ + 4096, + 72 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 589824, + "byteOffset": 7386624 + }, + { + "name": "model.layers.2.self_attn.o_proj.q_weight", + "shape": [ + 2304, + 256 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 7976448 + }, + { + "name": "model.layers.2.self_attn.o_proj.q_scale", + "shape": [ + 2304, + 64 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 294912, + "byteOffset": 10335744 + }, + { + "name": "model.layers.20.input_layernorm.weight", + "shape": [ + 2304 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4608, + "byteOffset": 10630656 + }, + { + "name": "model.layers.20.mlp.down_proj.q_weight", + "shape": [ + 2304, + 1152 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 10616832, + "byteOffset": 10635264 + }, + { + "name": "model.layers.20.mlp.down_proj.q_scale", + "shape": [ + 2304, + 288 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1327104, + "byteOffset": 21252096 + }, + { + "name": "model.layers.20.mlp.gate_up_proj.q_scale", + "shape": [ + 18432, + 72 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2654208, + "byteOffset": 22579200 + }, + { + "name": "model.layers.20.post_attention_layernorm.weight", + "shape": [ + 2304 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4608, + "byteOffset": 25233408 + }, + { + "name": "model.layers.20.post_feedforward_layernorm.weight", + "shape": [ + 2304 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4608, + "byteOffset": 25238016 + }, + { + "name": "model.layers.20.pre_feedforward_layernorm.weight", + "shape": [ + 2304 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4608, + "byteOffset": 25242624 + }, + { + "name": "model.layers.20.self_attn.qkv_proj.q_weight", + "shape": [ + 4096, + 288 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4718592, + "byteOffset": 25247232 + }, + { + "name": "model.layers.20.self_attn.qkv_proj.q_scale", + "shape": [ + 4096, + 72 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 589824, + "byteOffset": 29965824 + }, + { + "name": "model.layers.20.self_attn.o_proj.q_weight", + "shape": [ + 2304, + 256 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 30555648 + }, + { + "name": "model.layers.20.self_attn.o_proj.q_scale", + "shape": [ + 2304, + 64 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 294912, + "byteOffset": 32914944 + }, + { + "name": "model.layers.21.input_layernorm.weight", + "shape": [ + 2304 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4608, + "byteOffset": 33209856 + } + ], + "md5sum": "14694eefe69a69a9223867449c09d11c" + }, + { + "dataPath": "params_shard_23.bin", + "format": "raw-shard", + "nbytes": 33177600, + "records": [ + { + "name": "model.layers.21.mlp.down_proj.q_weight", + "shape": [ + 2304, + 1152 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 10616832, + "byteOffset": 0 + }, + { + "name": "model.layers.21.mlp.down_proj.q_scale", + "shape": [ + 2304, + 288 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1327104, + "byteOffset": 10616832 + }, + { + "name": "model.layers.21.mlp.gate_up_proj.q_weight", + "shape": [ + 18432, + 288 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 21233664, + "byteOffset": 11943936 + } + ], + "md5sum": "3fbc7ba0b3c4e0f20ae744e85561185e" + }, + { + "dataPath": "params_shard_24.bin", + "format": "raw-shard", + "nbytes": 21233664, + "records": [ + { + "name": "model.layers.22.mlp.gate_up_proj.q_weight", + "shape": [ + 18432, + 288 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 21233664, + "byteOffset": 0 + } + ], + "md5sum": "fb2183e1778ec2dfd9cd348116591da5" + }, + { + "dataPath": "params_shard_25.bin", + "format": "raw-shard", + "nbytes": 33214464, + "records": [ + { + "name": "model.layers.21.mlp.gate_up_proj.q_scale", + "shape": [ + 18432, + 72 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2654208, + "byteOffset": 0 + }, + { + "name": "model.layers.21.post_attention_layernorm.weight", + "shape": [ + 2304 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4608, + "byteOffset": 2654208 + }, + { + "name": "model.layers.21.post_feedforward_layernorm.weight", + "shape": [ + 2304 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4608, + "byteOffset": 2658816 + }, + { + "name": "model.layers.21.pre_feedforward_layernorm.weight", + "shape": [ + 2304 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4608, + "byteOffset": 2663424 + }, + { + "name": "model.layers.21.self_attn.qkv_proj.q_weight", + "shape": [ + 4096, + 288 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4718592, + "byteOffset": 2668032 + }, + { + "name": "model.layers.21.self_attn.qkv_proj.q_scale", + "shape": [ + 4096, + 72 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 589824, + "byteOffset": 7386624 + }, + { + "name": "model.layers.21.self_attn.o_proj.q_weight", + "shape": [ + 2304, + 256 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 7976448 + }, + { + "name": "model.layers.21.self_attn.o_proj.q_scale", + "shape": [ + 2304, + 64 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 294912, + "byteOffset": 10335744 + }, + { + "name": "model.layers.22.input_layernorm.weight", + "shape": [ + 2304 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4608, + "byteOffset": 10630656 + }, + { + "name": "model.layers.22.mlp.down_proj.q_weight", + "shape": [ + 2304, + 1152 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 10616832, + "byteOffset": 10635264 + }, + { + "name": "model.layers.22.mlp.down_proj.q_scale", + "shape": [ + 2304, + 288 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1327104, + "byteOffset": 21252096 + }, + { + "name": "model.layers.22.mlp.gate_up_proj.q_scale", + "shape": [ + 18432, + 72 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2654208, + "byteOffset": 22579200 + }, + { + "name": "model.layers.22.post_attention_layernorm.weight", + "shape": [ + 2304 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4608, + "byteOffset": 25233408 + }, + { + "name": "model.layers.22.post_feedforward_layernorm.weight", + "shape": [ + 2304 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4608, + "byteOffset": 25238016 + }, + { + "name": "model.layers.22.pre_feedforward_layernorm.weight", + "shape": [ + 2304 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4608, + "byteOffset": 25242624 + }, + { + "name": "model.layers.22.self_attn.qkv_proj.q_weight", + "shape": [ + 4096, + 288 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4718592, + "byteOffset": 25247232 + }, + { + "name": "model.layers.22.self_attn.qkv_proj.q_scale", + "shape": [ + 4096, + 72 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 589824, + "byteOffset": 29965824 + }, + { + "name": "model.layers.22.self_attn.o_proj.q_weight", + "shape": [ + 2304, + 256 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 30555648 + }, + { + "name": "model.layers.22.self_attn.o_proj.q_scale", + "shape": [ + 2304, + 64 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 294912, + "byteOffset": 32914944 + }, + { + "name": "model.layers.23.input_layernorm.weight", + "shape": [ + 2304 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4608, + "byteOffset": 33209856 + } + ], + "md5sum": "a76f404041e3f6647d86cbc041bb144e" + }, + { + "dataPath": "params_shard_26.bin", + "format": "raw-shard", + "nbytes": 33177600, + "records": [ + { + "name": "model.layers.23.mlp.down_proj.q_weight", + "shape": [ + 2304, + 1152 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 10616832, + "byteOffset": 0 + }, + { + "name": "model.layers.23.mlp.down_proj.q_scale", + "shape": [ + 2304, + 288 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1327104, + "byteOffset": 10616832 + }, + { + "name": "model.layers.23.mlp.gate_up_proj.q_weight", + "shape": [ + 18432, + 288 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 21233664, + "byteOffset": 11943936 + } + ], + "md5sum": "d30f7600b2debb90cf9851c84ea90ab4" + }, + { + "dataPath": "params_shard_27.bin", + "format": "raw-shard", + "nbytes": 31864320, + "records": [ + { + "name": "model.layers.23.mlp.gate_up_proj.q_scale", + "shape": [ + 18432, + 72 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2654208, + "byteOffset": 0 + }, + { + "name": "model.layers.23.post_attention_layernorm.weight", + "shape": [ + 2304 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4608, + "byteOffset": 2654208 + }, + { + "name": "model.layers.23.post_feedforward_layernorm.weight", + "shape": [ + 2304 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4608, + "byteOffset": 2658816 + }, + { + "name": "model.layers.23.pre_feedforward_layernorm.weight", + "shape": [ + 2304 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4608, + "byteOffset": 2663424 + }, + { + "name": "model.layers.23.self_attn.qkv_proj.q_weight", + "shape": [ + 4096, + 288 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4718592, + "byteOffset": 2668032 + }, + { + "name": "model.layers.23.self_attn.qkv_proj.q_scale", + "shape": [ + 4096, + 72 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 589824, + "byteOffset": 7386624 + }, + { + "name": "model.layers.23.self_attn.o_proj.q_weight", + "shape": [ + 2304, + 256 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 7976448 + }, + { + "name": "model.layers.23.self_attn.o_proj.q_scale", + "shape": [ + 2304, + 64 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 294912, + "byteOffset": 10335744 + }, + { + "name": "model.layers.24.mlp.gate_up_proj.q_weight", + "shape": [ + 18432, + 288 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 21233664, + "byteOffset": 10630656 + } + ], + "md5sum": "e33528541ac66995a65d4059e025adb4" + }, + { + "dataPath": "params_shard_28.bin", + "format": "raw-shard", + "nbytes": 21233664, + "records": [ + { + "name": "model.layers.3.mlp.gate_up_proj.q_weight", + "shape": [ + 18432, + 288 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 21233664, + "byteOffset": 0 + } + ], + "md5sum": "2334942f272bf7f3771df44bdc75a8f9" + }, + { + "dataPath": "params_shard_29.bin", + "format": "raw-shard", + "nbytes": 33200640, + "records": [ + { + "name": "model.layers.24.mlp.gate_up_proj.q_scale", + "shape": [ + 18432, + 72 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2654208, + "byteOffset": 0 + }, + { + "name": "model.layers.24.self_attn.qkv_proj.q_weight", + "shape": [ + 4096, + 288 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4718592, + "byteOffset": 2654208 + }, + { + "name": "model.layers.24.self_attn.qkv_proj.q_scale", + "shape": [ + 4096, + 72 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 589824, + "byteOffset": 7372800 + }, + { + "name": "model.layers.24.self_attn.o_proj.q_weight", + "shape": [ + 2304, + 256 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 7962624 + }, + { + "name": "model.layers.24.self_attn.o_proj.q_scale", + "shape": [ + 2304, + 64 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 294912, + "byteOffset": 10321920 + }, + { + "name": "model.layers.3.input_layernorm.weight", + "shape": [ + 2304 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4608, + "byteOffset": 10616832 + }, + { + "name": "model.layers.3.mlp.down_proj.q_weight", + "shape": [ + 2304, + 1152 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 10616832, + "byteOffset": 10621440 + }, + { + "name": "model.layers.3.mlp.down_proj.q_scale", + "shape": [ + 2304, + 288 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1327104, + "byteOffset": 21238272 + }, + { + "name": "model.layers.3.mlp.gate_up_proj.q_scale", + "shape": [ + 18432, + 72 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2654208, + "byteOffset": 22565376 + }, + { + "name": "model.layers.3.post_attention_layernorm.weight", + "shape": [ + 2304 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4608, + "byteOffset": 25219584 + }, + { + "name": "model.layers.3.post_feedforward_layernorm.weight", + "shape": [ + 2304 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4608, + "byteOffset": 25224192 + }, + { + "name": "model.layers.3.pre_feedforward_layernorm.weight", + "shape": [ + 2304 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4608, + "byteOffset": 25228800 + }, + { + "name": "model.layers.3.self_attn.qkv_proj.q_weight", + "shape": [ + 4096, + 288 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4718592, + "byteOffset": 25233408 + }, + { + "name": "model.layers.3.self_attn.qkv_proj.q_scale", + "shape": [ + 4096, + 72 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 589824, + "byteOffset": 29952000 + }, + { + "name": "model.layers.3.self_attn.o_proj.q_weight", + "shape": [ + 2304, + 256 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 30541824 + }, + { + "name": "model.layers.3.self_attn.o_proj.q_scale", + "shape": [ + 2304, + 64 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 294912, + "byteOffset": 32901120 + }, + { + "name": "model.layers.4.input_layernorm.weight", + "shape": [ + 2304 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4608, + "byteOffset": 33196032 + } + ], + "md5sum": "9ee5e7a7f0213b4d55deb8cea35b3a8e" + }, + { + "dataPath": "params_shard_30.bin", + "format": "raw-shard", + "nbytes": 33177600, + "records": [ + { + "name": "model.layers.4.mlp.down_proj.q_weight", + "shape": [ + 2304, + 1152 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 10616832, + "byteOffset": 0 + }, + { + "name": "model.layers.4.mlp.down_proj.q_scale", + "shape": [ + 2304, + 288 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1327104, + "byteOffset": 10616832 + }, + { + "name": "model.layers.4.mlp.gate_up_proj.q_weight", + "shape": [ + 18432, + 288 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 21233664, + "byteOffset": 11943936 + } + ], + "md5sum": "babbe4d246c14e00495e33536df1e291" + }, + { + "dataPath": "params_shard_31.bin", + "format": "raw-shard", + "nbytes": 21233664, + "records": [ + { + "name": "model.layers.5.mlp.gate_up_proj.q_weight", + "shape": [ + 18432, + 288 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 21233664, + "byteOffset": 0 + } + ], + "md5sum": "21f58b7fc7ca5c6dbf8d2953aa7abdb9" + }, + { + "dataPath": "params_shard_32.bin", + "format": "raw-shard", + "nbytes": 33214464, + "records": [ + { + "name": "model.layers.4.mlp.gate_up_proj.q_scale", + "shape": [ + 18432, + 72 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2654208, + "byteOffset": 0 + }, + { + "name": "model.layers.4.post_attention_layernorm.weight", + "shape": [ + 2304 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4608, + "byteOffset": 2654208 + }, + { + "name": "model.layers.4.post_feedforward_layernorm.weight", + "shape": [ + 2304 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4608, + "byteOffset": 2658816 + }, + { + "name": "model.layers.4.pre_feedforward_layernorm.weight", + "shape": [ + 2304 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4608, + "byteOffset": 2663424 + }, + { + "name": "model.layers.4.self_attn.qkv_proj.q_weight", + "shape": [ + 4096, + 288 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4718592, + "byteOffset": 2668032 + }, + { + "name": "model.layers.4.self_attn.qkv_proj.q_scale", + "shape": [ + 4096, + 72 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 589824, + "byteOffset": 7386624 + }, + { + "name": "model.layers.4.self_attn.o_proj.q_weight", + "shape": [ + 2304, + 256 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 7976448 + }, + { + "name": "model.layers.4.self_attn.o_proj.q_scale", + "shape": [ + 2304, + 64 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 294912, + "byteOffset": 10335744 + }, + { + "name": "model.layers.5.input_layernorm.weight", + "shape": [ + 2304 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4608, + "byteOffset": 10630656 + }, + { + "name": "model.layers.5.mlp.down_proj.q_weight", + "shape": [ + 2304, + 1152 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 10616832, + "byteOffset": 10635264 + }, + { + "name": "model.layers.5.mlp.down_proj.q_scale", + "shape": [ + 2304, + 288 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1327104, + "byteOffset": 21252096 + }, + { + "name": "model.layers.5.mlp.gate_up_proj.q_scale", + "shape": [ + 18432, + 72 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2654208, + "byteOffset": 22579200 + }, + { + "name": "model.layers.5.post_attention_layernorm.weight", + "shape": [ + 2304 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4608, + "byteOffset": 25233408 + }, + { + "name": "model.layers.5.post_feedforward_layernorm.weight", + "shape": [ + 2304 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4608, + "byteOffset": 25238016 + }, + { + "name": "model.layers.5.pre_feedforward_layernorm.weight", + "shape": [ + 2304 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4608, + "byteOffset": 25242624 + }, + { + "name": "model.layers.5.self_attn.qkv_proj.q_weight", + "shape": [ + 4096, + 288 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4718592, + "byteOffset": 25247232 + }, + { + "name": "model.layers.5.self_attn.qkv_proj.q_scale", + "shape": [ + 4096, + 72 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 589824, + "byteOffset": 29965824 + }, + { + "name": "model.layers.5.self_attn.o_proj.q_weight", + "shape": [ + 2304, + 256 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 30555648 + }, + { + "name": "model.layers.5.self_attn.o_proj.q_scale", + "shape": [ + 2304, + 64 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 294912, + "byteOffset": 32914944 + }, + { + "name": "model.layers.6.input_layernorm.weight", + "shape": [ + 2304 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4608, + "byteOffset": 33209856 + } + ], + "md5sum": "44ed88825a210c8e2033400e70300767" + }, + { + "dataPath": "params_shard_33.bin", + "format": "raw-shard", + "nbytes": 33177600, + "records": [ + { + "name": "model.layers.6.mlp.down_proj.q_weight", + "shape": [ + 2304, + 1152 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 10616832, + "byteOffset": 0 + }, + { + "name": "model.layers.6.mlp.down_proj.q_scale", + "shape": [ + 2304, + 288 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1327104, + "byteOffset": 10616832 + }, + { + "name": "model.layers.6.mlp.gate_up_proj.q_weight", + "shape": [ + 18432, + 288 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 21233664, + "byteOffset": 11943936 + } + ], + "md5sum": "93e69ae37c7383b0004a29cc419c8726" + }, + { + "dataPath": "params_shard_34.bin", + "format": "raw-shard", + "nbytes": 21233664, + "records": [ + { + "name": "model.layers.7.mlp.gate_up_proj.q_weight", + "shape": [ + 18432, + 288 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 21233664, + "byteOffset": 0 + } + ], + "md5sum": "29656ba96b5514b3401910b2cf1b2093" + }, + { + "dataPath": "params_shard_35.bin", + "format": "raw-shard", + "nbytes": 33214464, + "records": [ + { + "name": "model.layers.6.mlp.gate_up_proj.q_scale", + "shape": [ + 18432, + 72 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2654208, + "byteOffset": 0 + }, + { + "name": "model.layers.6.post_attention_layernorm.weight", + "shape": [ + 2304 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4608, + "byteOffset": 2654208 + }, + { + "name": "model.layers.6.post_feedforward_layernorm.weight", + "shape": [ + 2304 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4608, + "byteOffset": 2658816 + }, + { + "name": "model.layers.6.pre_feedforward_layernorm.weight", + "shape": [ + 2304 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4608, + "byteOffset": 2663424 + }, + { + "name": "model.layers.6.self_attn.qkv_proj.q_weight", + "shape": [ + 4096, + 288 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4718592, + "byteOffset": 2668032 + }, + { + "name": "model.layers.6.self_attn.qkv_proj.q_scale", + "shape": [ + 4096, + 72 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 589824, + "byteOffset": 7386624 + }, + { + "name": "model.layers.6.self_attn.o_proj.q_weight", + "shape": [ + 2304, + 256 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 7976448 + }, + { + "name": "model.layers.6.self_attn.o_proj.q_scale", + "shape": [ + 2304, + 64 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 294912, + "byteOffset": 10335744 + }, + { + "name": "model.layers.7.input_layernorm.weight", + "shape": [ + 2304 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4608, + "byteOffset": 10630656 + }, + { + "name": "model.layers.7.mlp.down_proj.q_weight", + "shape": [ + 2304, + 1152 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 10616832, + "byteOffset": 10635264 + }, + { + "name": "model.layers.7.mlp.down_proj.q_scale", + "shape": [ + 2304, + 288 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1327104, + "byteOffset": 21252096 + }, + { + "name": "model.layers.7.mlp.gate_up_proj.q_scale", + "shape": [ + 18432, + 72 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2654208, + "byteOffset": 22579200 + }, + { + "name": "model.layers.7.post_attention_layernorm.weight", + "shape": [ + 2304 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4608, + "byteOffset": 25233408 + }, + { + "name": "model.layers.7.post_feedforward_layernorm.weight", + "shape": [ + 2304 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4608, + "byteOffset": 25238016 + }, + { + "name": "model.layers.7.pre_feedforward_layernorm.weight", + "shape": [ + 2304 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4608, + "byteOffset": 25242624 + }, + { + "name": "model.layers.7.self_attn.qkv_proj.q_weight", + "shape": [ + 4096, + 288 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4718592, + "byteOffset": 25247232 + }, + { + "name": "model.layers.7.self_attn.qkv_proj.q_scale", + "shape": [ + 4096, + 72 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 589824, + "byteOffset": 29965824 + }, + { + "name": "model.layers.7.self_attn.o_proj.q_weight", + "shape": [ + 2304, + 256 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 30555648 + }, + { + "name": "model.layers.7.self_attn.o_proj.q_scale", + "shape": [ + 2304, + 64 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 294912, + "byteOffset": 32914944 + }, + { + "name": "model.layers.8.input_layernorm.weight", + "shape": [ + 2304 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4608, + "byteOffset": 33209856 + } + ], + "md5sum": "550983d43939b0d3d752d29349cefe9c" + }, + { + "dataPath": "params_shard_36.bin", + "format": "raw-shard", + "nbytes": 33177600, + "records": [ + { + "name": "model.layers.8.mlp.down_proj.q_weight", + "shape": [ + 2304, + 1152 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 10616832, + "byteOffset": 0 + }, + { + "name": "model.layers.8.mlp.down_proj.q_scale", + "shape": [ + 2304, + 288 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1327104, + "byteOffset": 10616832 + }, + { + "name": "model.layers.8.mlp.gate_up_proj.q_weight", + "shape": [ + 18432, + 288 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 21233664, + "byteOffset": 11943936 + } + ], + "md5sum": "e041c162985da23150d03713fec452c5" + }, + { + "dataPath": "params_shard_37.bin", + "format": "raw-shard", + "nbytes": 21233664, + "records": [ + { + "name": "model.layers.9.mlp.gate_up_proj.q_weight", + "shape": [ + 18432, + 288 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 21233664, + "byteOffset": 0 + } + ], + "md5sum": "c816f0c057f977aef0d917927c495952" + }, + { + "dataPath": "params_shard_38.bin", + "format": "raw-shard", + "nbytes": 33214464, + "records": [ + { + "name": "model.layers.8.mlp.gate_up_proj.q_scale", + "shape": [ + 18432, + 72 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2654208, + "byteOffset": 0 + }, + { + "name": "model.layers.8.post_attention_layernorm.weight", + "shape": [ + 2304 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4608, + "byteOffset": 2654208 + }, + { + "name": "model.layers.8.post_feedforward_layernorm.weight", + "shape": [ + 2304 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4608, + "byteOffset": 2658816 + }, + { + "name": "model.layers.8.pre_feedforward_layernorm.weight", + "shape": [ + 2304 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4608, + "byteOffset": 2663424 + }, + { + "name": "model.layers.8.self_attn.qkv_proj.q_weight", + "shape": [ + 4096, + 288 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4718592, + "byteOffset": 2668032 + }, + { + "name": "model.layers.8.self_attn.qkv_proj.q_scale", + "shape": [ + 4096, + 72 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 589824, + "byteOffset": 7386624 + }, + { + "name": "model.layers.8.self_attn.o_proj.q_weight", + "shape": [ + 2304, + 256 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 7976448 + }, + { + "name": "model.layers.8.self_attn.o_proj.q_scale", + "shape": [ + 2304, + 64 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 294912, + "byteOffset": 10335744 + }, + { + "name": "model.layers.9.input_layernorm.weight", + "shape": [ + 2304 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4608, + "byteOffset": 10630656 + }, + { + "name": "model.layers.9.mlp.down_proj.q_weight", + "shape": [ + 2304, + 1152 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 10616832, + "byteOffset": 10635264 + }, + { + "name": "model.layers.9.mlp.down_proj.q_scale", + "shape": [ + 2304, + 288 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1327104, + "byteOffset": 21252096 + }, + { + "name": "model.layers.9.mlp.gate_up_proj.q_scale", + "shape": [ + 18432, + 72 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2654208, + "byteOffset": 22579200 + }, + { + "name": "model.layers.9.post_attention_layernorm.weight", + "shape": [ + 2304 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4608, + "byteOffset": 25233408 + }, + { + "name": "model.layers.9.post_feedforward_layernorm.weight", + "shape": [ + 2304 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4608, + "byteOffset": 25238016 + }, + { + "name": "model.layers.9.pre_feedforward_layernorm.weight", + "shape": [ + 2304 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4608, + "byteOffset": 25242624 + }, + { + "name": "model.layers.9.self_attn.qkv_proj.q_weight", + "shape": [ + 4096, + 288 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4718592, + "byteOffset": 25247232 + }, + { + "name": "model.layers.9.self_attn.qkv_proj.q_scale", + "shape": [ + 4096, + 72 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 589824, + "byteOffset": 29965824 + }, + { + "name": "model.layers.9.self_attn.o_proj.q_weight", + "shape": [ + 2304, + 256 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 30555648 + }, + { + "name": "model.layers.9.self_attn.o_proj.q_scale", + "shape": [ + 2304, + 64 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 294912, + "byteOffset": 32914944 + }, + { + "name": "model.layers.24.input_layernorm.weight", + "shape": [ + 2304 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4608, + "byteOffset": 33209856 + } + ], + "md5sum": "d5cce7273b21673fa7394f92f0b7e53d" + }, + { + "dataPath": "params_shard_39.bin", + "format": "raw-shard", + "nbytes": 21233664, + "records": [ + { + "name": "model.layers.25.mlp.gate_up_proj.q_weight", + "shape": [ + 18432, + 288 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 21233664, + "byteOffset": 0 + } + ], + "md5sum": "7e6faa752093ea3d32faa7e8226b3dd1" + }, + { + "dataPath": "params_shard_40.bin", + "format": "raw-shard", + "nbytes": 31882752, + "records": [ + { + "name": "model.layers.24.mlp.down_proj.q_weight", + "shape": [ + 2304, + 1152 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 10616832, + "byteOffset": 0 + }, + { + "name": "model.layers.24.mlp.down_proj.q_scale", + "shape": [ + 2304, + 288 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1327104, + "byteOffset": 10616832 + }, + { + "name": "model.layers.24.post_attention_layernorm.weight", + "shape": [ + 2304 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4608, + "byteOffset": 11943936 + }, + { + "name": "model.layers.24.post_feedforward_layernorm.weight", + "shape": [ + 2304 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4608, + "byteOffset": 11948544 + }, + { + "name": "model.layers.24.pre_feedforward_layernorm.weight", + "shape": [ + 2304 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4608, + "byteOffset": 11953152 + }, + { + "name": "model.layers.25.input_layernorm.weight", + "shape": [ + 2304 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4608, + "byteOffset": 11957760 + }, + { + "name": "model.layers.25.mlp.down_proj.q_weight", + "shape": [ + 2304, + 1152 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 10616832, + "byteOffset": 11962368 + }, + { + "name": "model.layers.25.mlp.down_proj.q_scale", + "shape": [ + 2304, + 288 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1327104, + "byteOffset": 22579200 + }, + { + "name": "model.layers.25.mlp.gate_up_proj.q_scale", + "shape": [ + 18432, + 72 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2654208, + "byteOffset": 23906304 + }, + { + "name": "model.layers.25.post_attention_layernorm.weight", + "shape": [ + 2304 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4608, + "byteOffset": 26560512 + }, + { + "name": "model.layers.25.post_feedforward_layernorm.weight", + "shape": [ + 2304 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4608, + "byteOffset": 26565120 + }, + { + "name": "model.layers.25.pre_feedforward_layernorm.weight", + "shape": [ + 2304 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4608, + "byteOffset": 26569728 + }, + { + "name": "model.layers.25.self_attn.qkv_proj.q_weight", + "shape": [ + 4096, + 288 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4718592, + "byteOffset": 26574336 + }, + { + "name": "model.layers.25.self_attn.qkv_proj.q_scale", + "shape": [ + 4096, + 72 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 589824, + "byteOffset": 31292928 + } + ], + "md5sum": "30870903e1e4927fff8d281221db6cf9" + }, + { + "dataPath": "params_shard_41.bin", + "format": "raw-shard", + "nbytes": 2658816, + "records": [ + { + "name": "model.layers.25.self_attn.o_proj.q_weight", + "shape": [ + 2304, + 256 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 0 + }, + { + "name": "model.layers.25.self_attn.o_proj.q_scale", + "shape": [ + 2304, + 64 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 294912, + "byteOffset": 2359296 + }, + { + "name": "model.norm.weight", + "shape": [ + 2304 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4608, + "byteOffset": 2654208 + } + ], + "md5sum": "460980e964a2321290ce2641bdadbf75" + } + ] +} \ No newline at end of file diff --git a/ndarray-cache.json b/ndarray-cache.json new file mode 100644 index 0000000000000000000000000000000000000000..c03b882c12281b560a546b341c87cbbd3c3fe643 --- /dev/null +++ b/ndarray-cache.json @@ -0,0 +1,3705 @@ +{ + "metadata": { + "ParamSize": 315, + "ParamBytes": 1634780160.0, + "BitsPerParam": 5.002498464347751 + }, + "records": [ + { + "dataPath": "params_shard_0.bin", + "format": "raw-shard", + "nbytes": 294912000, + "records": [ + { + "name": "model.embed_tokens.q_weight", + "shape": [ + 256000, + 288 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 294912000, + "byteOffset": 0 + } + ], + "md5sum": "1056a334963736d59422e83e31cb241d" + }, + { + "dataPath": "params_shard_1.bin", + "format": "raw-shard", + "nbytes": 36864000, + "records": [ + { + "name": "model.embed_tokens.q_scale", + "shape": [ + 256000, + 72 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 36864000, + "byteOffset": 0 + } + ], + "md5sum": "625a34c9eab976edd9064fcd0bcd959b" + }, + { + "dataPath": "params_shard_2.bin", + "format": "raw-shard", + "nbytes": 33182208, + "records": [ + { + "name": "model.layers.0.input_layernorm.weight", + "shape": [ + 2304 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4608, + "byteOffset": 0 + }, + { + "name": "model.layers.0.mlp.down_proj.q_weight", + "shape": [ + 2304, + 1152 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 10616832, + "byteOffset": 4608 + }, + { + "name": "model.layers.0.mlp.down_proj.q_scale", + "shape": [ + 2304, + 288 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1327104, + "byteOffset": 10621440 + }, + { + "name": "model.layers.0.mlp.gate_up_proj.q_weight", + "shape": [ + 18432, + 288 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 21233664, + "byteOffset": 11948544 + } + ], + "md5sum": "6acbc70fe032f619ec0d7bf36a54a33c" + }, + { + "dataPath": "params_shard_3.bin", + "format": "raw-shard", + "nbytes": 21233664, + "records": [ + { + "name": "model.layers.1.mlp.gate_up_proj.q_weight", + "shape": [ + 18432, + 288 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 21233664, + "byteOffset": 0 + } + ], + "md5sum": "175a78fbf7424be2f32f50fbee026857" + }, + { + "dataPath": "params_shard_4.bin", + "format": "raw-shard", + "nbytes": 33214464, + "records": [ + { + "name": "model.layers.0.mlp.gate_up_proj.q_scale", + "shape": [ + 18432, + 72 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2654208, + "byteOffset": 0 + }, + { + "name": "model.layers.0.post_attention_layernorm.weight", + "shape": [ + 2304 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4608, + "byteOffset": 2654208 + }, + { + "name": "model.layers.0.post_feedforward_layernorm.weight", + "shape": [ + 2304 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4608, + "byteOffset": 2658816 + }, + { + "name": "model.layers.0.pre_feedforward_layernorm.weight", + "shape": [ + 2304 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4608, + "byteOffset": 2663424 + }, + { + "name": "model.layers.0.self_attn.qkv_proj.q_weight", + "shape": [ + 4096, + 288 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4718592, + "byteOffset": 2668032 + }, + { + "name": "model.layers.0.self_attn.qkv_proj.q_scale", + "shape": [ + 4096, + 72 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 589824, + "byteOffset": 7386624 + }, + { + "name": "model.layers.0.self_attn.o_proj.q_weight", + "shape": [ + 2304, + 256 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 7976448 + }, + { + "name": "model.layers.0.self_attn.o_proj.q_scale", + "shape": [ + 2304, + 64 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 294912, + "byteOffset": 10335744 + }, + { + "name": "model.layers.1.input_layernorm.weight", + "shape": [ + 2304 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4608, + "byteOffset": 10630656 + }, + { + "name": "model.layers.1.mlp.down_proj.q_weight", + "shape": [ + 2304, + 1152 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 10616832, + "byteOffset": 10635264 + }, + { + "name": "model.layers.1.mlp.down_proj.q_scale", + "shape": [ + 2304, + 288 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1327104, + "byteOffset": 21252096 + }, + { + "name": "model.layers.1.mlp.gate_up_proj.q_scale", + "shape": [ + 18432, + 72 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2654208, + "byteOffset": 22579200 + }, + { + "name": "model.layers.1.post_attention_layernorm.weight", + "shape": [ + 2304 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4608, + "byteOffset": 25233408 + }, + { + "name": "model.layers.1.post_feedforward_layernorm.weight", + "shape": [ + 2304 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4608, + "byteOffset": 25238016 + }, + { + "name": "model.layers.1.pre_feedforward_layernorm.weight", + "shape": [ + 2304 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4608, + "byteOffset": 25242624 + }, + { + "name": "model.layers.1.self_attn.qkv_proj.q_weight", + "shape": [ + 4096, + 288 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4718592, + "byteOffset": 25247232 + }, + { + "name": "model.layers.1.self_attn.qkv_proj.q_scale", + "shape": [ + 4096, + 72 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 589824, + "byteOffset": 29965824 + }, + { + "name": "model.layers.1.self_attn.o_proj.q_weight", + "shape": [ + 2304, + 256 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 30555648 + }, + { + "name": "model.layers.1.self_attn.o_proj.q_scale", + "shape": [ + 2304, + 64 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 294912, + "byteOffset": 32914944 + }, + { + "name": "model.layers.10.input_layernorm.weight", + "shape": [ + 2304 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4608, + "byteOffset": 33209856 + } + ], + "md5sum": "7046dd6a28b8387479009365eae5ff4c" + }, + { + "dataPath": "params_shard_5.bin", + "format": "raw-shard", + "nbytes": 33177600, + "records": [ + { + "name": "model.layers.10.mlp.down_proj.q_weight", + "shape": [ + 2304, + 1152 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 10616832, + "byteOffset": 0 + }, + { + "name": "model.layers.10.mlp.down_proj.q_scale", + "shape": [ + 2304, + 288 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1327104, + "byteOffset": 10616832 + }, + { + "name": "model.layers.10.mlp.gate_up_proj.q_weight", + "shape": [ + 18432, + 288 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 21233664, + "byteOffset": 11943936 + } + ], + "md5sum": "d40768de670da0b84ec61b5931183d43" + }, + { + "dataPath": "params_shard_6.bin", + "format": "raw-shard", + "nbytes": 21233664, + "records": [ + { + "name": "model.layers.11.mlp.gate_up_proj.q_weight", + "shape": [ + 18432, + 288 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 21233664, + "byteOffset": 0 + } + ], + "md5sum": "de00bdc0805a54eb5e45ed0909b7ed87" + }, + { + "dataPath": "params_shard_7.bin", + "format": "raw-shard", + "nbytes": 33214464, + "records": [ + { + "name": "model.layers.10.mlp.gate_up_proj.q_scale", + "shape": [ + 18432, + 72 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2654208, + "byteOffset": 0 + }, + { + "name": "model.layers.10.post_attention_layernorm.weight", + "shape": [ + 2304 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4608, + "byteOffset": 2654208 + }, + { + "name": "model.layers.10.post_feedforward_layernorm.weight", + "shape": [ + 2304 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4608, + "byteOffset": 2658816 + }, + { + "name": "model.layers.10.pre_feedforward_layernorm.weight", + "shape": [ + 2304 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4608, + "byteOffset": 2663424 + }, + { + "name": "model.layers.10.self_attn.qkv_proj.q_weight", + "shape": [ + 4096, + 288 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4718592, + "byteOffset": 2668032 + }, + { + "name": "model.layers.10.self_attn.qkv_proj.q_scale", + "shape": [ + 4096, + 72 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 589824, + "byteOffset": 7386624 + }, + { + "name": "model.layers.10.self_attn.o_proj.q_weight", + "shape": [ + 2304, + 256 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 7976448 + }, + { + "name": "model.layers.10.self_attn.o_proj.q_scale", + "shape": [ + 2304, + 64 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 294912, + "byteOffset": 10335744 + }, + { + "name": "model.layers.11.input_layernorm.weight", + "shape": [ + 2304 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4608, + "byteOffset": 10630656 + }, + { + "name": "model.layers.11.mlp.down_proj.q_weight", + "shape": [ + 2304, + 1152 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 10616832, + "byteOffset": 10635264 + }, + { + "name": "model.layers.11.mlp.down_proj.q_scale", + "shape": [ + 2304, + 288 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1327104, + "byteOffset": 21252096 + }, + { + "name": "model.layers.11.mlp.gate_up_proj.q_scale", + "shape": [ + 18432, + 72 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2654208, + "byteOffset": 22579200 + }, + { + "name": "model.layers.11.post_attention_layernorm.weight", + "shape": [ + 2304 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4608, + "byteOffset": 25233408 + }, + { + "name": "model.layers.11.post_feedforward_layernorm.weight", + "shape": [ + 2304 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4608, + "byteOffset": 25238016 + }, + { + "name": "model.layers.11.pre_feedforward_layernorm.weight", + "shape": [ + 2304 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4608, + "byteOffset": 25242624 + }, + { + "name": "model.layers.11.self_attn.qkv_proj.q_weight", + "shape": [ + 4096, + 288 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4718592, + "byteOffset": 25247232 + }, + { + "name": "model.layers.11.self_attn.qkv_proj.q_scale", + "shape": [ + 4096, + 72 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 589824, + "byteOffset": 29965824 + }, + { + "name": "model.layers.11.self_attn.o_proj.q_weight", + "shape": [ + 2304, + 256 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 30555648 + }, + { + "name": "model.layers.11.self_attn.o_proj.q_scale", + "shape": [ + 2304, + 64 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 294912, + "byteOffset": 32914944 + }, + { + "name": "model.layers.12.input_layernorm.weight", + "shape": [ + 2304 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4608, + "byteOffset": 33209856 + } + ], + "md5sum": "80b8a5cdb189dfe18e2238a896d52429" + }, + { + "dataPath": "params_shard_8.bin", + "format": "raw-shard", + "nbytes": 33177600, + "records": [ + { + "name": "model.layers.12.mlp.down_proj.q_weight", + "shape": [ + 2304, + 1152 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 10616832, + "byteOffset": 0 + }, + { + "name": "model.layers.12.mlp.down_proj.q_scale", + "shape": [ + 2304, + 288 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1327104, + "byteOffset": 10616832 + }, + { + "name": "model.layers.12.mlp.gate_up_proj.q_weight", + "shape": [ + 18432, + 288 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 21233664, + "byteOffset": 11943936 + } + ], + "md5sum": "499c6f1c3955b7ac48e1baf54f03bc75" + }, + { + "dataPath": "params_shard_9.bin", + "format": "raw-shard", + "nbytes": 21233664, + "records": [ + { + "name": "model.layers.13.mlp.gate_up_proj.q_weight", + "shape": [ + 18432, + 288 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 21233664, + "byteOffset": 0 + } + ], + "md5sum": "d31be670584c3046d1e37cfdb9d9ddd6" + }, + { + "dataPath": "params_shard_10.bin", + "format": "raw-shard", + "nbytes": 33214464, + "records": [ + { + "name": "model.layers.12.mlp.gate_up_proj.q_scale", + "shape": [ + 18432, + 72 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2654208, + "byteOffset": 0 + }, + { + "name": "model.layers.12.post_attention_layernorm.weight", + "shape": [ + 2304 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4608, + "byteOffset": 2654208 + }, + { + "name": "model.layers.12.post_feedforward_layernorm.weight", + "shape": [ + 2304 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4608, + "byteOffset": 2658816 + }, + { + "name": "model.layers.12.pre_feedforward_layernorm.weight", + "shape": [ + 2304 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4608, + "byteOffset": 2663424 + }, + { + "name": "model.layers.12.self_attn.qkv_proj.q_weight", + "shape": [ + 4096, + 288 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4718592, + "byteOffset": 2668032 + }, + { + "name": "model.layers.12.self_attn.qkv_proj.q_scale", + "shape": [ + 4096, + 72 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 589824, + "byteOffset": 7386624 + }, + { + "name": "model.layers.12.self_attn.o_proj.q_weight", + "shape": [ + 2304, + 256 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 7976448 + }, + { + "name": "model.layers.12.self_attn.o_proj.q_scale", + "shape": [ + 2304, + 64 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 294912, + "byteOffset": 10335744 + }, + { + "name": "model.layers.13.input_layernorm.weight", + "shape": [ + 2304 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4608, + "byteOffset": 10630656 + }, + { + "name": "model.layers.13.mlp.down_proj.q_weight", + "shape": [ + 2304, + 1152 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 10616832, + "byteOffset": 10635264 + }, + { + "name": "model.layers.13.mlp.down_proj.q_scale", + "shape": [ + 2304, + 288 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1327104, + "byteOffset": 21252096 + }, + { + "name": "model.layers.13.mlp.gate_up_proj.q_scale", + "shape": [ + 18432, + 72 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2654208, + "byteOffset": 22579200 + }, + { + "name": "model.layers.13.post_attention_layernorm.weight", + "shape": [ + 2304 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4608, + "byteOffset": 25233408 + }, + { + "name": "model.layers.13.post_feedforward_layernorm.weight", + "shape": [ + 2304 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4608, + "byteOffset": 25238016 + }, + { + "name": "model.layers.13.pre_feedforward_layernorm.weight", + "shape": [ + 2304 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4608, + "byteOffset": 25242624 + }, + { + "name": "model.layers.13.self_attn.qkv_proj.q_weight", + "shape": [ + 4096, + 288 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4718592, + "byteOffset": 25247232 + }, + { + "name": "model.layers.13.self_attn.qkv_proj.q_scale", + "shape": [ + 4096, + 72 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 589824, + "byteOffset": 29965824 + }, + { + "name": "model.layers.13.self_attn.o_proj.q_weight", + "shape": [ + 2304, + 256 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 30555648 + }, + { + "name": "model.layers.13.self_attn.o_proj.q_scale", + "shape": [ + 2304, + 64 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 294912, + "byteOffset": 32914944 + }, + { + "name": "model.layers.14.input_layernorm.weight", + "shape": [ + 2304 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4608, + "byteOffset": 33209856 + } + ], + "md5sum": "d4c11e91b9585fb3780730c909a30752" + }, + { + "dataPath": "params_shard_11.bin", + "format": "raw-shard", + "nbytes": 33177600, + "records": [ + { + "name": "model.layers.14.mlp.down_proj.q_weight", + "shape": [ + 2304, + 1152 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 10616832, + "byteOffset": 0 + }, + { + "name": "model.layers.14.mlp.down_proj.q_scale", + "shape": [ + 2304, + 288 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1327104, + "byteOffset": 10616832 + }, + { + "name": "model.layers.14.mlp.gate_up_proj.q_weight", + "shape": [ + 18432, + 288 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 21233664, + "byteOffset": 11943936 + } + ], + "md5sum": "57a6b07fdb8aa49529a7b1b4959757ea" + }, + { + "dataPath": "params_shard_12.bin", + "format": "raw-shard", + "nbytes": 21233664, + "records": [ + { + "name": "model.layers.15.mlp.gate_up_proj.q_weight", + "shape": [ + 18432, + 288 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 21233664, + "byteOffset": 0 + } + ], + "md5sum": "67614580b3f2a8a38e85aa9b0267e77a" + }, + { + "dataPath": "params_shard_13.bin", + "format": "raw-shard", + "nbytes": 33214464, + "records": [ + { + "name": "model.layers.14.mlp.gate_up_proj.q_scale", + "shape": [ + 18432, + 72 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2654208, + "byteOffset": 0 + }, + { + "name": "model.layers.14.post_attention_layernorm.weight", + "shape": [ + 2304 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4608, + "byteOffset": 2654208 + }, + { + "name": "model.layers.14.post_feedforward_layernorm.weight", + "shape": [ + 2304 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4608, + "byteOffset": 2658816 + }, + { + "name": "model.layers.14.pre_feedforward_layernorm.weight", + "shape": [ + 2304 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4608, + "byteOffset": 2663424 + }, + { + "name": "model.layers.14.self_attn.qkv_proj.q_weight", + "shape": [ + 4096, + 288 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4718592, + "byteOffset": 2668032 + }, + { + "name": "model.layers.14.self_attn.qkv_proj.q_scale", + "shape": [ + 4096, + 72 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 589824, + "byteOffset": 7386624 + }, + { + "name": "model.layers.14.self_attn.o_proj.q_weight", + "shape": [ + 2304, + 256 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 7976448 + }, + { + "name": "model.layers.14.self_attn.o_proj.q_scale", + "shape": [ + 2304, + 64 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 294912, + "byteOffset": 10335744 + }, + { + "name": "model.layers.15.input_layernorm.weight", + "shape": [ + 2304 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4608, + "byteOffset": 10630656 + }, + { + "name": "model.layers.15.mlp.down_proj.q_weight", + "shape": [ + 2304, + 1152 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 10616832, + "byteOffset": 10635264 + }, + { + "name": "model.layers.15.mlp.down_proj.q_scale", + "shape": [ + 2304, + 288 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1327104, + "byteOffset": 21252096 + }, + { + "name": "model.layers.15.mlp.gate_up_proj.q_scale", + "shape": [ + 18432, + 72 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2654208, + "byteOffset": 22579200 + }, + { + "name": "model.layers.15.post_attention_layernorm.weight", + "shape": [ + 2304 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4608, + "byteOffset": 25233408 + }, + { + "name": "model.layers.15.post_feedforward_layernorm.weight", + "shape": [ + 2304 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4608, + "byteOffset": 25238016 + }, + { + "name": "model.layers.15.pre_feedforward_layernorm.weight", + "shape": [ + 2304 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4608, + "byteOffset": 25242624 + }, + { + "name": "model.layers.15.self_attn.qkv_proj.q_weight", + "shape": [ + 4096, + 288 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4718592, + "byteOffset": 25247232 + }, + { + "name": "model.layers.15.self_attn.qkv_proj.q_scale", + "shape": [ + 4096, + 72 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 589824, + "byteOffset": 29965824 + }, + { + "name": "model.layers.15.self_attn.o_proj.q_weight", + "shape": [ + 2304, + 256 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 30555648 + }, + { + "name": "model.layers.15.self_attn.o_proj.q_scale", + "shape": [ + 2304, + 64 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 294912, + "byteOffset": 32914944 + }, + { + "name": "model.layers.16.input_layernorm.weight", + "shape": [ + 2304 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4608, + "byteOffset": 33209856 + } + ], + "md5sum": "3a8f3b58cadd6366dd53d8378d3f8af1" + }, + { + "dataPath": "params_shard_14.bin", + "format": "raw-shard", + "nbytes": 33177600, + "records": [ + { + "name": "model.layers.16.mlp.down_proj.q_weight", + "shape": [ + 2304, + 1152 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 10616832, + "byteOffset": 0 + }, + { + "name": "model.layers.16.mlp.down_proj.q_scale", + "shape": [ + 2304, + 288 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1327104, + "byteOffset": 10616832 + }, + { + "name": "model.layers.16.mlp.gate_up_proj.q_weight", + "shape": [ + 18432, + 288 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 21233664, + "byteOffset": 11943936 + } + ], + "md5sum": "2f0476c0a82d1270be330f2b11b0095e" + }, + { + "dataPath": "params_shard_15.bin", + "format": "raw-shard", + "nbytes": 21233664, + "records": [ + { + "name": "model.layers.17.mlp.gate_up_proj.q_weight", + "shape": [ + 18432, + 288 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 21233664, + "byteOffset": 0 + } + ], + "md5sum": "b0a8a0bf26d878221850484946ce030a" + }, + { + "dataPath": "params_shard_16.bin", + "format": "raw-shard", + "nbytes": 33214464, + "records": [ + { + "name": "model.layers.16.mlp.gate_up_proj.q_scale", + "shape": [ + 18432, + 72 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2654208, + "byteOffset": 0 + }, + { + "name": "model.layers.16.post_attention_layernorm.weight", + "shape": [ + 2304 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4608, + "byteOffset": 2654208 + }, + { + "name": "model.layers.16.post_feedforward_layernorm.weight", + "shape": [ + 2304 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4608, + "byteOffset": 2658816 + }, + { + "name": "model.layers.16.pre_feedforward_layernorm.weight", + "shape": [ + 2304 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4608, + "byteOffset": 2663424 + }, + { + "name": "model.layers.16.self_attn.qkv_proj.q_weight", + "shape": [ + 4096, + 288 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4718592, + "byteOffset": 2668032 + }, + { + "name": "model.layers.16.self_attn.qkv_proj.q_scale", + "shape": [ + 4096, + 72 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 589824, + "byteOffset": 7386624 + }, + { + "name": "model.layers.16.self_attn.o_proj.q_weight", + "shape": [ + 2304, + 256 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 7976448 + }, + { + "name": "model.layers.16.self_attn.o_proj.q_scale", + "shape": [ + 2304, + 64 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 294912, + "byteOffset": 10335744 + }, + { + "name": "model.layers.17.input_layernorm.weight", + "shape": [ + 2304 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4608, + "byteOffset": 10630656 + }, + { + "name": "model.layers.17.mlp.down_proj.q_weight", + "shape": [ + 2304, + 1152 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 10616832, + "byteOffset": 10635264 + }, + { + "name": "model.layers.17.mlp.down_proj.q_scale", + "shape": [ + 2304, + 288 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1327104, + "byteOffset": 21252096 + }, + { + "name": "model.layers.17.mlp.gate_up_proj.q_scale", + "shape": [ + 18432, + 72 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2654208, + "byteOffset": 22579200 + }, + { + "name": "model.layers.17.post_attention_layernorm.weight", + "shape": [ + 2304 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4608, + "byteOffset": 25233408 + }, + { + "name": "model.layers.17.post_feedforward_layernorm.weight", + "shape": [ + 2304 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4608, + "byteOffset": 25238016 + }, + { + "name": "model.layers.17.pre_feedforward_layernorm.weight", + "shape": [ + 2304 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4608, + "byteOffset": 25242624 + }, + { + "name": "model.layers.17.self_attn.qkv_proj.q_weight", + "shape": [ + 4096, + 288 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4718592, + "byteOffset": 25247232 + }, + { + "name": "model.layers.17.self_attn.qkv_proj.q_scale", + "shape": [ + 4096, + 72 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 589824, + "byteOffset": 29965824 + }, + { + "name": "model.layers.17.self_attn.o_proj.q_weight", + "shape": [ + 2304, + 256 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 30555648 + }, + { + "name": "model.layers.17.self_attn.o_proj.q_scale", + "shape": [ + 2304, + 64 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 294912, + "byteOffset": 32914944 + }, + { + "name": "model.layers.18.input_layernorm.weight", + "shape": [ + 2304 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4608, + "byteOffset": 33209856 + } + ], + "md5sum": "f1a76e763b1dc0e9c67df48528bc0604" + }, + { + "dataPath": "params_shard_17.bin", + "format": "raw-shard", + "nbytes": 33177600, + "records": [ + { + "name": "model.layers.18.mlp.down_proj.q_weight", + "shape": [ + 2304, + 1152 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 10616832, + "byteOffset": 0 + }, + { + "name": "model.layers.18.mlp.down_proj.q_scale", + "shape": [ + 2304, + 288 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1327104, + "byteOffset": 10616832 + }, + { + "name": "model.layers.18.mlp.gate_up_proj.q_weight", + "shape": [ + 18432, + 288 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 21233664, + "byteOffset": 11943936 + } + ], + "md5sum": "c9afc0e296625eb4651b8faf2a0fb6b0" + }, + { + "dataPath": "params_shard_18.bin", + "format": "raw-shard", + "nbytes": 21233664, + "records": [ + { + "name": "model.layers.19.mlp.gate_up_proj.q_weight", + "shape": [ + 18432, + 288 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 21233664, + "byteOffset": 0 + } + ], + "md5sum": "95b6cee3c0471d1dbb0fea2e1baf4abd" + }, + { + "dataPath": "params_shard_19.bin", + "format": "raw-shard", + "nbytes": 33214464, + "records": [ + { + "name": "model.layers.18.mlp.gate_up_proj.q_scale", + "shape": [ + 18432, + 72 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2654208, + "byteOffset": 0 + }, + { + "name": "model.layers.18.post_attention_layernorm.weight", + "shape": [ + 2304 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4608, + "byteOffset": 2654208 + }, + { + "name": "model.layers.18.post_feedforward_layernorm.weight", + "shape": [ + 2304 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4608, + "byteOffset": 2658816 + }, + { + "name": "model.layers.18.pre_feedforward_layernorm.weight", + "shape": [ + 2304 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4608, + "byteOffset": 2663424 + }, + { + "name": "model.layers.18.self_attn.qkv_proj.q_weight", + "shape": [ + 4096, + 288 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4718592, + "byteOffset": 2668032 + }, + { + "name": "model.layers.18.self_attn.qkv_proj.q_scale", + "shape": [ + 4096, + 72 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 589824, + "byteOffset": 7386624 + }, + { + "name": "model.layers.18.self_attn.o_proj.q_weight", + "shape": [ + 2304, + 256 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 7976448 + }, + { + "name": "model.layers.18.self_attn.o_proj.q_scale", + "shape": [ + 2304, + 64 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 294912, + "byteOffset": 10335744 + }, + { + "name": "model.layers.19.input_layernorm.weight", + "shape": [ + 2304 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4608, + "byteOffset": 10630656 + }, + { + "name": "model.layers.19.mlp.down_proj.q_weight", + "shape": [ + 2304, + 1152 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 10616832, + "byteOffset": 10635264 + }, + { + "name": "model.layers.19.mlp.down_proj.q_scale", + "shape": [ + 2304, + 288 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1327104, + "byteOffset": 21252096 + }, + { + "name": "model.layers.19.mlp.gate_up_proj.q_scale", + "shape": [ + 18432, + 72 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2654208, + "byteOffset": 22579200 + }, + { + "name": "model.layers.19.post_attention_layernorm.weight", + "shape": [ + 2304 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4608, + "byteOffset": 25233408 + }, + { + "name": "model.layers.19.post_feedforward_layernorm.weight", + "shape": [ + 2304 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4608, + "byteOffset": 25238016 + }, + { + "name": "model.layers.19.pre_feedforward_layernorm.weight", + "shape": [ + 2304 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4608, + "byteOffset": 25242624 + }, + { + "name": "model.layers.19.self_attn.qkv_proj.q_weight", + "shape": [ + 4096, + 288 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4718592, + "byteOffset": 25247232 + }, + { + "name": "model.layers.19.self_attn.qkv_proj.q_scale", + "shape": [ + 4096, + 72 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 589824, + "byteOffset": 29965824 + }, + { + "name": "model.layers.19.self_attn.o_proj.q_weight", + "shape": [ + 2304, + 256 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 30555648 + }, + { + "name": "model.layers.19.self_attn.o_proj.q_scale", + "shape": [ + 2304, + 64 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 294912, + "byteOffset": 32914944 + }, + { + "name": "model.layers.2.input_layernorm.weight", + "shape": [ + 2304 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4608, + "byteOffset": 33209856 + } + ], + "md5sum": "f67c0fd6ce5403528b801dcad10d1816" + }, + { + "dataPath": "params_shard_20.bin", + "format": "raw-shard", + "nbytes": 33177600, + "records": [ + { + "name": "model.layers.2.mlp.down_proj.q_weight", + "shape": [ + 2304, + 1152 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 10616832, + "byteOffset": 0 + }, + { + "name": "model.layers.2.mlp.down_proj.q_scale", + "shape": [ + 2304, + 288 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1327104, + "byteOffset": 10616832 + }, + { + "name": "model.layers.2.mlp.gate_up_proj.q_weight", + "shape": [ + 18432, + 288 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 21233664, + "byteOffset": 11943936 + } + ], + "md5sum": "584e8d4ef70fb14e3772af0979bd3f73" + }, + { + "dataPath": "params_shard_21.bin", + "format": "raw-shard", + "nbytes": 21233664, + "records": [ + { + "name": "model.layers.20.mlp.gate_up_proj.q_weight", + "shape": [ + 18432, + 288 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 21233664, + "byteOffset": 0 + } + ], + "md5sum": "0adcc615d1d9695b4ef590c89de35db7" + }, + { + "dataPath": "params_shard_22.bin", + "format": "raw-shard", + "nbytes": 33214464, + "records": [ + { + "name": "model.layers.2.mlp.gate_up_proj.q_scale", + "shape": [ + 18432, + 72 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2654208, + "byteOffset": 0 + }, + { + "name": "model.layers.2.post_attention_layernorm.weight", + "shape": [ + 2304 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4608, + "byteOffset": 2654208 + }, + { + "name": "model.layers.2.post_feedforward_layernorm.weight", + "shape": [ + 2304 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4608, + "byteOffset": 2658816 + }, + { + "name": "model.layers.2.pre_feedforward_layernorm.weight", + "shape": [ + 2304 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4608, + "byteOffset": 2663424 + }, + { + "name": "model.layers.2.self_attn.qkv_proj.q_weight", + "shape": [ + 4096, + 288 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4718592, + "byteOffset": 2668032 + }, + { + "name": "model.layers.2.self_attn.qkv_proj.q_scale", + "shape": [ + 4096, + 72 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 589824, + "byteOffset": 7386624 + }, + { + "name": "model.layers.2.self_attn.o_proj.q_weight", + "shape": [ + 2304, + 256 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 7976448 + }, + { + "name": "model.layers.2.self_attn.o_proj.q_scale", + "shape": [ + 2304, + 64 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 294912, + "byteOffset": 10335744 + }, + { + "name": "model.layers.20.input_layernorm.weight", + "shape": [ + 2304 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4608, + "byteOffset": 10630656 + }, + { + "name": "model.layers.20.mlp.down_proj.q_weight", + "shape": [ + 2304, + 1152 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 10616832, + "byteOffset": 10635264 + }, + { + "name": "model.layers.20.mlp.down_proj.q_scale", + "shape": [ + 2304, + 288 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1327104, + "byteOffset": 21252096 + }, + { + "name": "model.layers.20.mlp.gate_up_proj.q_scale", + "shape": [ + 18432, + 72 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2654208, + "byteOffset": 22579200 + }, + { + "name": "model.layers.20.post_attention_layernorm.weight", + "shape": [ + 2304 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4608, + "byteOffset": 25233408 + }, + { + "name": "model.layers.20.post_feedforward_layernorm.weight", + "shape": [ + 2304 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4608, + "byteOffset": 25238016 + }, + { + "name": "model.layers.20.pre_feedforward_layernorm.weight", + "shape": [ + 2304 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4608, + "byteOffset": 25242624 + }, + { + "name": "model.layers.20.self_attn.qkv_proj.q_weight", + "shape": [ + 4096, + 288 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4718592, + "byteOffset": 25247232 + }, + { + "name": "model.layers.20.self_attn.qkv_proj.q_scale", + "shape": [ + 4096, + 72 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 589824, + "byteOffset": 29965824 + }, + { + "name": "model.layers.20.self_attn.o_proj.q_weight", + "shape": [ + 2304, + 256 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 30555648 + }, + { + "name": "model.layers.20.self_attn.o_proj.q_scale", + "shape": [ + 2304, + 64 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 294912, + "byteOffset": 32914944 + }, + { + "name": "model.layers.21.input_layernorm.weight", + "shape": [ + 2304 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4608, + "byteOffset": 33209856 + } + ], + "md5sum": "14694eefe69a69a9223867449c09d11c" + }, + { + "dataPath": "params_shard_23.bin", + "format": "raw-shard", + "nbytes": 33177600, + "records": [ + { + "name": "model.layers.21.mlp.down_proj.q_weight", + "shape": [ + 2304, + 1152 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 10616832, + "byteOffset": 0 + }, + { + "name": "model.layers.21.mlp.down_proj.q_scale", + "shape": [ + 2304, + 288 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1327104, + "byteOffset": 10616832 + }, + { + "name": "model.layers.21.mlp.gate_up_proj.q_weight", + "shape": [ + 18432, + 288 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 21233664, + "byteOffset": 11943936 + } + ], + "md5sum": "3fbc7ba0b3c4e0f20ae744e85561185e" + }, + { + "dataPath": "params_shard_24.bin", + "format": "raw-shard", + "nbytes": 21233664, + "records": [ + { + "name": "model.layers.22.mlp.gate_up_proj.q_weight", + "shape": [ + 18432, + 288 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 21233664, + "byteOffset": 0 + } + ], + "md5sum": "fb2183e1778ec2dfd9cd348116591da5" + }, + { + "dataPath": "params_shard_25.bin", + "format": "raw-shard", + "nbytes": 33214464, + "records": [ + { + "name": "model.layers.21.mlp.gate_up_proj.q_scale", + "shape": [ + 18432, + 72 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2654208, + "byteOffset": 0 + }, + { + "name": "model.layers.21.post_attention_layernorm.weight", + "shape": [ + 2304 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4608, + "byteOffset": 2654208 + }, + { + "name": "model.layers.21.post_feedforward_layernorm.weight", + "shape": [ + 2304 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4608, + "byteOffset": 2658816 + }, + { + "name": "model.layers.21.pre_feedforward_layernorm.weight", + "shape": [ + 2304 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4608, + "byteOffset": 2663424 + }, + { + "name": "model.layers.21.self_attn.qkv_proj.q_weight", + "shape": [ + 4096, + 288 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4718592, + "byteOffset": 2668032 + }, + { + "name": "model.layers.21.self_attn.qkv_proj.q_scale", + "shape": [ + 4096, + 72 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 589824, + "byteOffset": 7386624 + }, + { + "name": "model.layers.21.self_attn.o_proj.q_weight", + "shape": [ + 2304, + 256 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 7976448 + }, + { + "name": "model.layers.21.self_attn.o_proj.q_scale", + "shape": [ + 2304, + 64 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 294912, + "byteOffset": 10335744 + }, + { + "name": "model.layers.22.input_layernorm.weight", + "shape": [ + 2304 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4608, + "byteOffset": 10630656 + }, + { + "name": "model.layers.22.mlp.down_proj.q_weight", + "shape": [ + 2304, + 1152 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 10616832, + "byteOffset": 10635264 + }, + { + "name": "model.layers.22.mlp.down_proj.q_scale", + "shape": [ + 2304, + 288 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1327104, + "byteOffset": 21252096 + }, + { + "name": "model.layers.22.mlp.gate_up_proj.q_scale", + "shape": [ + 18432, + 72 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2654208, + "byteOffset": 22579200 + }, + { + "name": "model.layers.22.post_attention_layernorm.weight", + "shape": [ + 2304 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4608, + "byteOffset": 25233408 + }, + { + "name": "model.layers.22.post_feedforward_layernorm.weight", + "shape": [ + 2304 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4608, + "byteOffset": 25238016 + }, + { + "name": "model.layers.22.pre_feedforward_layernorm.weight", + "shape": [ + 2304 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4608, + "byteOffset": 25242624 + }, + { + "name": "model.layers.22.self_attn.qkv_proj.q_weight", + "shape": [ + 4096, + 288 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4718592, + "byteOffset": 25247232 + }, + { + "name": "model.layers.22.self_attn.qkv_proj.q_scale", + "shape": [ + 4096, + 72 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 589824, + "byteOffset": 29965824 + }, + { + "name": "model.layers.22.self_attn.o_proj.q_weight", + "shape": [ + 2304, + 256 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 30555648 + }, + { + "name": "model.layers.22.self_attn.o_proj.q_scale", + "shape": [ + 2304, + 64 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 294912, + "byteOffset": 32914944 + }, + { + "name": "model.layers.23.input_layernorm.weight", + "shape": [ + 2304 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4608, + "byteOffset": 33209856 + } + ], + "md5sum": "a76f404041e3f6647d86cbc041bb144e" + }, + { + "dataPath": "params_shard_26.bin", + "format": "raw-shard", + "nbytes": 33177600, + "records": [ + { + "name": "model.layers.23.mlp.down_proj.q_weight", + "shape": [ + 2304, + 1152 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 10616832, + "byteOffset": 0 + }, + { + "name": "model.layers.23.mlp.down_proj.q_scale", + "shape": [ + 2304, + 288 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1327104, + "byteOffset": 10616832 + }, + { + "name": "model.layers.23.mlp.gate_up_proj.q_weight", + "shape": [ + 18432, + 288 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 21233664, + "byteOffset": 11943936 + } + ], + "md5sum": "d30f7600b2debb90cf9851c84ea90ab4" + }, + { + "dataPath": "params_shard_27.bin", + "format": "raw-shard", + "nbytes": 31864320, + "records": [ + { + "name": "model.layers.23.mlp.gate_up_proj.q_scale", + "shape": [ + 18432, + 72 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2654208, + "byteOffset": 0 + }, + { + "name": "model.layers.23.post_attention_layernorm.weight", + "shape": [ + 2304 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4608, + "byteOffset": 2654208 + }, + { + "name": "model.layers.23.post_feedforward_layernorm.weight", + "shape": [ + 2304 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4608, + "byteOffset": 2658816 + }, + { + "name": "model.layers.23.pre_feedforward_layernorm.weight", + "shape": [ + 2304 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4608, + "byteOffset": 2663424 + }, + { + "name": "model.layers.23.self_attn.qkv_proj.q_weight", + "shape": [ + 4096, + 288 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4718592, + "byteOffset": 2668032 + }, + { + "name": "model.layers.23.self_attn.qkv_proj.q_scale", + "shape": [ + 4096, + 72 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 589824, + "byteOffset": 7386624 + }, + { + "name": "model.layers.23.self_attn.o_proj.q_weight", + "shape": [ + 2304, + 256 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 7976448 + }, + { + "name": "model.layers.23.self_attn.o_proj.q_scale", + "shape": [ + 2304, + 64 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 294912, + "byteOffset": 10335744 + }, + { + "name": "model.layers.24.mlp.gate_up_proj.q_weight", + "shape": [ + 18432, + 288 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 21233664, + "byteOffset": 10630656 + } + ], + "md5sum": "e33528541ac66995a65d4059e025adb4" + }, + { + "dataPath": "params_shard_28.bin", + "format": "raw-shard", + "nbytes": 21233664, + "records": [ + { + "name": "model.layers.3.mlp.gate_up_proj.q_weight", + "shape": [ + 18432, + 288 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 21233664, + "byteOffset": 0 + } + ], + "md5sum": "2334942f272bf7f3771df44bdc75a8f9" + }, + { + "dataPath": "params_shard_29.bin", + "format": "raw-shard", + "nbytes": 33200640, + "records": [ + { + "name": "model.layers.24.mlp.gate_up_proj.q_scale", + "shape": [ + 18432, + 72 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2654208, + "byteOffset": 0 + }, + { + "name": "model.layers.24.self_attn.qkv_proj.q_weight", + "shape": [ + 4096, + 288 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4718592, + "byteOffset": 2654208 + }, + { + "name": "model.layers.24.self_attn.qkv_proj.q_scale", + "shape": [ + 4096, + 72 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 589824, + "byteOffset": 7372800 + }, + { + "name": "model.layers.24.self_attn.o_proj.q_weight", + "shape": [ + 2304, + 256 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 7962624 + }, + { + "name": "model.layers.24.self_attn.o_proj.q_scale", + "shape": [ + 2304, + 64 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 294912, + "byteOffset": 10321920 + }, + { + "name": "model.layers.3.input_layernorm.weight", + "shape": [ + 2304 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4608, + "byteOffset": 10616832 + }, + { + "name": "model.layers.3.mlp.down_proj.q_weight", + "shape": [ + 2304, + 1152 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 10616832, + "byteOffset": 10621440 + }, + { + "name": "model.layers.3.mlp.down_proj.q_scale", + "shape": [ + 2304, + 288 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1327104, + "byteOffset": 21238272 + }, + { + "name": "model.layers.3.mlp.gate_up_proj.q_scale", + "shape": [ + 18432, + 72 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2654208, + "byteOffset": 22565376 + }, + { + "name": "model.layers.3.post_attention_layernorm.weight", + "shape": [ + 2304 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4608, + "byteOffset": 25219584 + }, + { + "name": "model.layers.3.post_feedforward_layernorm.weight", + "shape": [ + 2304 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4608, + "byteOffset": 25224192 + }, + { + "name": "model.layers.3.pre_feedforward_layernorm.weight", + "shape": [ + 2304 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4608, + "byteOffset": 25228800 + }, + { + "name": "model.layers.3.self_attn.qkv_proj.q_weight", + "shape": [ + 4096, + 288 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4718592, + "byteOffset": 25233408 + }, + { + "name": "model.layers.3.self_attn.qkv_proj.q_scale", + "shape": [ + 4096, + 72 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 589824, + "byteOffset": 29952000 + }, + { + "name": "model.layers.3.self_attn.o_proj.q_weight", + "shape": [ + 2304, + 256 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 30541824 + }, + { + "name": "model.layers.3.self_attn.o_proj.q_scale", + "shape": [ + 2304, + 64 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 294912, + "byteOffset": 32901120 + }, + { + "name": "model.layers.4.input_layernorm.weight", + "shape": [ + 2304 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4608, + "byteOffset": 33196032 + } + ], + "md5sum": "9ee5e7a7f0213b4d55deb8cea35b3a8e" + }, + { + "dataPath": "params_shard_30.bin", + "format": "raw-shard", + "nbytes": 33177600, + "records": [ + { + "name": "model.layers.4.mlp.down_proj.q_weight", + "shape": [ + 2304, + 1152 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 10616832, + "byteOffset": 0 + }, + { + "name": "model.layers.4.mlp.down_proj.q_scale", + "shape": [ + 2304, + 288 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1327104, + "byteOffset": 10616832 + }, + { + "name": "model.layers.4.mlp.gate_up_proj.q_weight", + "shape": [ + 18432, + 288 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 21233664, + "byteOffset": 11943936 + } + ], + "md5sum": "babbe4d246c14e00495e33536df1e291" + }, + { + "dataPath": "params_shard_31.bin", + "format": "raw-shard", + "nbytes": 21233664, + "records": [ + { + "name": "model.layers.5.mlp.gate_up_proj.q_weight", + "shape": [ + 18432, + 288 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 21233664, + "byteOffset": 0 + } + ], + "md5sum": "21f58b7fc7ca5c6dbf8d2953aa7abdb9" + }, + { + "dataPath": "params_shard_32.bin", + "format": "raw-shard", + "nbytes": 33214464, + "records": [ + { + "name": "model.layers.4.mlp.gate_up_proj.q_scale", + "shape": [ + 18432, + 72 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2654208, + "byteOffset": 0 + }, + { + "name": "model.layers.4.post_attention_layernorm.weight", + "shape": [ + 2304 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4608, + "byteOffset": 2654208 + }, + { + "name": "model.layers.4.post_feedforward_layernorm.weight", + "shape": [ + 2304 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4608, + "byteOffset": 2658816 + }, + { + "name": "model.layers.4.pre_feedforward_layernorm.weight", + "shape": [ + 2304 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4608, + "byteOffset": 2663424 + }, + { + "name": "model.layers.4.self_attn.qkv_proj.q_weight", + "shape": [ + 4096, + 288 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4718592, + "byteOffset": 2668032 + }, + { + "name": "model.layers.4.self_attn.qkv_proj.q_scale", + "shape": [ + 4096, + 72 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 589824, + "byteOffset": 7386624 + }, + { + "name": "model.layers.4.self_attn.o_proj.q_weight", + "shape": [ + 2304, + 256 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 7976448 + }, + { + "name": "model.layers.4.self_attn.o_proj.q_scale", + "shape": [ + 2304, + 64 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 294912, + "byteOffset": 10335744 + }, + { + "name": "model.layers.5.input_layernorm.weight", + "shape": [ + 2304 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4608, + "byteOffset": 10630656 + }, + { + "name": "model.layers.5.mlp.down_proj.q_weight", + "shape": [ + 2304, + 1152 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 10616832, + "byteOffset": 10635264 + }, + { + "name": "model.layers.5.mlp.down_proj.q_scale", + "shape": [ + 2304, + 288 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1327104, + "byteOffset": 21252096 + }, + { + "name": "model.layers.5.mlp.gate_up_proj.q_scale", + "shape": [ + 18432, + 72 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2654208, + "byteOffset": 22579200 + }, + { + "name": "model.layers.5.post_attention_layernorm.weight", + "shape": [ + 2304 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4608, + "byteOffset": 25233408 + }, + { + "name": "model.layers.5.post_feedforward_layernorm.weight", + "shape": [ + 2304 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4608, + "byteOffset": 25238016 + }, + { + "name": "model.layers.5.pre_feedforward_layernorm.weight", + "shape": [ + 2304 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4608, + "byteOffset": 25242624 + }, + { + "name": "model.layers.5.self_attn.qkv_proj.q_weight", + "shape": [ + 4096, + 288 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4718592, + "byteOffset": 25247232 + }, + { + "name": "model.layers.5.self_attn.qkv_proj.q_scale", + "shape": [ + 4096, + 72 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 589824, + "byteOffset": 29965824 + }, + { + "name": "model.layers.5.self_attn.o_proj.q_weight", + "shape": [ + 2304, + 256 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 30555648 + }, + { + "name": "model.layers.5.self_attn.o_proj.q_scale", + "shape": [ + 2304, + 64 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 294912, + "byteOffset": 32914944 + }, + { + "name": "model.layers.6.input_layernorm.weight", + "shape": [ + 2304 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4608, + "byteOffset": 33209856 + } + ], + "md5sum": "44ed88825a210c8e2033400e70300767" + }, + { + "dataPath": "params_shard_33.bin", + "format": "raw-shard", + "nbytes": 33177600, + "records": [ + { + "name": "model.layers.6.mlp.down_proj.q_weight", + "shape": [ + 2304, + 1152 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 10616832, + "byteOffset": 0 + }, + { + "name": "model.layers.6.mlp.down_proj.q_scale", + "shape": [ + 2304, + 288 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1327104, + "byteOffset": 10616832 + }, + { + "name": "model.layers.6.mlp.gate_up_proj.q_weight", + "shape": [ + 18432, + 288 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 21233664, + "byteOffset": 11943936 + } + ], + "md5sum": "93e69ae37c7383b0004a29cc419c8726" + }, + { + "dataPath": "params_shard_34.bin", + "format": "raw-shard", + "nbytes": 21233664, + "records": [ + { + "name": "model.layers.7.mlp.gate_up_proj.q_weight", + "shape": [ + 18432, + 288 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 21233664, + "byteOffset": 0 + } + ], + "md5sum": "29656ba96b5514b3401910b2cf1b2093" + }, + { + "dataPath": "params_shard_35.bin", + "format": "raw-shard", + "nbytes": 33214464, + "records": [ + { + "name": "model.layers.6.mlp.gate_up_proj.q_scale", + "shape": [ + 18432, + 72 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2654208, + "byteOffset": 0 + }, + { + "name": "model.layers.6.post_attention_layernorm.weight", + "shape": [ + 2304 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4608, + "byteOffset": 2654208 + }, + { + "name": "model.layers.6.post_feedforward_layernorm.weight", + "shape": [ + 2304 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4608, + "byteOffset": 2658816 + }, + { + "name": "model.layers.6.pre_feedforward_layernorm.weight", + "shape": [ + 2304 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4608, + "byteOffset": 2663424 + }, + { + "name": "model.layers.6.self_attn.qkv_proj.q_weight", + "shape": [ + 4096, + 288 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4718592, + "byteOffset": 2668032 + }, + { + "name": "model.layers.6.self_attn.qkv_proj.q_scale", + "shape": [ + 4096, + 72 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 589824, + "byteOffset": 7386624 + }, + { + "name": "model.layers.6.self_attn.o_proj.q_weight", + "shape": [ + 2304, + 256 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 7976448 + }, + { + "name": "model.layers.6.self_attn.o_proj.q_scale", + "shape": [ + 2304, + 64 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 294912, + "byteOffset": 10335744 + }, + { + "name": "model.layers.7.input_layernorm.weight", + "shape": [ + 2304 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4608, + "byteOffset": 10630656 + }, + { + "name": "model.layers.7.mlp.down_proj.q_weight", + "shape": [ + 2304, + 1152 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 10616832, + "byteOffset": 10635264 + }, + { + "name": "model.layers.7.mlp.down_proj.q_scale", + "shape": [ + 2304, + 288 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1327104, + "byteOffset": 21252096 + }, + { + "name": "model.layers.7.mlp.gate_up_proj.q_scale", + "shape": [ + 18432, + 72 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2654208, + "byteOffset": 22579200 + }, + { + "name": "model.layers.7.post_attention_layernorm.weight", + "shape": [ + 2304 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4608, + "byteOffset": 25233408 + }, + { + "name": "model.layers.7.post_feedforward_layernorm.weight", + "shape": [ + 2304 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4608, + "byteOffset": 25238016 + }, + { + "name": "model.layers.7.pre_feedforward_layernorm.weight", + "shape": [ + 2304 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4608, + "byteOffset": 25242624 + }, + { + "name": "model.layers.7.self_attn.qkv_proj.q_weight", + "shape": [ + 4096, + 288 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4718592, + "byteOffset": 25247232 + }, + { + "name": "model.layers.7.self_attn.qkv_proj.q_scale", + "shape": [ + 4096, + 72 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 589824, + "byteOffset": 29965824 + }, + { + "name": "model.layers.7.self_attn.o_proj.q_weight", + "shape": [ + 2304, + 256 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 30555648 + }, + { + "name": "model.layers.7.self_attn.o_proj.q_scale", + "shape": [ + 2304, + 64 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 294912, + "byteOffset": 32914944 + }, + { + "name": "model.layers.8.input_layernorm.weight", + "shape": [ + 2304 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4608, + "byteOffset": 33209856 + } + ], + "md5sum": "550983d43939b0d3d752d29349cefe9c" + }, + { + "dataPath": "params_shard_36.bin", + "format": "raw-shard", + "nbytes": 33177600, + "records": [ + { + "name": "model.layers.8.mlp.down_proj.q_weight", + "shape": [ + 2304, + 1152 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 10616832, + "byteOffset": 0 + }, + { + "name": "model.layers.8.mlp.down_proj.q_scale", + "shape": [ + 2304, + 288 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1327104, + "byteOffset": 10616832 + }, + { + "name": "model.layers.8.mlp.gate_up_proj.q_weight", + "shape": [ + 18432, + 288 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 21233664, + "byteOffset": 11943936 + } + ], + "md5sum": "e041c162985da23150d03713fec452c5" + }, + { + "dataPath": "params_shard_37.bin", + "format": "raw-shard", + "nbytes": 21233664, + "records": [ + { + "name": "model.layers.9.mlp.gate_up_proj.q_weight", + "shape": [ + 18432, + 288 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 21233664, + "byteOffset": 0 + } + ], + "md5sum": "c816f0c057f977aef0d917927c495952" + }, + { + "dataPath": "params_shard_38.bin", + "format": "raw-shard", + "nbytes": 33214464, + "records": [ + { + "name": "model.layers.8.mlp.gate_up_proj.q_scale", + "shape": [ + 18432, + 72 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2654208, + "byteOffset": 0 + }, + { + "name": "model.layers.8.post_attention_layernorm.weight", + "shape": [ + 2304 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4608, + "byteOffset": 2654208 + }, + { + "name": "model.layers.8.post_feedforward_layernorm.weight", + "shape": [ + 2304 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4608, + "byteOffset": 2658816 + }, + { + "name": "model.layers.8.pre_feedforward_layernorm.weight", + "shape": [ + 2304 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4608, + "byteOffset": 2663424 + }, + { + "name": "model.layers.8.self_attn.qkv_proj.q_weight", + "shape": [ + 4096, + 288 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4718592, + "byteOffset": 2668032 + }, + { + "name": "model.layers.8.self_attn.qkv_proj.q_scale", + "shape": [ + 4096, + 72 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 589824, + "byteOffset": 7386624 + }, + { + "name": "model.layers.8.self_attn.o_proj.q_weight", + "shape": [ + 2304, + 256 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 7976448 + }, + { + "name": "model.layers.8.self_attn.o_proj.q_scale", + "shape": [ + 2304, + 64 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 294912, + "byteOffset": 10335744 + }, + { + "name": "model.layers.9.input_layernorm.weight", + "shape": [ + 2304 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4608, + "byteOffset": 10630656 + }, + { + "name": "model.layers.9.mlp.down_proj.q_weight", + "shape": [ + 2304, + 1152 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 10616832, + "byteOffset": 10635264 + }, + { + "name": "model.layers.9.mlp.down_proj.q_scale", + "shape": [ + 2304, + 288 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1327104, + "byteOffset": 21252096 + }, + { + "name": "model.layers.9.mlp.gate_up_proj.q_scale", + "shape": [ + 18432, + 72 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2654208, + "byteOffset": 22579200 + }, + { + "name": "model.layers.9.post_attention_layernorm.weight", + "shape": [ + 2304 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4608, + "byteOffset": 25233408 + }, + { + "name": "model.layers.9.post_feedforward_layernorm.weight", + "shape": [ + 2304 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4608, + "byteOffset": 25238016 + }, + { + "name": "model.layers.9.pre_feedforward_layernorm.weight", + "shape": [ + 2304 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4608, + "byteOffset": 25242624 + }, + { + "name": "model.layers.9.self_attn.qkv_proj.q_weight", + "shape": [ + 4096, + 288 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4718592, + "byteOffset": 25247232 + }, + { + "name": "model.layers.9.self_attn.qkv_proj.q_scale", + "shape": [ + 4096, + 72 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 589824, + "byteOffset": 29965824 + }, + { + "name": "model.layers.9.self_attn.o_proj.q_weight", + "shape": [ + 2304, + 256 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 30555648 + }, + { + "name": "model.layers.9.self_attn.o_proj.q_scale", + "shape": [ + 2304, + 64 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 294912, + "byteOffset": 32914944 + }, + { + "name": "model.layers.24.input_layernorm.weight", + "shape": [ + 2304 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4608, + "byteOffset": 33209856 + } + ], + "md5sum": "d5cce7273b21673fa7394f92f0b7e53d" + }, + { + "dataPath": "params_shard_39.bin", + "format": "raw-shard", + "nbytes": 21233664, + "records": [ + { + "name": "model.layers.25.mlp.gate_up_proj.q_weight", + "shape": [ + 18432, + 288 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 21233664, + "byteOffset": 0 + } + ], + "md5sum": "7e6faa752093ea3d32faa7e8226b3dd1" + }, + { + "dataPath": "params_shard_40.bin", + "format": "raw-shard", + "nbytes": 31882752, + "records": [ + { + "name": "model.layers.24.mlp.down_proj.q_weight", + "shape": [ + 2304, + 1152 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 10616832, + "byteOffset": 0 + }, + { + "name": "model.layers.24.mlp.down_proj.q_scale", + "shape": [ + 2304, + 288 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1327104, + "byteOffset": 10616832 + }, + { + "name": "model.layers.24.post_attention_layernorm.weight", + "shape": [ + 2304 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4608, + "byteOffset": 11943936 + }, + { + "name": "model.layers.24.post_feedforward_layernorm.weight", + "shape": [ + 2304 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4608, + "byteOffset": 11948544 + }, + { + "name": "model.layers.24.pre_feedforward_layernorm.weight", + "shape": [ + 2304 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4608, + "byteOffset": 11953152 + }, + { + "name": "model.layers.25.input_layernorm.weight", + "shape": [ + 2304 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4608, + "byteOffset": 11957760 + }, + { + "name": "model.layers.25.mlp.down_proj.q_weight", + "shape": [ + 2304, + 1152 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 10616832, + "byteOffset": 11962368 + }, + { + "name": "model.layers.25.mlp.down_proj.q_scale", + "shape": [ + 2304, + 288 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1327104, + "byteOffset": 22579200 + }, + { + "name": "model.layers.25.mlp.gate_up_proj.q_scale", + "shape": [ + 18432, + 72 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2654208, + "byteOffset": 23906304 + }, + { + "name": "model.layers.25.post_attention_layernorm.weight", + "shape": [ + 2304 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4608, + "byteOffset": 26560512 + }, + { + "name": "model.layers.25.post_feedforward_layernorm.weight", + "shape": [ + 2304 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4608, + "byteOffset": 26565120 + }, + { + "name": "model.layers.25.pre_feedforward_layernorm.weight", + "shape": [ + 2304 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4608, + "byteOffset": 26569728 + }, + { + "name": "model.layers.25.self_attn.qkv_proj.q_weight", + "shape": [ + 4096, + 288 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4718592, + "byteOffset": 26574336 + }, + { + "name": "model.layers.25.self_attn.qkv_proj.q_scale", + "shape": [ + 4096, + 72 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 589824, + "byteOffset": 31292928 + } + ], + "md5sum": "30870903e1e4927fff8d281221db6cf9" + }, + { + "dataPath": "params_shard_41.bin", + "format": "raw-shard", + "nbytes": 2658816, + "records": [ + { + "name": "model.layers.25.self_attn.o_proj.q_weight", + "shape": [ + 2304, + 256 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 0 + }, + { + "name": "model.layers.25.self_attn.o_proj.q_scale", + "shape": [ + 2304, + 64 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 294912, + "byteOffset": 2359296 + }, + { + "name": "model.norm.weight", + "shape": [ + 2304 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4608, + "byteOffset": 2654208 + } + ], + "md5sum": "460980e964a2321290ce2641bdadbf75" + } + ] +} \ No newline at end of file diff --git a/params_shard_0.bin b/params_shard_0.bin new file mode 100644 index 0000000000000000000000000000000000000000..eed4744d1abdc9b6a43b143f3210a98c28d2dc79 --- /dev/null +++ b/params_shard_0.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e9b43e33b7e2ce6df4d05647c76d8b4c44e3dd510087a9453ef200bc9fba0612 +size 294912000 diff --git a/params_shard_1.bin b/params_shard_1.bin new file mode 100644 index 0000000000000000000000000000000000000000..fce8751005a0801d76aaa81f5fe785b2dca0fcb8 --- /dev/null +++ b/params_shard_1.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:83531e44766c063ef2d8275954c2558d77dc58e7804e642a06b995702e676490 +size 36864000 diff --git a/params_shard_10.bin b/params_shard_10.bin new file mode 100644 index 0000000000000000000000000000000000000000..d833676572a799c17f9633268ed593eeebcdcc28 --- /dev/null +++ b/params_shard_10.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2ab747b90c3ea26dd5a1b9a5a9235264eaebe3e4150264b0bbc00123e1b89157 +size 33214464 diff --git a/params_shard_11.bin b/params_shard_11.bin new file mode 100644 index 0000000000000000000000000000000000000000..40d29cb05a592ed9dca5cbd3ccc7cd03aff8fd98 --- /dev/null +++ b/params_shard_11.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:17622b67062bbae61ce3b4cfd63bdc5bcecefbd1cafe921c39b79fe24f1a71ff +size 33177600 diff --git a/params_shard_12.bin b/params_shard_12.bin new file mode 100644 index 0000000000000000000000000000000000000000..f808ddf61f587016ee34a3338450965fed9967c9 --- /dev/null +++ b/params_shard_12.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1047e18cc4e0e2d461a6ddab7c763e5667574ffcd1ababea3335f8a5b3b2eadd +size 21233664 diff --git a/params_shard_13.bin b/params_shard_13.bin new file mode 100644 index 0000000000000000000000000000000000000000..ddcb156c756d2d7e689a5ef71343fab2dcf75451 --- /dev/null +++ b/params_shard_13.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0ba556f91a7ba7819ffa1eb7328208829373c04dc6a06d1b414f73314b1ebbd3 +size 33214464 diff --git a/params_shard_14.bin b/params_shard_14.bin new file mode 100644 index 0000000000000000000000000000000000000000..6ec588211c10b28b70b777135afbfd57005113dc --- /dev/null +++ b/params_shard_14.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:126cc854f1c2c70c62539162248ee0c27a6c40fb7e388cdb5f52cbc7c8edfb1e +size 33177600 diff --git a/params_shard_15.bin b/params_shard_15.bin new file mode 100644 index 0000000000000000000000000000000000000000..f27a365ad9d7c2f945fcd03fc6201ef67442c9bd --- /dev/null +++ b/params_shard_15.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3dbae3694219920bb7fcb2362be865a3ecb49505b41a1b4eb13a303e200b69b6 +size 21233664 diff --git a/params_shard_16.bin b/params_shard_16.bin new file mode 100644 index 0000000000000000000000000000000000000000..49c5f5336e50146cfd8b91a7ac5f7633e0e7d852 --- /dev/null +++ b/params_shard_16.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:63766b56fd061b049fee0ef8aa9edc23590bf30b2bcf9c7a9617a866aa03a389 +size 33214464 diff --git a/params_shard_17.bin b/params_shard_17.bin new file mode 100644 index 0000000000000000000000000000000000000000..ede17e6802bb3f38d699c7ee48162f54cf739aa9 --- /dev/null +++ b/params_shard_17.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ff715b1e3748653c53656f6b1a8ae8208b65ecbf8980a4fc2e7f3acd9eae5137 +size 33177600 diff --git a/params_shard_18.bin b/params_shard_18.bin new file mode 100644 index 0000000000000000000000000000000000000000..5146ffcc1171bb7a7b48f29cc46aa7a79bcb1bce --- /dev/null +++ b/params_shard_18.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:769eb052090fcad757bbe13f1c8d98f5389090d1ca76e8e7a65c08fdd3da632b +size 21233664 diff --git a/params_shard_19.bin b/params_shard_19.bin new file mode 100644 index 0000000000000000000000000000000000000000..963dfa8ae0d7390ef6d949364b1bf0e3ccbedbb3 --- /dev/null +++ b/params_shard_19.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:57daeae836f74837213f9dfe1464171c21168297819c854a01f3ff68a2a79218 +size 33214464 diff --git a/params_shard_2.bin b/params_shard_2.bin new file mode 100644 index 0000000000000000000000000000000000000000..fba4f6dedb621c5d48d10c3d775f76c129f7daaf --- /dev/null +++ b/params_shard_2.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f066e08ad385253415a3ed84ec600d9fddf031850b4c39361eb2610cdd4438aa +size 33182208 diff --git a/params_shard_20.bin b/params_shard_20.bin new file mode 100644 index 0000000000000000000000000000000000000000..c1557d609a4342103739b583291b4e95348c51ba --- /dev/null +++ b/params_shard_20.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:858559f568c05c14fbb17fad2d2365bbeb1edf05d2c01470cdabde660af0a83d +size 33177600 diff --git a/params_shard_21.bin b/params_shard_21.bin new file mode 100644 index 0000000000000000000000000000000000000000..42471fd7040ac6b1d79eee21f9b681494b72d020 --- /dev/null +++ b/params_shard_21.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9b071afd77a652cda7f4b63ca39ecf042dda244787212ad81d18bc3a0615dcbd +size 21233664 diff --git a/params_shard_22.bin b/params_shard_22.bin new file mode 100644 index 0000000000000000000000000000000000000000..edddffca05985c9fc81e7e8dd5e19177dc101c1a --- /dev/null +++ b/params_shard_22.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b507980363b8823ca170fe03ac42fce07d10e334fc8d8c0678162f8bc1a804e6 +size 33214464 diff --git a/params_shard_23.bin b/params_shard_23.bin new file mode 100644 index 0000000000000000000000000000000000000000..82e95b9d3292e6415b52d89430d8b5debc685b61 --- /dev/null +++ b/params_shard_23.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:145bb4711f80a21aa9c1a02344bd155c9dc0b596875c0a303f476a22e0689dec +size 33177600 diff --git a/params_shard_24.bin b/params_shard_24.bin new file mode 100644 index 0000000000000000000000000000000000000000..a691018f977a3c2a9c39f34a1bd5000482a98248 --- /dev/null +++ b/params_shard_24.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f6c78adc7e15c0daadfea29398b683d866ec6cfc7d39892e312ddb662f18f4e3 +size 21233664 diff --git a/params_shard_25.bin b/params_shard_25.bin new file mode 100644 index 0000000000000000000000000000000000000000..427b58fc6b083e0748f35e46887bea67d5a8e423 --- /dev/null +++ b/params_shard_25.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6ac32398ff7454fafcd673d8c172bb536375e65a6ef06613d9a414976391011f +size 33214464 diff --git a/params_shard_26.bin b/params_shard_26.bin new file mode 100644 index 0000000000000000000000000000000000000000..2ea3d64152cbbf2fc4e4400fe0af099a4fd40fef --- /dev/null +++ b/params_shard_26.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1b68feab3ff904388f5ce3ad1907d21897e862f99a91746ed4d2a2974418cf64 +size 33177600 diff --git a/params_shard_27.bin b/params_shard_27.bin new file mode 100644 index 0000000000000000000000000000000000000000..46acd7723aaa80dd410b308d46f84990596af30e --- /dev/null +++ b/params_shard_27.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dc41ba58b79908cbe2548cd81025e752b01c6316c8c41d9c7f0a4e7182974c86 +size 31864320 diff --git a/params_shard_28.bin b/params_shard_28.bin new file mode 100644 index 0000000000000000000000000000000000000000..a876620714c44b15029d4eefee4b813dfaa51eec --- /dev/null +++ b/params_shard_28.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0303ed7ab3d6da008bb7ea96415b4d273eb8488439330634e9bf3f1a9baa3226 +size 21233664 diff --git a/params_shard_29.bin b/params_shard_29.bin new file mode 100644 index 0000000000000000000000000000000000000000..83e121639f0940a8cf6a24d2318b27720376967c --- /dev/null +++ b/params_shard_29.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ff8fb82b07213b40d6ae479b9dc43b8655c0d3058b0dc225396d13044757708f +size 33200640 diff --git a/params_shard_3.bin b/params_shard_3.bin new file mode 100644 index 0000000000000000000000000000000000000000..757a7f86421e470adcac878ec1f36c25de5a432b --- /dev/null +++ b/params_shard_3.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:26752994cbb0c9caa670d0c016b1aadf65648fb87fc112c5e3eed24e975dc20f +size 21233664 diff --git a/params_shard_30.bin b/params_shard_30.bin new file mode 100644 index 0000000000000000000000000000000000000000..e024e74a8e8b8305f77e7f41adc794b9c62ed929 --- /dev/null +++ b/params_shard_30.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b613af83b6288cc52837974ad58f85610c540dd81879178be60c2bb2f2d01faa +size 33177600 diff --git a/params_shard_31.bin b/params_shard_31.bin new file mode 100644 index 0000000000000000000000000000000000000000..6804b5a31466c241283a7430ab324d1c4ea5ca5e --- /dev/null +++ b/params_shard_31.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e88d2bcccd09f93cec4996a415a893839d2480644f65cb1b469a58ff98504a16 +size 21233664 diff --git a/params_shard_32.bin b/params_shard_32.bin new file mode 100644 index 0000000000000000000000000000000000000000..7235791380c788c3b89b08d036fd1193b2bbb2ac --- /dev/null +++ b/params_shard_32.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4da375bb80892074c8caf7226c119f019aeaf8927a655a0284543bea6691896c +size 33214464 diff --git a/params_shard_33.bin b/params_shard_33.bin new file mode 100644 index 0000000000000000000000000000000000000000..ed5c4d0cbff80b451f0545a5351105f3a7feaf24 --- /dev/null +++ b/params_shard_33.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:66ae3b6f2290aef19e415fcfdf57f5c7b518352bce68b5f275f476625adcaaec +size 33177600 diff --git a/params_shard_34.bin b/params_shard_34.bin new file mode 100644 index 0000000000000000000000000000000000000000..480db558fffc0201413a8ee6627559013c47a749 --- /dev/null +++ b/params_shard_34.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ebfa1b1abbb619ed09dbb07985e5df7696f05dccc2c29a3908977ccd104ddb0f +size 21233664 diff --git a/params_shard_35.bin b/params_shard_35.bin new file mode 100644 index 0000000000000000000000000000000000000000..9a6c051658fdb5f1c88e045e28c805bb23bdd93f --- /dev/null +++ b/params_shard_35.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e196b995ddff0b515bcc48a04a49908a86aaa5c1b23bad9d50c7134d5cf0e248 +size 33214464 diff --git a/params_shard_36.bin b/params_shard_36.bin new file mode 100644 index 0000000000000000000000000000000000000000..459fe401c802654ab6856c549cf88db2676ea647 --- /dev/null +++ b/params_shard_36.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2ef967a4b57be34eec2c473fe5368fa8a9a74e0c02acdf12fa2bddcf72a4a9c5 +size 33177600 diff --git a/params_shard_37.bin b/params_shard_37.bin new file mode 100644 index 0000000000000000000000000000000000000000..737fa0ddb2897a6614f7a696c623e3ce201c2bd7 --- /dev/null +++ b/params_shard_37.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ddb2f8cf4d4fcc5d31d86f32a8815f12accd43f9eb51183c46fff7b86dc01e83 +size 21233664 diff --git a/params_shard_38.bin b/params_shard_38.bin new file mode 100644 index 0000000000000000000000000000000000000000..92186d8798624386f1656da4ece59140c307190f --- /dev/null +++ b/params_shard_38.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bc8e27904262b07cc9e87ba4427d64bc4c8a6ada6cbf4e484a885ddd4c6ab6a8 +size 33214464 diff --git a/params_shard_39.bin b/params_shard_39.bin new file mode 100644 index 0000000000000000000000000000000000000000..da466c1004d7906d7d7d6095457f977939ffe87b --- /dev/null +++ b/params_shard_39.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6460ddd46486312f6bd2b2cd8581196fc1b6417dfb56a82005a6b35e94a66b37 +size 21233664 diff --git a/params_shard_4.bin b/params_shard_4.bin new file mode 100644 index 0000000000000000000000000000000000000000..6210360bc4a2e01c17258fa3432dca51d7605e4d --- /dev/null +++ b/params_shard_4.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:80cd240a75af027b84a987b71075caeab64bbbd0e3cbc01b9b1642823f344d7c +size 33214464 diff --git a/params_shard_40.bin b/params_shard_40.bin new file mode 100644 index 0000000000000000000000000000000000000000..8cce97f92cdb481aae8219659cd7fc195b461a8a --- /dev/null +++ b/params_shard_40.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:331ccd29b891513caefc425ce5179832eea41d00580790c524c086dc819ad29c +size 31882752 diff --git a/params_shard_41.bin b/params_shard_41.bin new file mode 100644 index 0000000000000000000000000000000000000000..875add00e04e809a2531a2dd93f56d03415d9d0d --- /dev/null +++ b/params_shard_41.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:44c8098b23a69c1e043765c0b8aa74be841640726b5a958e443a4843a31fb3a0 +size 2658816 diff --git a/params_shard_5.bin b/params_shard_5.bin new file mode 100644 index 0000000000000000000000000000000000000000..c6694b112488f0822ff1ad7936872aa3adcedbc7 --- /dev/null +++ b/params_shard_5.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7e80a75bfa0b4add500f34dd34874b9279b15b30f3d0271c0e2696238cf7be7f +size 33177600 diff --git a/params_shard_6.bin b/params_shard_6.bin new file mode 100644 index 0000000000000000000000000000000000000000..d61ad92ecb6e964558a60c05798fe34f1a19e582 --- /dev/null +++ b/params_shard_6.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f11bad48cd19240f517339320ca82d8af651d9f4128422ad3f2631041b5f1fe4 +size 21233664 diff --git a/params_shard_7.bin b/params_shard_7.bin new file mode 100644 index 0000000000000000000000000000000000000000..45d71f833cb81d0a922529df041b12d7254dd849 --- /dev/null +++ b/params_shard_7.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2393ab6bd225764a8e66e3ab57008e37166965606cc4fcbfe93183086fea9fac +size 33214464 diff --git a/params_shard_8.bin b/params_shard_8.bin new file mode 100644 index 0000000000000000000000000000000000000000..d3ef21ad2557a5f30f2f8bf03be0493c2094671e --- /dev/null +++ b/params_shard_8.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a3cfb41f22c67f4a98efc367dd2cff1541c224aa30f887148b504e3a15c6989f +size 33177600 diff --git a/params_shard_9.bin b/params_shard_9.bin new file mode 100644 index 0000000000000000000000000000000000000000..a8c7257523e2c4e478909fe9b4f4ba601c0fe924 --- /dev/null +++ b/params_shard_9.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0476ce2eeb1302029572570dd608cceaee85331643786b926becc36710b461a4 +size 21233664 diff --git a/tensor-cache-b16.json b/tensor-cache-b16.json new file mode 100644 index 0000000000000000000000000000000000000000..c86885dce452b6230f33a5d8c6d2304e9aa9a946 --- /dev/null +++ b/tensor-cache-b16.json @@ -0,0 +1,3705 @@ +{ + "metadata": { + "ParamSize": 315, + "ParamBytes": 1634780160.0, + "BitsPerParam": 5.002498464347751 + }, + "records": [ + { + "dataPath": "params_shard_0.bin", + "format": "raw-shard", + "nbytes": 294912000, + "records": [ + { + "name": "model.embed_tokens.q_weight", + "shape": [ + 256000, + 288 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 294912000, + "byteOffset": 0 + } + ], + "md5sum": "1056a334963736d59422e83e31cb241d" + }, + { + "dataPath": "params_shard_1.bin", + "format": "raw-shard", + "nbytes": 36864000, + "records": [ + { + "name": "model.embed_tokens.q_scale", + "shape": [ + 256000, + 72 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 36864000, + "byteOffset": 0 + } + ], + "md5sum": "625a34c9eab976edd9064fcd0bcd959b" + }, + { + "dataPath": "params_shard_2.bin", + "format": "raw-shard", + "nbytes": 33182208, + "records": [ + { + "name": "model.layers.0.input_layernorm.weight", + "shape": [ + 2304 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4608, + "byteOffset": 0 + }, + { + "name": "model.layers.0.mlp.down_proj.q_weight", + "shape": [ + 2304, + 1152 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 10616832, + "byteOffset": 4608 + }, + { + "name": "model.layers.0.mlp.down_proj.q_scale", + "shape": [ + 2304, + 288 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1327104, + "byteOffset": 10621440 + }, + { + "name": "model.layers.0.mlp.gate_up_proj.q_weight", + "shape": [ + 18432, + 288 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 21233664, + "byteOffset": 11948544 + } + ], + "md5sum": "6acbc70fe032f619ec0d7bf36a54a33c" + }, + { + "dataPath": "params_shard_3.bin", + "format": "raw-shard", + "nbytes": 21233664, + "records": [ + { + "name": "model.layers.1.mlp.gate_up_proj.q_weight", + "shape": [ + 18432, + 288 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 21233664, + "byteOffset": 0 + } + ], + "md5sum": "175a78fbf7424be2f32f50fbee026857" + }, + { + "dataPath": "params_shard_4.bin", + "format": "raw-shard", + "nbytes": 33214464, + "records": [ + { + "name": "model.layers.0.mlp.gate_up_proj.q_scale", + "shape": [ + 18432, + 72 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2654208, + "byteOffset": 0 + }, + { + "name": "model.layers.0.post_attention_layernorm.weight", + "shape": [ + 2304 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4608, + "byteOffset": 2654208 + }, + { + "name": "model.layers.0.post_feedforward_layernorm.weight", + "shape": [ + 2304 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4608, + "byteOffset": 2658816 + }, + { + "name": "model.layers.0.pre_feedforward_layernorm.weight", + "shape": [ + 2304 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4608, + "byteOffset": 2663424 + }, + { + "name": "model.layers.0.self_attn.qkv_proj.q_weight", + "shape": [ + 4096, + 288 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4718592, + "byteOffset": 2668032 + }, + { + "name": "model.layers.0.self_attn.qkv_proj.q_scale", + "shape": [ + 4096, + 72 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 589824, + "byteOffset": 7386624 + }, + { + "name": "model.layers.0.self_attn.o_proj.q_weight", + "shape": [ + 2304, + 256 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 7976448 + }, + { + "name": "model.layers.0.self_attn.o_proj.q_scale", + "shape": [ + 2304, + 64 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 294912, + "byteOffset": 10335744 + }, + { + "name": "model.layers.1.input_layernorm.weight", + "shape": [ + 2304 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4608, + "byteOffset": 10630656 + }, + { + "name": "model.layers.1.mlp.down_proj.q_weight", + "shape": [ + 2304, + 1152 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 10616832, + "byteOffset": 10635264 + }, + { + "name": "model.layers.1.mlp.down_proj.q_scale", + "shape": [ + 2304, + 288 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1327104, + "byteOffset": 21252096 + }, + { + "name": "model.layers.1.mlp.gate_up_proj.q_scale", + "shape": [ + 18432, + 72 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2654208, + "byteOffset": 22579200 + }, + { + "name": "model.layers.1.post_attention_layernorm.weight", + "shape": [ + 2304 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4608, + "byteOffset": 25233408 + }, + { + "name": "model.layers.1.post_feedforward_layernorm.weight", + "shape": [ + 2304 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4608, + "byteOffset": 25238016 + }, + { + "name": "model.layers.1.pre_feedforward_layernorm.weight", + "shape": [ + 2304 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4608, + "byteOffset": 25242624 + }, + { + "name": "model.layers.1.self_attn.qkv_proj.q_weight", + "shape": [ + 4096, + 288 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4718592, + "byteOffset": 25247232 + }, + { + "name": "model.layers.1.self_attn.qkv_proj.q_scale", + "shape": [ + 4096, + 72 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 589824, + "byteOffset": 29965824 + }, + { + "name": "model.layers.1.self_attn.o_proj.q_weight", + "shape": [ + 2304, + 256 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 30555648 + }, + { + "name": "model.layers.1.self_attn.o_proj.q_scale", + "shape": [ + 2304, + 64 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 294912, + "byteOffset": 32914944 + }, + { + "name": "model.layers.10.input_layernorm.weight", + "shape": [ + 2304 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4608, + "byteOffset": 33209856 + } + ], + "md5sum": "7046dd6a28b8387479009365eae5ff4c" + }, + { + "dataPath": "params_shard_5.bin", + "format": "raw-shard", + "nbytes": 33177600, + "records": [ + { + "name": "model.layers.10.mlp.down_proj.q_weight", + "shape": [ + 2304, + 1152 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 10616832, + "byteOffset": 0 + }, + { + "name": "model.layers.10.mlp.down_proj.q_scale", + "shape": [ + 2304, + 288 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1327104, + "byteOffset": 10616832 + }, + { + "name": "model.layers.10.mlp.gate_up_proj.q_weight", + "shape": [ + 18432, + 288 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 21233664, + "byteOffset": 11943936 + } + ], + "md5sum": "d40768de670da0b84ec61b5931183d43" + }, + { + "dataPath": "params_shard_6.bin", + "format": "raw-shard", + "nbytes": 21233664, + "records": [ + { + "name": "model.layers.11.mlp.gate_up_proj.q_weight", + "shape": [ + 18432, + 288 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 21233664, + "byteOffset": 0 + } + ], + "md5sum": "de00bdc0805a54eb5e45ed0909b7ed87" + }, + { + "dataPath": "params_shard_7.bin", + "format": "raw-shard", + "nbytes": 33214464, + "records": [ + { + "name": "model.layers.10.mlp.gate_up_proj.q_scale", + "shape": [ + 18432, + 72 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2654208, + "byteOffset": 0 + }, + { + "name": "model.layers.10.post_attention_layernorm.weight", + "shape": [ + 2304 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4608, + "byteOffset": 2654208 + }, + { + "name": "model.layers.10.post_feedforward_layernorm.weight", + "shape": [ + 2304 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4608, + "byteOffset": 2658816 + }, + { + "name": "model.layers.10.pre_feedforward_layernorm.weight", + "shape": [ + 2304 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4608, + "byteOffset": 2663424 + }, + { + "name": "model.layers.10.self_attn.qkv_proj.q_weight", + "shape": [ + 4096, + 288 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4718592, + "byteOffset": 2668032 + }, + { + "name": "model.layers.10.self_attn.qkv_proj.q_scale", + "shape": [ + 4096, + 72 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 589824, + "byteOffset": 7386624 + }, + { + "name": "model.layers.10.self_attn.o_proj.q_weight", + "shape": [ + 2304, + 256 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 7976448 + }, + { + "name": "model.layers.10.self_attn.o_proj.q_scale", + "shape": [ + 2304, + 64 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 294912, + "byteOffset": 10335744 + }, + { + "name": "model.layers.11.input_layernorm.weight", + "shape": [ + 2304 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4608, + "byteOffset": 10630656 + }, + { + "name": "model.layers.11.mlp.down_proj.q_weight", + "shape": [ + 2304, + 1152 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 10616832, + "byteOffset": 10635264 + }, + { + "name": "model.layers.11.mlp.down_proj.q_scale", + "shape": [ + 2304, + 288 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1327104, + "byteOffset": 21252096 + }, + { + "name": "model.layers.11.mlp.gate_up_proj.q_scale", + "shape": [ + 18432, + 72 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2654208, + "byteOffset": 22579200 + }, + { + "name": "model.layers.11.post_attention_layernorm.weight", + "shape": [ + 2304 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4608, + "byteOffset": 25233408 + }, + { + "name": "model.layers.11.post_feedforward_layernorm.weight", + "shape": [ + 2304 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4608, + "byteOffset": 25238016 + }, + { + "name": "model.layers.11.pre_feedforward_layernorm.weight", + "shape": [ + 2304 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4608, + "byteOffset": 25242624 + }, + { + "name": "model.layers.11.self_attn.qkv_proj.q_weight", + "shape": [ + 4096, + 288 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4718592, + "byteOffset": 25247232 + }, + { + "name": "model.layers.11.self_attn.qkv_proj.q_scale", + "shape": [ + 4096, + 72 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 589824, + "byteOffset": 29965824 + }, + { + "name": "model.layers.11.self_attn.o_proj.q_weight", + "shape": [ + 2304, + 256 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 30555648 + }, + { + "name": "model.layers.11.self_attn.o_proj.q_scale", + "shape": [ + 2304, + 64 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 294912, + "byteOffset": 32914944 + }, + { + "name": "model.layers.12.input_layernorm.weight", + "shape": [ + 2304 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4608, + "byteOffset": 33209856 + } + ], + "md5sum": "80b8a5cdb189dfe18e2238a896d52429" + }, + { + "dataPath": "params_shard_8.bin", + "format": "raw-shard", + "nbytes": 33177600, + "records": [ + { + "name": "model.layers.12.mlp.down_proj.q_weight", + "shape": [ + 2304, + 1152 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 10616832, + "byteOffset": 0 + }, + { + "name": "model.layers.12.mlp.down_proj.q_scale", + "shape": [ + 2304, + 288 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1327104, + "byteOffset": 10616832 + }, + { + "name": "model.layers.12.mlp.gate_up_proj.q_weight", + "shape": [ + 18432, + 288 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 21233664, + "byteOffset": 11943936 + } + ], + "md5sum": "499c6f1c3955b7ac48e1baf54f03bc75" + }, + { + "dataPath": "params_shard_9.bin", + "format": "raw-shard", + "nbytes": 21233664, + "records": [ + { + "name": "model.layers.13.mlp.gate_up_proj.q_weight", + "shape": [ + 18432, + 288 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 21233664, + "byteOffset": 0 + } + ], + "md5sum": "d31be670584c3046d1e37cfdb9d9ddd6" + }, + { + "dataPath": "params_shard_10.bin", + "format": "raw-shard", + "nbytes": 33214464, + "records": [ + { + "name": "model.layers.12.mlp.gate_up_proj.q_scale", + "shape": [ + 18432, + 72 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2654208, + "byteOffset": 0 + }, + { + "name": "model.layers.12.post_attention_layernorm.weight", + "shape": [ + 2304 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4608, + "byteOffset": 2654208 + }, + { + "name": "model.layers.12.post_feedforward_layernorm.weight", + "shape": [ + 2304 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4608, + "byteOffset": 2658816 + }, + { + "name": "model.layers.12.pre_feedforward_layernorm.weight", + "shape": [ + 2304 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4608, + "byteOffset": 2663424 + }, + { + "name": "model.layers.12.self_attn.qkv_proj.q_weight", + "shape": [ + 4096, + 288 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4718592, + "byteOffset": 2668032 + }, + { + "name": "model.layers.12.self_attn.qkv_proj.q_scale", + "shape": [ + 4096, + 72 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 589824, + "byteOffset": 7386624 + }, + { + "name": "model.layers.12.self_attn.o_proj.q_weight", + "shape": [ + 2304, + 256 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 7976448 + }, + { + "name": "model.layers.12.self_attn.o_proj.q_scale", + "shape": [ + 2304, + 64 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 294912, + "byteOffset": 10335744 + }, + { + "name": "model.layers.13.input_layernorm.weight", + "shape": [ + 2304 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4608, + "byteOffset": 10630656 + }, + { + "name": "model.layers.13.mlp.down_proj.q_weight", + "shape": [ + 2304, + 1152 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 10616832, + "byteOffset": 10635264 + }, + { + "name": "model.layers.13.mlp.down_proj.q_scale", + "shape": [ + 2304, + 288 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1327104, + "byteOffset": 21252096 + }, + { + "name": "model.layers.13.mlp.gate_up_proj.q_scale", + "shape": [ + 18432, + 72 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2654208, + "byteOffset": 22579200 + }, + { + "name": "model.layers.13.post_attention_layernorm.weight", + "shape": [ + 2304 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4608, + "byteOffset": 25233408 + }, + { + "name": "model.layers.13.post_feedforward_layernorm.weight", + "shape": [ + 2304 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4608, + "byteOffset": 25238016 + }, + { + "name": "model.layers.13.pre_feedforward_layernorm.weight", + "shape": [ + 2304 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4608, + "byteOffset": 25242624 + }, + { + "name": "model.layers.13.self_attn.qkv_proj.q_weight", + "shape": [ + 4096, + 288 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4718592, + "byteOffset": 25247232 + }, + { + "name": "model.layers.13.self_attn.qkv_proj.q_scale", + "shape": [ + 4096, + 72 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 589824, + "byteOffset": 29965824 + }, + { + "name": "model.layers.13.self_attn.o_proj.q_weight", + "shape": [ + 2304, + 256 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 30555648 + }, + { + "name": "model.layers.13.self_attn.o_proj.q_scale", + "shape": [ + 2304, + 64 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 294912, + "byteOffset": 32914944 + }, + { + "name": "model.layers.14.input_layernorm.weight", + "shape": [ + 2304 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4608, + "byteOffset": 33209856 + } + ], + "md5sum": "d4c11e91b9585fb3780730c909a30752" + }, + { + "dataPath": "params_shard_11.bin", + "format": "raw-shard", + "nbytes": 33177600, + "records": [ + { + "name": "model.layers.14.mlp.down_proj.q_weight", + "shape": [ + 2304, + 1152 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 10616832, + "byteOffset": 0 + }, + { + "name": "model.layers.14.mlp.down_proj.q_scale", + "shape": [ + 2304, + 288 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1327104, + "byteOffset": 10616832 + }, + { + "name": "model.layers.14.mlp.gate_up_proj.q_weight", + "shape": [ + 18432, + 288 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 21233664, + "byteOffset": 11943936 + } + ], + "md5sum": "57a6b07fdb8aa49529a7b1b4959757ea" + }, + { + "dataPath": "params_shard_12.bin", + "format": "raw-shard", + "nbytes": 21233664, + "records": [ + { + "name": "model.layers.15.mlp.gate_up_proj.q_weight", + "shape": [ + 18432, + 288 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 21233664, + "byteOffset": 0 + } + ], + "md5sum": "67614580b3f2a8a38e85aa9b0267e77a" + }, + { + "dataPath": "params_shard_13.bin", + "format": "raw-shard", + "nbytes": 33214464, + "records": [ + { + "name": "model.layers.14.mlp.gate_up_proj.q_scale", + "shape": [ + 18432, + 72 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2654208, + "byteOffset": 0 + }, + { + "name": "model.layers.14.post_attention_layernorm.weight", + "shape": [ + 2304 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4608, + "byteOffset": 2654208 + }, + { + "name": "model.layers.14.post_feedforward_layernorm.weight", + "shape": [ + 2304 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4608, + "byteOffset": 2658816 + }, + { + "name": "model.layers.14.pre_feedforward_layernorm.weight", + "shape": [ + 2304 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4608, + "byteOffset": 2663424 + }, + { + "name": "model.layers.14.self_attn.qkv_proj.q_weight", + "shape": [ + 4096, + 288 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4718592, + "byteOffset": 2668032 + }, + { + "name": "model.layers.14.self_attn.qkv_proj.q_scale", + "shape": [ + 4096, + 72 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 589824, + "byteOffset": 7386624 + }, + { + "name": "model.layers.14.self_attn.o_proj.q_weight", + "shape": [ + 2304, + 256 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 7976448 + }, + { + "name": "model.layers.14.self_attn.o_proj.q_scale", + "shape": [ + 2304, + 64 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 294912, + "byteOffset": 10335744 + }, + { + "name": "model.layers.15.input_layernorm.weight", + "shape": [ + 2304 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4608, + "byteOffset": 10630656 + }, + { + "name": "model.layers.15.mlp.down_proj.q_weight", + "shape": [ + 2304, + 1152 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 10616832, + "byteOffset": 10635264 + }, + { + "name": "model.layers.15.mlp.down_proj.q_scale", + "shape": [ + 2304, + 288 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1327104, + "byteOffset": 21252096 + }, + { + "name": "model.layers.15.mlp.gate_up_proj.q_scale", + "shape": [ + 18432, + 72 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2654208, + "byteOffset": 22579200 + }, + { + "name": "model.layers.15.post_attention_layernorm.weight", + "shape": [ + 2304 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4608, + "byteOffset": 25233408 + }, + { + "name": "model.layers.15.post_feedforward_layernorm.weight", + "shape": [ + 2304 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4608, + "byteOffset": 25238016 + }, + { + "name": "model.layers.15.pre_feedforward_layernorm.weight", + "shape": [ + 2304 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4608, + "byteOffset": 25242624 + }, + { + "name": "model.layers.15.self_attn.qkv_proj.q_weight", + "shape": [ + 4096, + 288 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4718592, + "byteOffset": 25247232 + }, + { + "name": "model.layers.15.self_attn.qkv_proj.q_scale", + "shape": [ + 4096, + 72 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 589824, + "byteOffset": 29965824 + }, + { + "name": "model.layers.15.self_attn.o_proj.q_weight", + "shape": [ + 2304, + 256 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 30555648 + }, + { + "name": "model.layers.15.self_attn.o_proj.q_scale", + "shape": [ + 2304, + 64 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 294912, + "byteOffset": 32914944 + }, + { + "name": "model.layers.16.input_layernorm.weight", + "shape": [ + 2304 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4608, + "byteOffset": 33209856 + } + ], + "md5sum": "3a8f3b58cadd6366dd53d8378d3f8af1" + }, + { + "dataPath": "params_shard_14.bin", + "format": "raw-shard", + "nbytes": 33177600, + "records": [ + { + "name": "model.layers.16.mlp.down_proj.q_weight", + "shape": [ + 2304, + 1152 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 10616832, + "byteOffset": 0 + }, + { + "name": "model.layers.16.mlp.down_proj.q_scale", + "shape": [ + 2304, + 288 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1327104, + "byteOffset": 10616832 + }, + { + "name": "model.layers.16.mlp.gate_up_proj.q_weight", + "shape": [ + 18432, + 288 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 21233664, + "byteOffset": 11943936 + } + ], + "md5sum": "2f0476c0a82d1270be330f2b11b0095e" + }, + { + "dataPath": "params_shard_15.bin", + "format": "raw-shard", + "nbytes": 21233664, + "records": [ + { + "name": "model.layers.17.mlp.gate_up_proj.q_weight", + "shape": [ + 18432, + 288 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 21233664, + "byteOffset": 0 + } + ], + "md5sum": "b0a8a0bf26d878221850484946ce030a" + }, + { + "dataPath": "params_shard_16.bin", + "format": "raw-shard", + "nbytes": 33214464, + "records": [ + { + "name": "model.layers.16.mlp.gate_up_proj.q_scale", + "shape": [ + 18432, + 72 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2654208, + "byteOffset": 0 + }, + { + "name": "model.layers.16.post_attention_layernorm.weight", + "shape": [ + 2304 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4608, + "byteOffset": 2654208 + }, + { + "name": "model.layers.16.post_feedforward_layernorm.weight", + "shape": [ + 2304 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4608, + "byteOffset": 2658816 + }, + { + "name": "model.layers.16.pre_feedforward_layernorm.weight", + "shape": [ + 2304 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4608, + "byteOffset": 2663424 + }, + { + "name": "model.layers.16.self_attn.qkv_proj.q_weight", + "shape": [ + 4096, + 288 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4718592, + "byteOffset": 2668032 + }, + { + "name": "model.layers.16.self_attn.qkv_proj.q_scale", + "shape": [ + 4096, + 72 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 589824, + "byteOffset": 7386624 + }, + { + "name": "model.layers.16.self_attn.o_proj.q_weight", + "shape": [ + 2304, + 256 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 7976448 + }, + { + "name": "model.layers.16.self_attn.o_proj.q_scale", + "shape": [ + 2304, + 64 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 294912, + "byteOffset": 10335744 + }, + { + "name": "model.layers.17.input_layernorm.weight", + "shape": [ + 2304 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4608, + "byteOffset": 10630656 + }, + { + "name": "model.layers.17.mlp.down_proj.q_weight", + "shape": [ + 2304, + 1152 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 10616832, + "byteOffset": 10635264 + }, + { + "name": "model.layers.17.mlp.down_proj.q_scale", + "shape": [ + 2304, + 288 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1327104, + "byteOffset": 21252096 + }, + { + "name": "model.layers.17.mlp.gate_up_proj.q_scale", + "shape": [ + 18432, + 72 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2654208, + "byteOffset": 22579200 + }, + { + "name": "model.layers.17.post_attention_layernorm.weight", + "shape": [ + 2304 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4608, + "byteOffset": 25233408 + }, + { + "name": "model.layers.17.post_feedforward_layernorm.weight", + "shape": [ + 2304 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4608, + "byteOffset": 25238016 + }, + { + "name": "model.layers.17.pre_feedforward_layernorm.weight", + "shape": [ + 2304 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4608, + "byteOffset": 25242624 + }, + { + "name": "model.layers.17.self_attn.qkv_proj.q_weight", + "shape": [ + 4096, + 288 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4718592, + "byteOffset": 25247232 + }, + { + "name": "model.layers.17.self_attn.qkv_proj.q_scale", + "shape": [ + 4096, + 72 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 589824, + "byteOffset": 29965824 + }, + { + "name": "model.layers.17.self_attn.o_proj.q_weight", + "shape": [ + 2304, + 256 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 30555648 + }, + { + "name": "model.layers.17.self_attn.o_proj.q_scale", + "shape": [ + 2304, + 64 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 294912, + "byteOffset": 32914944 + }, + { + "name": "model.layers.18.input_layernorm.weight", + "shape": [ + 2304 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4608, + "byteOffset": 33209856 + } + ], + "md5sum": "f1a76e763b1dc0e9c67df48528bc0604" + }, + { + "dataPath": "params_shard_17.bin", + "format": "raw-shard", + "nbytes": 33177600, + "records": [ + { + "name": "model.layers.18.mlp.down_proj.q_weight", + "shape": [ + 2304, + 1152 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 10616832, + "byteOffset": 0 + }, + { + "name": "model.layers.18.mlp.down_proj.q_scale", + "shape": [ + 2304, + 288 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1327104, + "byteOffset": 10616832 + }, + { + "name": "model.layers.18.mlp.gate_up_proj.q_weight", + "shape": [ + 18432, + 288 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 21233664, + "byteOffset": 11943936 + } + ], + "md5sum": "c9afc0e296625eb4651b8faf2a0fb6b0" + }, + { + "dataPath": "params_shard_18.bin", + "format": "raw-shard", + "nbytes": 21233664, + "records": [ + { + "name": "model.layers.19.mlp.gate_up_proj.q_weight", + "shape": [ + 18432, + 288 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 21233664, + "byteOffset": 0 + } + ], + "md5sum": "95b6cee3c0471d1dbb0fea2e1baf4abd" + }, + { + "dataPath": "params_shard_19.bin", + "format": "raw-shard", + "nbytes": 33214464, + "records": [ + { + "name": "model.layers.18.mlp.gate_up_proj.q_scale", + "shape": [ + 18432, + 72 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2654208, + "byteOffset": 0 + }, + { + "name": "model.layers.18.post_attention_layernorm.weight", + "shape": [ + 2304 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4608, + "byteOffset": 2654208 + }, + { + "name": "model.layers.18.post_feedforward_layernorm.weight", + "shape": [ + 2304 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4608, + "byteOffset": 2658816 + }, + { + "name": "model.layers.18.pre_feedforward_layernorm.weight", + "shape": [ + 2304 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4608, + "byteOffset": 2663424 + }, + { + "name": "model.layers.18.self_attn.qkv_proj.q_weight", + "shape": [ + 4096, + 288 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4718592, + "byteOffset": 2668032 + }, + { + "name": "model.layers.18.self_attn.qkv_proj.q_scale", + "shape": [ + 4096, + 72 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 589824, + "byteOffset": 7386624 + }, + { + "name": "model.layers.18.self_attn.o_proj.q_weight", + "shape": [ + 2304, + 256 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 7976448 + }, + { + "name": "model.layers.18.self_attn.o_proj.q_scale", + "shape": [ + 2304, + 64 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 294912, + "byteOffset": 10335744 + }, + { + "name": "model.layers.19.input_layernorm.weight", + "shape": [ + 2304 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4608, + "byteOffset": 10630656 + }, + { + "name": "model.layers.19.mlp.down_proj.q_weight", + "shape": [ + 2304, + 1152 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 10616832, + "byteOffset": 10635264 + }, + { + "name": "model.layers.19.mlp.down_proj.q_scale", + "shape": [ + 2304, + 288 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1327104, + "byteOffset": 21252096 + }, + { + "name": "model.layers.19.mlp.gate_up_proj.q_scale", + "shape": [ + 18432, + 72 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2654208, + "byteOffset": 22579200 + }, + { + "name": "model.layers.19.post_attention_layernorm.weight", + "shape": [ + 2304 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4608, + "byteOffset": 25233408 + }, + { + "name": "model.layers.19.post_feedforward_layernorm.weight", + "shape": [ + 2304 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4608, + "byteOffset": 25238016 + }, + { + "name": "model.layers.19.pre_feedforward_layernorm.weight", + "shape": [ + 2304 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4608, + "byteOffset": 25242624 + }, + { + "name": "model.layers.19.self_attn.qkv_proj.q_weight", + "shape": [ + 4096, + 288 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4718592, + "byteOffset": 25247232 + }, + { + "name": "model.layers.19.self_attn.qkv_proj.q_scale", + "shape": [ + 4096, + 72 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 589824, + "byteOffset": 29965824 + }, + { + "name": "model.layers.19.self_attn.o_proj.q_weight", + "shape": [ + 2304, + 256 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 30555648 + }, + { + "name": "model.layers.19.self_attn.o_proj.q_scale", + "shape": [ + 2304, + 64 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 294912, + "byteOffset": 32914944 + }, + { + "name": "model.layers.2.input_layernorm.weight", + "shape": [ + 2304 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4608, + "byteOffset": 33209856 + } + ], + "md5sum": "f67c0fd6ce5403528b801dcad10d1816" + }, + { + "dataPath": "params_shard_20.bin", + "format": "raw-shard", + "nbytes": 33177600, + "records": [ + { + "name": "model.layers.2.mlp.down_proj.q_weight", + "shape": [ + 2304, + 1152 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 10616832, + "byteOffset": 0 + }, + { + "name": "model.layers.2.mlp.down_proj.q_scale", + "shape": [ + 2304, + 288 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1327104, + "byteOffset": 10616832 + }, + { + "name": "model.layers.2.mlp.gate_up_proj.q_weight", + "shape": [ + 18432, + 288 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 21233664, + "byteOffset": 11943936 + } + ], + "md5sum": "584e8d4ef70fb14e3772af0979bd3f73" + }, + { + "dataPath": "params_shard_21.bin", + "format": "raw-shard", + "nbytes": 21233664, + "records": [ + { + "name": "model.layers.20.mlp.gate_up_proj.q_weight", + "shape": [ + 18432, + 288 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 21233664, + "byteOffset": 0 + } + ], + "md5sum": "0adcc615d1d9695b4ef590c89de35db7" + }, + { + "dataPath": "params_shard_22.bin", + "format": "raw-shard", + "nbytes": 33214464, + "records": [ + { + "name": "model.layers.2.mlp.gate_up_proj.q_scale", + "shape": [ + 18432, + 72 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2654208, + "byteOffset": 0 + }, + { + "name": "model.layers.2.post_attention_layernorm.weight", + "shape": [ + 2304 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4608, + "byteOffset": 2654208 + }, + { + "name": "model.layers.2.post_feedforward_layernorm.weight", + "shape": [ + 2304 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4608, + "byteOffset": 2658816 + }, + { + "name": "model.layers.2.pre_feedforward_layernorm.weight", + "shape": [ + 2304 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4608, + "byteOffset": 2663424 + }, + { + "name": "model.layers.2.self_attn.qkv_proj.q_weight", + "shape": [ + 4096, + 288 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4718592, + "byteOffset": 2668032 + }, + { + "name": "model.layers.2.self_attn.qkv_proj.q_scale", + "shape": [ + 4096, + 72 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 589824, + "byteOffset": 7386624 + }, + { + "name": "model.layers.2.self_attn.o_proj.q_weight", + "shape": [ + 2304, + 256 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 7976448 + }, + { + "name": "model.layers.2.self_attn.o_proj.q_scale", + "shape": [ + 2304, + 64 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 294912, + "byteOffset": 10335744 + }, + { + "name": "model.layers.20.input_layernorm.weight", + "shape": [ + 2304 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4608, + "byteOffset": 10630656 + }, + { + "name": "model.layers.20.mlp.down_proj.q_weight", + "shape": [ + 2304, + 1152 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 10616832, + "byteOffset": 10635264 + }, + { + "name": "model.layers.20.mlp.down_proj.q_scale", + "shape": [ + 2304, + 288 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1327104, + "byteOffset": 21252096 + }, + { + "name": "model.layers.20.mlp.gate_up_proj.q_scale", + "shape": [ + 18432, + 72 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2654208, + "byteOffset": 22579200 + }, + { + "name": "model.layers.20.post_attention_layernorm.weight", + "shape": [ + 2304 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4608, + "byteOffset": 25233408 + }, + { + "name": "model.layers.20.post_feedforward_layernorm.weight", + "shape": [ + 2304 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4608, + "byteOffset": 25238016 + }, + { + "name": "model.layers.20.pre_feedforward_layernorm.weight", + "shape": [ + 2304 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4608, + "byteOffset": 25242624 + }, + { + "name": "model.layers.20.self_attn.qkv_proj.q_weight", + "shape": [ + 4096, + 288 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4718592, + "byteOffset": 25247232 + }, + { + "name": "model.layers.20.self_attn.qkv_proj.q_scale", + "shape": [ + 4096, + 72 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 589824, + "byteOffset": 29965824 + }, + { + "name": "model.layers.20.self_attn.o_proj.q_weight", + "shape": [ + 2304, + 256 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 30555648 + }, + { + "name": "model.layers.20.self_attn.o_proj.q_scale", + "shape": [ + 2304, + 64 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 294912, + "byteOffset": 32914944 + }, + { + "name": "model.layers.21.input_layernorm.weight", + "shape": [ + 2304 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4608, + "byteOffset": 33209856 + } + ], + "md5sum": "14694eefe69a69a9223867449c09d11c" + }, + { + "dataPath": "params_shard_23.bin", + "format": "raw-shard", + "nbytes": 33177600, + "records": [ + { + "name": "model.layers.21.mlp.down_proj.q_weight", + "shape": [ + 2304, + 1152 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 10616832, + "byteOffset": 0 + }, + { + "name": "model.layers.21.mlp.down_proj.q_scale", + "shape": [ + 2304, + 288 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1327104, + "byteOffset": 10616832 + }, + { + "name": "model.layers.21.mlp.gate_up_proj.q_weight", + "shape": [ + 18432, + 288 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 21233664, + "byteOffset": 11943936 + } + ], + "md5sum": "3fbc7ba0b3c4e0f20ae744e85561185e" + }, + { + "dataPath": "params_shard_24.bin", + "format": "raw-shard", + "nbytes": 21233664, + "records": [ + { + "name": "model.layers.22.mlp.gate_up_proj.q_weight", + "shape": [ + 18432, + 288 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 21233664, + "byteOffset": 0 + } + ], + "md5sum": "fb2183e1778ec2dfd9cd348116591da5" + }, + { + "dataPath": "params_shard_25.bin", + "format": "raw-shard", + "nbytes": 33214464, + "records": [ + { + "name": "model.layers.21.mlp.gate_up_proj.q_scale", + "shape": [ + 18432, + 72 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2654208, + "byteOffset": 0 + }, + { + "name": "model.layers.21.post_attention_layernorm.weight", + "shape": [ + 2304 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4608, + "byteOffset": 2654208 + }, + { + "name": "model.layers.21.post_feedforward_layernorm.weight", + "shape": [ + 2304 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4608, + "byteOffset": 2658816 + }, + { + "name": "model.layers.21.pre_feedforward_layernorm.weight", + "shape": [ + 2304 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4608, + "byteOffset": 2663424 + }, + { + "name": "model.layers.21.self_attn.qkv_proj.q_weight", + "shape": [ + 4096, + 288 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4718592, + "byteOffset": 2668032 + }, + { + "name": "model.layers.21.self_attn.qkv_proj.q_scale", + "shape": [ + 4096, + 72 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 589824, + "byteOffset": 7386624 + }, + { + "name": "model.layers.21.self_attn.o_proj.q_weight", + "shape": [ + 2304, + 256 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 7976448 + }, + { + "name": "model.layers.21.self_attn.o_proj.q_scale", + "shape": [ + 2304, + 64 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 294912, + "byteOffset": 10335744 + }, + { + "name": "model.layers.22.input_layernorm.weight", + "shape": [ + 2304 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4608, + "byteOffset": 10630656 + }, + { + "name": "model.layers.22.mlp.down_proj.q_weight", + "shape": [ + 2304, + 1152 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 10616832, + "byteOffset": 10635264 + }, + { + "name": "model.layers.22.mlp.down_proj.q_scale", + "shape": [ + 2304, + 288 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1327104, + "byteOffset": 21252096 + }, + { + "name": "model.layers.22.mlp.gate_up_proj.q_scale", + "shape": [ + 18432, + 72 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2654208, + "byteOffset": 22579200 + }, + { + "name": "model.layers.22.post_attention_layernorm.weight", + "shape": [ + 2304 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4608, + "byteOffset": 25233408 + }, + { + "name": "model.layers.22.post_feedforward_layernorm.weight", + "shape": [ + 2304 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4608, + "byteOffset": 25238016 + }, + { + "name": "model.layers.22.pre_feedforward_layernorm.weight", + "shape": [ + 2304 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4608, + "byteOffset": 25242624 + }, + { + "name": "model.layers.22.self_attn.qkv_proj.q_weight", + "shape": [ + 4096, + 288 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4718592, + "byteOffset": 25247232 + }, + { + "name": "model.layers.22.self_attn.qkv_proj.q_scale", + "shape": [ + 4096, + 72 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 589824, + "byteOffset": 29965824 + }, + { + "name": "model.layers.22.self_attn.o_proj.q_weight", + "shape": [ + 2304, + 256 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 30555648 + }, + { + "name": "model.layers.22.self_attn.o_proj.q_scale", + "shape": [ + 2304, + 64 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 294912, + "byteOffset": 32914944 + }, + { + "name": "model.layers.23.input_layernorm.weight", + "shape": [ + 2304 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4608, + "byteOffset": 33209856 + } + ], + "md5sum": "a76f404041e3f6647d86cbc041bb144e" + }, + { + "dataPath": "params_shard_26.bin", + "format": "raw-shard", + "nbytes": 33177600, + "records": [ + { + "name": "model.layers.23.mlp.down_proj.q_weight", + "shape": [ + 2304, + 1152 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 10616832, + "byteOffset": 0 + }, + { + "name": "model.layers.23.mlp.down_proj.q_scale", + "shape": [ + 2304, + 288 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1327104, + "byteOffset": 10616832 + }, + { + "name": "model.layers.23.mlp.gate_up_proj.q_weight", + "shape": [ + 18432, + 288 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 21233664, + "byteOffset": 11943936 + } + ], + "md5sum": "d30f7600b2debb90cf9851c84ea90ab4" + }, + { + "dataPath": "params_shard_27.bin", + "format": "raw-shard", + "nbytes": 31864320, + "records": [ + { + "name": "model.layers.23.mlp.gate_up_proj.q_scale", + "shape": [ + 18432, + 72 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2654208, + "byteOffset": 0 + }, + { + "name": "model.layers.23.post_attention_layernorm.weight", + "shape": [ + 2304 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4608, + "byteOffset": 2654208 + }, + { + "name": "model.layers.23.post_feedforward_layernorm.weight", + "shape": [ + 2304 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4608, + "byteOffset": 2658816 + }, + { + "name": "model.layers.23.pre_feedforward_layernorm.weight", + "shape": [ + 2304 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4608, + "byteOffset": 2663424 + }, + { + "name": "model.layers.23.self_attn.qkv_proj.q_weight", + "shape": [ + 4096, + 288 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4718592, + "byteOffset": 2668032 + }, + { + "name": "model.layers.23.self_attn.qkv_proj.q_scale", + "shape": [ + 4096, + 72 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 589824, + "byteOffset": 7386624 + }, + { + "name": "model.layers.23.self_attn.o_proj.q_weight", + "shape": [ + 2304, + 256 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 7976448 + }, + { + "name": "model.layers.23.self_attn.o_proj.q_scale", + "shape": [ + 2304, + 64 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 294912, + "byteOffset": 10335744 + }, + { + "name": "model.layers.24.mlp.gate_up_proj.q_weight", + "shape": [ + 18432, + 288 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 21233664, + "byteOffset": 10630656 + } + ], + "md5sum": "e33528541ac66995a65d4059e025adb4" + }, + { + "dataPath": "params_shard_28.bin", + "format": "raw-shard", + "nbytes": 21233664, + "records": [ + { + "name": "model.layers.3.mlp.gate_up_proj.q_weight", + "shape": [ + 18432, + 288 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 21233664, + "byteOffset": 0 + } + ], + "md5sum": "2334942f272bf7f3771df44bdc75a8f9" + }, + { + "dataPath": "params_shard_29.bin", + "format": "raw-shard", + "nbytes": 33200640, + "records": [ + { + "name": "model.layers.24.mlp.gate_up_proj.q_scale", + "shape": [ + 18432, + 72 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2654208, + "byteOffset": 0 + }, + { + "name": "model.layers.24.self_attn.qkv_proj.q_weight", + "shape": [ + 4096, + 288 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4718592, + "byteOffset": 2654208 + }, + { + "name": "model.layers.24.self_attn.qkv_proj.q_scale", + "shape": [ + 4096, + 72 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 589824, + "byteOffset": 7372800 + }, + { + "name": "model.layers.24.self_attn.o_proj.q_weight", + "shape": [ + 2304, + 256 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 7962624 + }, + { + "name": "model.layers.24.self_attn.o_proj.q_scale", + "shape": [ + 2304, + 64 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 294912, + "byteOffset": 10321920 + }, + { + "name": "model.layers.3.input_layernorm.weight", + "shape": [ + 2304 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4608, + "byteOffset": 10616832 + }, + { + "name": "model.layers.3.mlp.down_proj.q_weight", + "shape": [ + 2304, + 1152 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 10616832, + "byteOffset": 10621440 + }, + { + "name": "model.layers.3.mlp.down_proj.q_scale", + "shape": [ + 2304, + 288 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1327104, + "byteOffset": 21238272 + }, + { + "name": "model.layers.3.mlp.gate_up_proj.q_scale", + "shape": [ + 18432, + 72 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2654208, + "byteOffset": 22565376 + }, + { + "name": "model.layers.3.post_attention_layernorm.weight", + "shape": [ + 2304 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4608, + "byteOffset": 25219584 + }, + { + "name": "model.layers.3.post_feedforward_layernorm.weight", + "shape": [ + 2304 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4608, + "byteOffset": 25224192 + }, + { + "name": "model.layers.3.pre_feedforward_layernorm.weight", + "shape": [ + 2304 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4608, + "byteOffset": 25228800 + }, + { + "name": "model.layers.3.self_attn.qkv_proj.q_weight", + "shape": [ + 4096, + 288 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4718592, + "byteOffset": 25233408 + }, + { + "name": "model.layers.3.self_attn.qkv_proj.q_scale", + "shape": [ + 4096, + 72 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 589824, + "byteOffset": 29952000 + }, + { + "name": "model.layers.3.self_attn.o_proj.q_weight", + "shape": [ + 2304, + 256 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 30541824 + }, + { + "name": "model.layers.3.self_attn.o_proj.q_scale", + "shape": [ + 2304, + 64 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 294912, + "byteOffset": 32901120 + }, + { + "name": "model.layers.4.input_layernorm.weight", + "shape": [ + 2304 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4608, + "byteOffset": 33196032 + } + ], + "md5sum": "9ee5e7a7f0213b4d55deb8cea35b3a8e" + }, + { + "dataPath": "params_shard_30.bin", + "format": "raw-shard", + "nbytes": 33177600, + "records": [ + { + "name": "model.layers.4.mlp.down_proj.q_weight", + "shape": [ + 2304, + 1152 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 10616832, + "byteOffset": 0 + }, + { + "name": "model.layers.4.mlp.down_proj.q_scale", + "shape": [ + 2304, + 288 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1327104, + "byteOffset": 10616832 + }, + { + "name": "model.layers.4.mlp.gate_up_proj.q_weight", + "shape": [ + 18432, + 288 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 21233664, + "byteOffset": 11943936 + } + ], + "md5sum": "babbe4d246c14e00495e33536df1e291" + }, + { + "dataPath": "params_shard_31.bin", + "format": "raw-shard", + "nbytes": 21233664, + "records": [ + { + "name": "model.layers.5.mlp.gate_up_proj.q_weight", + "shape": [ + 18432, + 288 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 21233664, + "byteOffset": 0 + } + ], + "md5sum": "21f58b7fc7ca5c6dbf8d2953aa7abdb9" + }, + { + "dataPath": "params_shard_32.bin", + "format": "raw-shard", + "nbytes": 33214464, + "records": [ + { + "name": "model.layers.4.mlp.gate_up_proj.q_scale", + "shape": [ + 18432, + 72 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2654208, + "byteOffset": 0 + }, + { + "name": "model.layers.4.post_attention_layernorm.weight", + "shape": [ + 2304 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4608, + "byteOffset": 2654208 + }, + { + "name": "model.layers.4.post_feedforward_layernorm.weight", + "shape": [ + 2304 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4608, + "byteOffset": 2658816 + }, + { + "name": "model.layers.4.pre_feedforward_layernorm.weight", + "shape": [ + 2304 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4608, + "byteOffset": 2663424 + }, + { + "name": "model.layers.4.self_attn.qkv_proj.q_weight", + "shape": [ + 4096, + 288 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4718592, + "byteOffset": 2668032 + }, + { + "name": "model.layers.4.self_attn.qkv_proj.q_scale", + "shape": [ + 4096, + 72 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 589824, + "byteOffset": 7386624 + }, + { + "name": "model.layers.4.self_attn.o_proj.q_weight", + "shape": [ + 2304, + 256 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 7976448 + }, + { + "name": "model.layers.4.self_attn.o_proj.q_scale", + "shape": [ + 2304, + 64 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 294912, + "byteOffset": 10335744 + }, + { + "name": "model.layers.5.input_layernorm.weight", + "shape": [ + 2304 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4608, + "byteOffset": 10630656 + }, + { + "name": "model.layers.5.mlp.down_proj.q_weight", + "shape": [ + 2304, + 1152 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 10616832, + "byteOffset": 10635264 + }, + { + "name": "model.layers.5.mlp.down_proj.q_scale", + "shape": [ + 2304, + 288 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1327104, + "byteOffset": 21252096 + }, + { + "name": "model.layers.5.mlp.gate_up_proj.q_scale", + "shape": [ + 18432, + 72 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2654208, + "byteOffset": 22579200 + }, + { + "name": "model.layers.5.post_attention_layernorm.weight", + "shape": [ + 2304 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4608, + "byteOffset": 25233408 + }, + { + "name": "model.layers.5.post_feedforward_layernorm.weight", + "shape": [ + 2304 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4608, + "byteOffset": 25238016 + }, + { + "name": "model.layers.5.pre_feedforward_layernorm.weight", + "shape": [ + 2304 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4608, + "byteOffset": 25242624 + }, + { + "name": "model.layers.5.self_attn.qkv_proj.q_weight", + "shape": [ + 4096, + 288 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4718592, + "byteOffset": 25247232 + }, + { + "name": "model.layers.5.self_attn.qkv_proj.q_scale", + "shape": [ + 4096, + 72 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 589824, + "byteOffset": 29965824 + }, + { + "name": "model.layers.5.self_attn.o_proj.q_weight", + "shape": [ + 2304, + 256 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 30555648 + }, + { + "name": "model.layers.5.self_attn.o_proj.q_scale", + "shape": [ + 2304, + 64 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 294912, + "byteOffset": 32914944 + }, + { + "name": "model.layers.6.input_layernorm.weight", + "shape": [ + 2304 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4608, + "byteOffset": 33209856 + } + ], + "md5sum": "44ed88825a210c8e2033400e70300767" + }, + { + "dataPath": "params_shard_33.bin", + "format": "raw-shard", + "nbytes": 33177600, + "records": [ + { + "name": "model.layers.6.mlp.down_proj.q_weight", + "shape": [ + 2304, + 1152 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 10616832, + "byteOffset": 0 + }, + { + "name": "model.layers.6.mlp.down_proj.q_scale", + "shape": [ + 2304, + 288 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1327104, + "byteOffset": 10616832 + }, + { + "name": "model.layers.6.mlp.gate_up_proj.q_weight", + "shape": [ + 18432, + 288 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 21233664, + "byteOffset": 11943936 + } + ], + "md5sum": "93e69ae37c7383b0004a29cc419c8726" + }, + { + "dataPath": "params_shard_34.bin", + "format": "raw-shard", + "nbytes": 21233664, + "records": [ + { + "name": "model.layers.7.mlp.gate_up_proj.q_weight", + "shape": [ + 18432, + 288 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 21233664, + "byteOffset": 0 + } + ], + "md5sum": "29656ba96b5514b3401910b2cf1b2093" + }, + { + "dataPath": "params_shard_35.bin", + "format": "raw-shard", + "nbytes": 33214464, + "records": [ + { + "name": "model.layers.6.mlp.gate_up_proj.q_scale", + "shape": [ + 18432, + 72 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2654208, + "byteOffset": 0 + }, + { + "name": "model.layers.6.post_attention_layernorm.weight", + "shape": [ + 2304 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4608, + "byteOffset": 2654208 + }, + { + "name": "model.layers.6.post_feedforward_layernorm.weight", + "shape": [ + 2304 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4608, + "byteOffset": 2658816 + }, + { + "name": "model.layers.6.pre_feedforward_layernorm.weight", + "shape": [ + 2304 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4608, + "byteOffset": 2663424 + }, + { + "name": "model.layers.6.self_attn.qkv_proj.q_weight", + "shape": [ + 4096, + 288 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4718592, + "byteOffset": 2668032 + }, + { + "name": "model.layers.6.self_attn.qkv_proj.q_scale", + "shape": [ + 4096, + 72 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 589824, + "byteOffset": 7386624 + }, + { + "name": "model.layers.6.self_attn.o_proj.q_weight", + "shape": [ + 2304, + 256 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 7976448 + }, + { + "name": "model.layers.6.self_attn.o_proj.q_scale", + "shape": [ + 2304, + 64 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 294912, + "byteOffset": 10335744 + }, + { + "name": "model.layers.7.input_layernorm.weight", + "shape": [ + 2304 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4608, + "byteOffset": 10630656 + }, + { + "name": "model.layers.7.mlp.down_proj.q_weight", + "shape": [ + 2304, + 1152 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 10616832, + "byteOffset": 10635264 + }, + { + "name": "model.layers.7.mlp.down_proj.q_scale", + "shape": [ + 2304, + 288 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1327104, + "byteOffset": 21252096 + }, + { + "name": "model.layers.7.mlp.gate_up_proj.q_scale", + "shape": [ + 18432, + 72 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2654208, + "byteOffset": 22579200 + }, + { + "name": "model.layers.7.post_attention_layernorm.weight", + "shape": [ + 2304 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4608, + "byteOffset": 25233408 + }, + { + "name": "model.layers.7.post_feedforward_layernorm.weight", + "shape": [ + 2304 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4608, + "byteOffset": 25238016 + }, + { + "name": "model.layers.7.pre_feedforward_layernorm.weight", + "shape": [ + 2304 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4608, + "byteOffset": 25242624 + }, + { + "name": "model.layers.7.self_attn.qkv_proj.q_weight", + "shape": [ + 4096, + 288 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4718592, + "byteOffset": 25247232 + }, + { + "name": "model.layers.7.self_attn.qkv_proj.q_scale", + "shape": [ + 4096, + 72 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 589824, + "byteOffset": 29965824 + }, + { + "name": "model.layers.7.self_attn.o_proj.q_weight", + "shape": [ + 2304, + 256 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 30555648 + }, + { + "name": "model.layers.7.self_attn.o_proj.q_scale", + "shape": [ + 2304, + 64 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 294912, + "byteOffset": 32914944 + }, + { + "name": "model.layers.8.input_layernorm.weight", + "shape": [ + 2304 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4608, + "byteOffset": 33209856 + } + ], + "md5sum": "550983d43939b0d3d752d29349cefe9c" + }, + { + "dataPath": "params_shard_36.bin", + "format": "raw-shard", + "nbytes": 33177600, + "records": [ + { + "name": "model.layers.8.mlp.down_proj.q_weight", + "shape": [ + 2304, + 1152 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 10616832, + "byteOffset": 0 + }, + { + "name": "model.layers.8.mlp.down_proj.q_scale", + "shape": [ + 2304, + 288 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1327104, + "byteOffset": 10616832 + }, + { + "name": "model.layers.8.mlp.gate_up_proj.q_weight", + "shape": [ + 18432, + 288 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 21233664, + "byteOffset": 11943936 + } + ], + "md5sum": "e041c162985da23150d03713fec452c5" + }, + { + "dataPath": "params_shard_37.bin", + "format": "raw-shard", + "nbytes": 21233664, + "records": [ + { + "name": "model.layers.9.mlp.gate_up_proj.q_weight", + "shape": [ + 18432, + 288 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 21233664, + "byteOffset": 0 + } + ], + "md5sum": "c816f0c057f977aef0d917927c495952" + }, + { + "dataPath": "params_shard_38.bin", + "format": "raw-shard", + "nbytes": 33214464, + "records": [ + { + "name": "model.layers.8.mlp.gate_up_proj.q_scale", + "shape": [ + 18432, + 72 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2654208, + "byteOffset": 0 + }, + { + "name": "model.layers.8.post_attention_layernorm.weight", + "shape": [ + 2304 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4608, + "byteOffset": 2654208 + }, + { + "name": "model.layers.8.post_feedforward_layernorm.weight", + "shape": [ + 2304 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4608, + "byteOffset": 2658816 + }, + { + "name": "model.layers.8.pre_feedforward_layernorm.weight", + "shape": [ + 2304 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4608, + "byteOffset": 2663424 + }, + { + "name": "model.layers.8.self_attn.qkv_proj.q_weight", + "shape": [ + 4096, + 288 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4718592, + "byteOffset": 2668032 + }, + { + "name": "model.layers.8.self_attn.qkv_proj.q_scale", + "shape": [ + 4096, + 72 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 589824, + "byteOffset": 7386624 + }, + { + "name": "model.layers.8.self_attn.o_proj.q_weight", + "shape": [ + 2304, + 256 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 7976448 + }, + { + "name": "model.layers.8.self_attn.o_proj.q_scale", + "shape": [ + 2304, + 64 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 294912, + "byteOffset": 10335744 + }, + { + "name": "model.layers.9.input_layernorm.weight", + "shape": [ + 2304 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4608, + "byteOffset": 10630656 + }, + { + "name": "model.layers.9.mlp.down_proj.q_weight", + "shape": [ + 2304, + 1152 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 10616832, + "byteOffset": 10635264 + }, + { + "name": "model.layers.9.mlp.down_proj.q_scale", + "shape": [ + 2304, + 288 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1327104, + "byteOffset": 21252096 + }, + { + "name": "model.layers.9.mlp.gate_up_proj.q_scale", + "shape": [ + 18432, + 72 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2654208, + "byteOffset": 22579200 + }, + { + "name": "model.layers.9.post_attention_layernorm.weight", + "shape": [ + 2304 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4608, + "byteOffset": 25233408 + }, + { + "name": "model.layers.9.post_feedforward_layernorm.weight", + "shape": [ + 2304 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4608, + "byteOffset": 25238016 + }, + { + "name": "model.layers.9.pre_feedforward_layernorm.weight", + "shape": [ + 2304 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4608, + "byteOffset": 25242624 + }, + { + "name": "model.layers.9.self_attn.qkv_proj.q_weight", + "shape": [ + 4096, + 288 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4718592, + "byteOffset": 25247232 + }, + { + "name": "model.layers.9.self_attn.qkv_proj.q_scale", + "shape": [ + 4096, + 72 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 589824, + "byteOffset": 29965824 + }, + { + "name": "model.layers.9.self_attn.o_proj.q_weight", + "shape": [ + 2304, + 256 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 30555648 + }, + { + "name": "model.layers.9.self_attn.o_proj.q_scale", + "shape": [ + 2304, + 64 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 294912, + "byteOffset": 32914944 + }, + { + "name": "model.layers.24.input_layernorm.weight", + "shape": [ + 2304 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4608, + "byteOffset": 33209856 + } + ], + "md5sum": "d5cce7273b21673fa7394f92f0b7e53d" + }, + { + "dataPath": "params_shard_39.bin", + "format": "raw-shard", + "nbytes": 21233664, + "records": [ + { + "name": "model.layers.25.mlp.gate_up_proj.q_weight", + "shape": [ + 18432, + 288 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 21233664, + "byteOffset": 0 + } + ], + "md5sum": "7e6faa752093ea3d32faa7e8226b3dd1" + }, + { + "dataPath": "params_shard_40.bin", + "format": "raw-shard", + "nbytes": 31882752, + "records": [ + { + "name": "model.layers.24.mlp.down_proj.q_weight", + "shape": [ + 2304, + 1152 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 10616832, + "byteOffset": 0 + }, + { + "name": "model.layers.24.mlp.down_proj.q_scale", + "shape": [ + 2304, + 288 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1327104, + "byteOffset": 10616832 + }, + { + "name": "model.layers.24.post_attention_layernorm.weight", + "shape": [ + 2304 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4608, + "byteOffset": 11943936 + }, + { + "name": "model.layers.24.post_feedforward_layernorm.weight", + "shape": [ + 2304 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4608, + "byteOffset": 11948544 + }, + { + "name": "model.layers.24.pre_feedforward_layernorm.weight", + "shape": [ + 2304 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4608, + "byteOffset": 11953152 + }, + { + "name": "model.layers.25.input_layernorm.weight", + "shape": [ + 2304 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4608, + "byteOffset": 11957760 + }, + { + "name": "model.layers.25.mlp.down_proj.q_weight", + "shape": [ + 2304, + 1152 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 10616832, + "byteOffset": 11962368 + }, + { + "name": "model.layers.25.mlp.down_proj.q_scale", + "shape": [ + 2304, + 288 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1327104, + "byteOffset": 22579200 + }, + { + "name": "model.layers.25.mlp.gate_up_proj.q_scale", + "shape": [ + 18432, + 72 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2654208, + "byteOffset": 23906304 + }, + { + "name": "model.layers.25.post_attention_layernorm.weight", + "shape": [ + 2304 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4608, + "byteOffset": 26560512 + }, + { + "name": "model.layers.25.post_feedforward_layernorm.weight", + "shape": [ + 2304 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4608, + "byteOffset": 26565120 + }, + { + "name": "model.layers.25.pre_feedforward_layernorm.weight", + "shape": [ + 2304 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4608, + "byteOffset": 26569728 + }, + { + "name": "model.layers.25.self_attn.qkv_proj.q_weight", + "shape": [ + 4096, + 288 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4718592, + "byteOffset": 26574336 + }, + { + "name": "model.layers.25.self_attn.qkv_proj.q_scale", + "shape": [ + 4096, + 72 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 589824, + "byteOffset": 31292928 + } + ], + "md5sum": "30870903e1e4927fff8d281221db6cf9" + }, + { + "dataPath": "params_shard_41.bin", + "format": "raw-shard", + "nbytes": 2658816, + "records": [ + { + "name": "model.layers.25.self_attn.o_proj.q_weight", + "shape": [ + 2304, + 256 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 0 + }, + { + "name": "model.layers.25.self_attn.o_proj.q_scale", + "shape": [ + 2304, + 64 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 294912, + "byteOffset": 2359296 + }, + { + "name": "model.norm.weight", + "shape": [ + 2304 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4608, + "byteOffset": 2654208 + } + ], + "md5sum": "460980e964a2321290ce2641bdadbf75" + } + ] +} \ No newline at end of file diff --git a/tensor-cache.json b/tensor-cache.json new file mode 100644 index 0000000000000000000000000000000000000000..c03b882c12281b560a546b341c87cbbd3c3fe643 --- /dev/null +++ b/tensor-cache.json @@ -0,0 +1,3705 @@ +{ + "metadata": { + "ParamSize": 315, + "ParamBytes": 1634780160.0, + "BitsPerParam": 5.002498464347751 + }, + "records": [ + { + "dataPath": "params_shard_0.bin", + "format": "raw-shard", + "nbytes": 294912000, + "records": [ + { + "name": "model.embed_tokens.q_weight", + "shape": [ + 256000, + 288 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 294912000, + "byteOffset": 0 + } + ], + "md5sum": "1056a334963736d59422e83e31cb241d" + }, + { + "dataPath": "params_shard_1.bin", + "format": "raw-shard", + "nbytes": 36864000, + "records": [ + { + "name": "model.embed_tokens.q_scale", + "shape": [ + 256000, + 72 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 36864000, + "byteOffset": 0 + } + ], + "md5sum": "625a34c9eab976edd9064fcd0bcd959b" + }, + { + "dataPath": "params_shard_2.bin", + "format": "raw-shard", + "nbytes": 33182208, + "records": [ + { + "name": "model.layers.0.input_layernorm.weight", + "shape": [ + 2304 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4608, + "byteOffset": 0 + }, + { + "name": "model.layers.0.mlp.down_proj.q_weight", + "shape": [ + 2304, + 1152 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 10616832, + "byteOffset": 4608 + }, + { + "name": "model.layers.0.mlp.down_proj.q_scale", + "shape": [ + 2304, + 288 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1327104, + "byteOffset": 10621440 + }, + { + "name": "model.layers.0.mlp.gate_up_proj.q_weight", + "shape": [ + 18432, + 288 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 21233664, + "byteOffset": 11948544 + } + ], + "md5sum": "6acbc70fe032f619ec0d7bf36a54a33c" + }, + { + "dataPath": "params_shard_3.bin", + "format": "raw-shard", + "nbytes": 21233664, + "records": [ + { + "name": "model.layers.1.mlp.gate_up_proj.q_weight", + "shape": [ + 18432, + 288 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 21233664, + "byteOffset": 0 + } + ], + "md5sum": "175a78fbf7424be2f32f50fbee026857" + }, + { + "dataPath": "params_shard_4.bin", + "format": "raw-shard", + "nbytes": 33214464, + "records": [ + { + "name": "model.layers.0.mlp.gate_up_proj.q_scale", + "shape": [ + 18432, + 72 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2654208, + "byteOffset": 0 + }, + { + "name": "model.layers.0.post_attention_layernorm.weight", + "shape": [ + 2304 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4608, + "byteOffset": 2654208 + }, + { + "name": "model.layers.0.post_feedforward_layernorm.weight", + "shape": [ + 2304 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4608, + "byteOffset": 2658816 + }, + { + "name": "model.layers.0.pre_feedforward_layernorm.weight", + "shape": [ + 2304 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4608, + "byteOffset": 2663424 + }, + { + "name": "model.layers.0.self_attn.qkv_proj.q_weight", + "shape": [ + 4096, + 288 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4718592, + "byteOffset": 2668032 + }, + { + "name": "model.layers.0.self_attn.qkv_proj.q_scale", + "shape": [ + 4096, + 72 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 589824, + "byteOffset": 7386624 + }, + { + "name": "model.layers.0.self_attn.o_proj.q_weight", + "shape": [ + 2304, + 256 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 7976448 + }, + { + "name": "model.layers.0.self_attn.o_proj.q_scale", + "shape": [ + 2304, + 64 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 294912, + "byteOffset": 10335744 + }, + { + "name": "model.layers.1.input_layernorm.weight", + "shape": [ + 2304 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4608, + "byteOffset": 10630656 + }, + { + "name": "model.layers.1.mlp.down_proj.q_weight", + "shape": [ + 2304, + 1152 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 10616832, + "byteOffset": 10635264 + }, + { + "name": "model.layers.1.mlp.down_proj.q_scale", + "shape": [ + 2304, + 288 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1327104, + "byteOffset": 21252096 + }, + { + "name": "model.layers.1.mlp.gate_up_proj.q_scale", + "shape": [ + 18432, + 72 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2654208, + "byteOffset": 22579200 + }, + { + "name": "model.layers.1.post_attention_layernorm.weight", + "shape": [ + 2304 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4608, + "byteOffset": 25233408 + }, + { + "name": "model.layers.1.post_feedforward_layernorm.weight", + "shape": [ + 2304 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4608, + "byteOffset": 25238016 + }, + { + "name": "model.layers.1.pre_feedforward_layernorm.weight", + "shape": [ + 2304 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4608, + "byteOffset": 25242624 + }, + { + "name": "model.layers.1.self_attn.qkv_proj.q_weight", + "shape": [ + 4096, + 288 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4718592, + "byteOffset": 25247232 + }, + { + "name": "model.layers.1.self_attn.qkv_proj.q_scale", + "shape": [ + 4096, + 72 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 589824, + "byteOffset": 29965824 + }, + { + "name": "model.layers.1.self_attn.o_proj.q_weight", + "shape": [ + 2304, + 256 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 30555648 + }, + { + "name": "model.layers.1.self_attn.o_proj.q_scale", + "shape": [ + 2304, + 64 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 294912, + "byteOffset": 32914944 + }, + { + "name": "model.layers.10.input_layernorm.weight", + "shape": [ + 2304 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4608, + "byteOffset": 33209856 + } + ], + "md5sum": "7046dd6a28b8387479009365eae5ff4c" + }, + { + "dataPath": "params_shard_5.bin", + "format": "raw-shard", + "nbytes": 33177600, + "records": [ + { + "name": "model.layers.10.mlp.down_proj.q_weight", + "shape": [ + 2304, + 1152 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 10616832, + "byteOffset": 0 + }, + { + "name": "model.layers.10.mlp.down_proj.q_scale", + "shape": [ + 2304, + 288 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1327104, + "byteOffset": 10616832 + }, + { + "name": "model.layers.10.mlp.gate_up_proj.q_weight", + "shape": [ + 18432, + 288 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 21233664, + "byteOffset": 11943936 + } + ], + "md5sum": "d40768de670da0b84ec61b5931183d43" + }, + { + "dataPath": "params_shard_6.bin", + "format": "raw-shard", + "nbytes": 21233664, + "records": [ + { + "name": "model.layers.11.mlp.gate_up_proj.q_weight", + "shape": [ + 18432, + 288 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 21233664, + "byteOffset": 0 + } + ], + "md5sum": "de00bdc0805a54eb5e45ed0909b7ed87" + }, + { + "dataPath": "params_shard_7.bin", + "format": "raw-shard", + "nbytes": 33214464, + "records": [ + { + "name": "model.layers.10.mlp.gate_up_proj.q_scale", + "shape": [ + 18432, + 72 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2654208, + "byteOffset": 0 + }, + { + "name": "model.layers.10.post_attention_layernorm.weight", + "shape": [ + 2304 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4608, + "byteOffset": 2654208 + }, + { + "name": "model.layers.10.post_feedforward_layernorm.weight", + "shape": [ + 2304 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4608, + "byteOffset": 2658816 + }, + { + "name": "model.layers.10.pre_feedforward_layernorm.weight", + "shape": [ + 2304 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4608, + "byteOffset": 2663424 + }, + { + "name": "model.layers.10.self_attn.qkv_proj.q_weight", + "shape": [ + 4096, + 288 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4718592, + "byteOffset": 2668032 + }, + { + "name": "model.layers.10.self_attn.qkv_proj.q_scale", + "shape": [ + 4096, + 72 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 589824, + "byteOffset": 7386624 + }, + { + "name": "model.layers.10.self_attn.o_proj.q_weight", + "shape": [ + 2304, + 256 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 7976448 + }, + { + "name": "model.layers.10.self_attn.o_proj.q_scale", + "shape": [ + 2304, + 64 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 294912, + "byteOffset": 10335744 + }, + { + "name": "model.layers.11.input_layernorm.weight", + "shape": [ + 2304 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4608, + "byteOffset": 10630656 + }, + { + "name": "model.layers.11.mlp.down_proj.q_weight", + "shape": [ + 2304, + 1152 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 10616832, + "byteOffset": 10635264 + }, + { + "name": "model.layers.11.mlp.down_proj.q_scale", + "shape": [ + 2304, + 288 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1327104, + "byteOffset": 21252096 + }, + { + "name": "model.layers.11.mlp.gate_up_proj.q_scale", + "shape": [ + 18432, + 72 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2654208, + "byteOffset": 22579200 + }, + { + "name": "model.layers.11.post_attention_layernorm.weight", + "shape": [ + 2304 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4608, + "byteOffset": 25233408 + }, + { + "name": "model.layers.11.post_feedforward_layernorm.weight", + "shape": [ + 2304 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4608, + "byteOffset": 25238016 + }, + { + "name": "model.layers.11.pre_feedforward_layernorm.weight", + "shape": [ + 2304 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4608, + "byteOffset": 25242624 + }, + { + "name": "model.layers.11.self_attn.qkv_proj.q_weight", + "shape": [ + 4096, + 288 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4718592, + "byteOffset": 25247232 + }, + { + "name": "model.layers.11.self_attn.qkv_proj.q_scale", + "shape": [ + 4096, + 72 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 589824, + "byteOffset": 29965824 + }, + { + "name": "model.layers.11.self_attn.o_proj.q_weight", + "shape": [ + 2304, + 256 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 30555648 + }, + { + "name": "model.layers.11.self_attn.o_proj.q_scale", + "shape": [ + 2304, + 64 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 294912, + "byteOffset": 32914944 + }, + { + "name": "model.layers.12.input_layernorm.weight", + "shape": [ + 2304 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4608, + "byteOffset": 33209856 + } + ], + "md5sum": "80b8a5cdb189dfe18e2238a896d52429" + }, + { + "dataPath": "params_shard_8.bin", + "format": "raw-shard", + "nbytes": 33177600, + "records": [ + { + "name": "model.layers.12.mlp.down_proj.q_weight", + "shape": [ + 2304, + 1152 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 10616832, + "byteOffset": 0 + }, + { + "name": "model.layers.12.mlp.down_proj.q_scale", + "shape": [ + 2304, + 288 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1327104, + "byteOffset": 10616832 + }, + { + "name": "model.layers.12.mlp.gate_up_proj.q_weight", + "shape": [ + 18432, + 288 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 21233664, + "byteOffset": 11943936 + } + ], + "md5sum": "499c6f1c3955b7ac48e1baf54f03bc75" + }, + { + "dataPath": "params_shard_9.bin", + "format": "raw-shard", + "nbytes": 21233664, + "records": [ + { + "name": "model.layers.13.mlp.gate_up_proj.q_weight", + "shape": [ + 18432, + 288 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 21233664, + "byteOffset": 0 + } + ], + "md5sum": "d31be670584c3046d1e37cfdb9d9ddd6" + }, + { + "dataPath": "params_shard_10.bin", + "format": "raw-shard", + "nbytes": 33214464, + "records": [ + { + "name": "model.layers.12.mlp.gate_up_proj.q_scale", + "shape": [ + 18432, + 72 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2654208, + "byteOffset": 0 + }, + { + "name": "model.layers.12.post_attention_layernorm.weight", + "shape": [ + 2304 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4608, + "byteOffset": 2654208 + }, + { + "name": "model.layers.12.post_feedforward_layernorm.weight", + "shape": [ + 2304 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4608, + "byteOffset": 2658816 + }, + { + "name": "model.layers.12.pre_feedforward_layernorm.weight", + "shape": [ + 2304 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4608, + "byteOffset": 2663424 + }, + { + "name": "model.layers.12.self_attn.qkv_proj.q_weight", + "shape": [ + 4096, + 288 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4718592, + "byteOffset": 2668032 + }, + { + "name": "model.layers.12.self_attn.qkv_proj.q_scale", + "shape": [ + 4096, + 72 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 589824, + "byteOffset": 7386624 + }, + { + "name": "model.layers.12.self_attn.o_proj.q_weight", + "shape": [ + 2304, + 256 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 7976448 + }, + { + "name": "model.layers.12.self_attn.o_proj.q_scale", + "shape": [ + 2304, + 64 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 294912, + "byteOffset": 10335744 + }, + { + "name": "model.layers.13.input_layernorm.weight", + "shape": [ + 2304 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4608, + "byteOffset": 10630656 + }, + { + "name": "model.layers.13.mlp.down_proj.q_weight", + "shape": [ + 2304, + 1152 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 10616832, + "byteOffset": 10635264 + }, + { + "name": "model.layers.13.mlp.down_proj.q_scale", + "shape": [ + 2304, + 288 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1327104, + "byteOffset": 21252096 + }, + { + "name": "model.layers.13.mlp.gate_up_proj.q_scale", + "shape": [ + 18432, + 72 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2654208, + "byteOffset": 22579200 + }, + { + "name": "model.layers.13.post_attention_layernorm.weight", + "shape": [ + 2304 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4608, + "byteOffset": 25233408 + }, + { + "name": "model.layers.13.post_feedforward_layernorm.weight", + "shape": [ + 2304 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4608, + "byteOffset": 25238016 + }, + { + "name": "model.layers.13.pre_feedforward_layernorm.weight", + "shape": [ + 2304 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4608, + "byteOffset": 25242624 + }, + { + "name": "model.layers.13.self_attn.qkv_proj.q_weight", + "shape": [ + 4096, + 288 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4718592, + "byteOffset": 25247232 + }, + { + "name": "model.layers.13.self_attn.qkv_proj.q_scale", + "shape": [ + 4096, + 72 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 589824, + "byteOffset": 29965824 + }, + { + "name": "model.layers.13.self_attn.o_proj.q_weight", + "shape": [ + 2304, + 256 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 30555648 + }, + { + "name": "model.layers.13.self_attn.o_proj.q_scale", + "shape": [ + 2304, + 64 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 294912, + "byteOffset": 32914944 + }, + { + "name": "model.layers.14.input_layernorm.weight", + "shape": [ + 2304 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4608, + "byteOffset": 33209856 + } + ], + "md5sum": "d4c11e91b9585fb3780730c909a30752" + }, + { + "dataPath": "params_shard_11.bin", + "format": "raw-shard", + "nbytes": 33177600, + "records": [ + { + "name": "model.layers.14.mlp.down_proj.q_weight", + "shape": [ + 2304, + 1152 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 10616832, + "byteOffset": 0 + }, + { + "name": "model.layers.14.mlp.down_proj.q_scale", + "shape": [ + 2304, + 288 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1327104, + "byteOffset": 10616832 + }, + { + "name": "model.layers.14.mlp.gate_up_proj.q_weight", + "shape": [ + 18432, + 288 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 21233664, + "byteOffset": 11943936 + } + ], + "md5sum": "57a6b07fdb8aa49529a7b1b4959757ea" + }, + { + "dataPath": "params_shard_12.bin", + "format": "raw-shard", + "nbytes": 21233664, + "records": [ + { + "name": "model.layers.15.mlp.gate_up_proj.q_weight", + "shape": [ + 18432, + 288 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 21233664, + "byteOffset": 0 + } + ], + "md5sum": "67614580b3f2a8a38e85aa9b0267e77a" + }, + { + "dataPath": "params_shard_13.bin", + "format": "raw-shard", + "nbytes": 33214464, + "records": [ + { + "name": "model.layers.14.mlp.gate_up_proj.q_scale", + "shape": [ + 18432, + 72 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2654208, + "byteOffset": 0 + }, + { + "name": "model.layers.14.post_attention_layernorm.weight", + "shape": [ + 2304 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4608, + "byteOffset": 2654208 + }, + { + "name": "model.layers.14.post_feedforward_layernorm.weight", + "shape": [ + 2304 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4608, + "byteOffset": 2658816 + }, + { + "name": "model.layers.14.pre_feedforward_layernorm.weight", + "shape": [ + 2304 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4608, + "byteOffset": 2663424 + }, + { + "name": "model.layers.14.self_attn.qkv_proj.q_weight", + "shape": [ + 4096, + 288 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4718592, + "byteOffset": 2668032 + }, + { + "name": "model.layers.14.self_attn.qkv_proj.q_scale", + "shape": [ + 4096, + 72 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 589824, + "byteOffset": 7386624 + }, + { + "name": "model.layers.14.self_attn.o_proj.q_weight", + "shape": [ + 2304, + 256 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 7976448 + }, + { + "name": "model.layers.14.self_attn.o_proj.q_scale", + "shape": [ + 2304, + 64 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 294912, + "byteOffset": 10335744 + }, + { + "name": "model.layers.15.input_layernorm.weight", + "shape": [ + 2304 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4608, + "byteOffset": 10630656 + }, + { + "name": "model.layers.15.mlp.down_proj.q_weight", + "shape": [ + 2304, + 1152 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 10616832, + "byteOffset": 10635264 + }, + { + "name": "model.layers.15.mlp.down_proj.q_scale", + "shape": [ + 2304, + 288 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1327104, + "byteOffset": 21252096 + }, + { + "name": "model.layers.15.mlp.gate_up_proj.q_scale", + "shape": [ + 18432, + 72 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2654208, + "byteOffset": 22579200 + }, + { + "name": "model.layers.15.post_attention_layernorm.weight", + "shape": [ + 2304 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4608, + "byteOffset": 25233408 + }, + { + "name": "model.layers.15.post_feedforward_layernorm.weight", + "shape": [ + 2304 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4608, + "byteOffset": 25238016 + }, + { + "name": "model.layers.15.pre_feedforward_layernorm.weight", + "shape": [ + 2304 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4608, + "byteOffset": 25242624 + }, + { + "name": "model.layers.15.self_attn.qkv_proj.q_weight", + "shape": [ + 4096, + 288 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4718592, + "byteOffset": 25247232 + }, + { + "name": "model.layers.15.self_attn.qkv_proj.q_scale", + "shape": [ + 4096, + 72 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 589824, + "byteOffset": 29965824 + }, + { + "name": "model.layers.15.self_attn.o_proj.q_weight", + "shape": [ + 2304, + 256 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 30555648 + }, + { + "name": "model.layers.15.self_attn.o_proj.q_scale", + "shape": [ + 2304, + 64 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 294912, + "byteOffset": 32914944 + }, + { + "name": "model.layers.16.input_layernorm.weight", + "shape": [ + 2304 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4608, + "byteOffset": 33209856 + } + ], + "md5sum": "3a8f3b58cadd6366dd53d8378d3f8af1" + }, + { + "dataPath": "params_shard_14.bin", + "format": "raw-shard", + "nbytes": 33177600, + "records": [ + { + "name": "model.layers.16.mlp.down_proj.q_weight", + "shape": [ + 2304, + 1152 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 10616832, + "byteOffset": 0 + }, + { + "name": "model.layers.16.mlp.down_proj.q_scale", + "shape": [ + 2304, + 288 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1327104, + "byteOffset": 10616832 + }, + { + "name": "model.layers.16.mlp.gate_up_proj.q_weight", + "shape": [ + 18432, + 288 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 21233664, + "byteOffset": 11943936 + } + ], + "md5sum": "2f0476c0a82d1270be330f2b11b0095e" + }, + { + "dataPath": "params_shard_15.bin", + "format": "raw-shard", + "nbytes": 21233664, + "records": [ + { + "name": "model.layers.17.mlp.gate_up_proj.q_weight", + "shape": [ + 18432, + 288 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 21233664, + "byteOffset": 0 + } + ], + "md5sum": "b0a8a0bf26d878221850484946ce030a" + }, + { + "dataPath": "params_shard_16.bin", + "format": "raw-shard", + "nbytes": 33214464, + "records": [ + { + "name": "model.layers.16.mlp.gate_up_proj.q_scale", + "shape": [ + 18432, + 72 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2654208, + "byteOffset": 0 + }, + { + "name": "model.layers.16.post_attention_layernorm.weight", + "shape": [ + 2304 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4608, + "byteOffset": 2654208 + }, + { + "name": "model.layers.16.post_feedforward_layernorm.weight", + "shape": [ + 2304 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4608, + "byteOffset": 2658816 + }, + { + "name": "model.layers.16.pre_feedforward_layernorm.weight", + "shape": [ + 2304 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4608, + "byteOffset": 2663424 + }, + { + "name": "model.layers.16.self_attn.qkv_proj.q_weight", + "shape": [ + 4096, + 288 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4718592, + "byteOffset": 2668032 + }, + { + "name": "model.layers.16.self_attn.qkv_proj.q_scale", + "shape": [ + 4096, + 72 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 589824, + "byteOffset": 7386624 + }, + { + "name": "model.layers.16.self_attn.o_proj.q_weight", + "shape": [ + 2304, + 256 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 7976448 + }, + { + "name": "model.layers.16.self_attn.o_proj.q_scale", + "shape": [ + 2304, + 64 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 294912, + "byteOffset": 10335744 + }, + { + "name": "model.layers.17.input_layernorm.weight", + "shape": [ + 2304 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4608, + "byteOffset": 10630656 + }, + { + "name": "model.layers.17.mlp.down_proj.q_weight", + "shape": [ + 2304, + 1152 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 10616832, + "byteOffset": 10635264 + }, + { + "name": "model.layers.17.mlp.down_proj.q_scale", + "shape": [ + 2304, + 288 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1327104, + "byteOffset": 21252096 + }, + { + "name": "model.layers.17.mlp.gate_up_proj.q_scale", + "shape": [ + 18432, + 72 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2654208, + "byteOffset": 22579200 + }, + { + "name": "model.layers.17.post_attention_layernorm.weight", + "shape": [ + 2304 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4608, + "byteOffset": 25233408 + }, + { + "name": "model.layers.17.post_feedforward_layernorm.weight", + "shape": [ + 2304 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4608, + "byteOffset": 25238016 + }, + { + "name": "model.layers.17.pre_feedforward_layernorm.weight", + "shape": [ + 2304 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4608, + "byteOffset": 25242624 + }, + { + "name": "model.layers.17.self_attn.qkv_proj.q_weight", + "shape": [ + 4096, + 288 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4718592, + "byteOffset": 25247232 + }, + { + "name": "model.layers.17.self_attn.qkv_proj.q_scale", + "shape": [ + 4096, + 72 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 589824, + "byteOffset": 29965824 + }, + { + "name": "model.layers.17.self_attn.o_proj.q_weight", + "shape": [ + 2304, + 256 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 30555648 + }, + { + "name": "model.layers.17.self_attn.o_proj.q_scale", + "shape": [ + 2304, + 64 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 294912, + "byteOffset": 32914944 + }, + { + "name": "model.layers.18.input_layernorm.weight", + "shape": [ + 2304 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4608, + "byteOffset": 33209856 + } + ], + "md5sum": "f1a76e763b1dc0e9c67df48528bc0604" + }, + { + "dataPath": "params_shard_17.bin", + "format": "raw-shard", + "nbytes": 33177600, + "records": [ + { + "name": "model.layers.18.mlp.down_proj.q_weight", + "shape": [ + 2304, + 1152 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 10616832, + "byteOffset": 0 + }, + { + "name": "model.layers.18.mlp.down_proj.q_scale", + "shape": [ + 2304, + 288 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1327104, + "byteOffset": 10616832 + }, + { + "name": "model.layers.18.mlp.gate_up_proj.q_weight", + "shape": [ + 18432, + 288 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 21233664, + "byteOffset": 11943936 + } + ], + "md5sum": "c9afc0e296625eb4651b8faf2a0fb6b0" + }, + { + "dataPath": "params_shard_18.bin", + "format": "raw-shard", + "nbytes": 21233664, + "records": [ + { + "name": "model.layers.19.mlp.gate_up_proj.q_weight", + "shape": [ + 18432, + 288 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 21233664, + "byteOffset": 0 + } + ], + "md5sum": "95b6cee3c0471d1dbb0fea2e1baf4abd" + }, + { + "dataPath": "params_shard_19.bin", + "format": "raw-shard", + "nbytes": 33214464, + "records": [ + { + "name": "model.layers.18.mlp.gate_up_proj.q_scale", + "shape": [ + 18432, + 72 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2654208, + "byteOffset": 0 + }, + { + "name": "model.layers.18.post_attention_layernorm.weight", + "shape": [ + 2304 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4608, + "byteOffset": 2654208 + }, + { + "name": "model.layers.18.post_feedforward_layernorm.weight", + "shape": [ + 2304 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4608, + "byteOffset": 2658816 + }, + { + "name": "model.layers.18.pre_feedforward_layernorm.weight", + "shape": [ + 2304 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4608, + "byteOffset": 2663424 + }, + { + "name": "model.layers.18.self_attn.qkv_proj.q_weight", + "shape": [ + 4096, + 288 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4718592, + "byteOffset": 2668032 + }, + { + "name": "model.layers.18.self_attn.qkv_proj.q_scale", + "shape": [ + 4096, + 72 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 589824, + "byteOffset": 7386624 + }, + { + "name": "model.layers.18.self_attn.o_proj.q_weight", + "shape": [ + 2304, + 256 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 7976448 + }, + { + "name": "model.layers.18.self_attn.o_proj.q_scale", + "shape": [ + 2304, + 64 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 294912, + "byteOffset": 10335744 + }, + { + "name": "model.layers.19.input_layernorm.weight", + "shape": [ + 2304 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4608, + "byteOffset": 10630656 + }, + { + "name": "model.layers.19.mlp.down_proj.q_weight", + "shape": [ + 2304, + 1152 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 10616832, + "byteOffset": 10635264 + }, + { + "name": "model.layers.19.mlp.down_proj.q_scale", + "shape": [ + 2304, + 288 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1327104, + "byteOffset": 21252096 + }, + { + "name": "model.layers.19.mlp.gate_up_proj.q_scale", + "shape": [ + 18432, + 72 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2654208, + "byteOffset": 22579200 + }, + { + "name": "model.layers.19.post_attention_layernorm.weight", + "shape": [ + 2304 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4608, + "byteOffset": 25233408 + }, + { + "name": "model.layers.19.post_feedforward_layernorm.weight", + "shape": [ + 2304 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4608, + "byteOffset": 25238016 + }, + { + "name": "model.layers.19.pre_feedforward_layernorm.weight", + "shape": [ + 2304 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4608, + "byteOffset": 25242624 + }, + { + "name": "model.layers.19.self_attn.qkv_proj.q_weight", + "shape": [ + 4096, + 288 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4718592, + "byteOffset": 25247232 + }, + { + "name": "model.layers.19.self_attn.qkv_proj.q_scale", + "shape": [ + 4096, + 72 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 589824, + "byteOffset": 29965824 + }, + { + "name": "model.layers.19.self_attn.o_proj.q_weight", + "shape": [ + 2304, + 256 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 30555648 + }, + { + "name": "model.layers.19.self_attn.o_proj.q_scale", + "shape": [ + 2304, + 64 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 294912, + "byteOffset": 32914944 + }, + { + "name": "model.layers.2.input_layernorm.weight", + "shape": [ + 2304 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4608, + "byteOffset": 33209856 + } + ], + "md5sum": "f67c0fd6ce5403528b801dcad10d1816" + }, + { + "dataPath": "params_shard_20.bin", + "format": "raw-shard", + "nbytes": 33177600, + "records": [ + { + "name": "model.layers.2.mlp.down_proj.q_weight", + "shape": [ + 2304, + 1152 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 10616832, + "byteOffset": 0 + }, + { + "name": "model.layers.2.mlp.down_proj.q_scale", + "shape": [ + 2304, + 288 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1327104, + "byteOffset": 10616832 + }, + { + "name": "model.layers.2.mlp.gate_up_proj.q_weight", + "shape": [ + 18432, + 288 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 21233664, + "byteOffset": 11943936 + } + ], + "md5sum": "584e8d4ef70fb14e3772af0979bd3f73" + }, + { + "dataPath": "params_shard_21.bin", + "format": "raw-shard", + "nbytes": 21233664, + "records": [ + { + "name": "model.layers.20.mlp.gate_up_proj.q_weight", + "shape": [ + 18432, + 288 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 21233664, + "byteOffset": 0 + } + ], + "md5sum": "0adcc615d1d9695b4ef590c89de35db7" + }, + { + "dataPath": "params_shard_22.bin", + "format": "raw-shard", + "nbytes": 33214464, + "records": [ + { + "name": "model.layers.2.mlp.gate_up_proj.q_scale", + "shape": [ + 18432, + 72 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2654208, + "byteOffset": 0 + }, + { + "name": "model.layers.2.post_attention_layernorm.weight", + "shape": [ + 2304 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4608, + "byteOffset": 2654208 + }, + { + "name": "model.layers.2.post_feedforward_layernorm.weight", + "shape": [ + 2304 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4608, + "byteOffset": 2658816 + }, + { + "name": "model.layers.2.pre_feedforward_layernorm.weight", + "shape": [ + 2304 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4608, + "byteOffset": 2663424 + }, + { + "name": "model.layers.2.self_attn.qkv_proj.q_weight", + "shape": [ + 4096, + 288 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4718592, + "byteOffset": 2668032 + }, + { + "name": "model.layers.2.self_attn.qkv_proj.q_scale", + "shape": [ + 4096, + 72 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 589824, + "byteOffset": 7386624 + }, + { + "name": "model.layers.2.self_attn.o_proj.q_weight", + "shape": [ + 2304, + 256 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 7976448 + }, + { + "name": "model.layers.2.self_attn.o_proj.q_scale", + "shape": [ + 2304, + 64 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 294912, + "byteOffset": 10335744 + }, + { + "name": "model.layers.20.input_layernorm.weight", + "shape": [ + 2304 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4608, + "byteOffset": 10630656 + }, + { + "name": "model.layers.20.mlp.down_proj.q_weight", + "shape": [ + 2304, + 1152 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 10616832, + "byteOffset": 10635264 + }, + { + "name": "model.layers.20.mlp.down_proj.q_scale", + "shape": [ + 2304, + 288 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1327104, + "byteOffset": 21252096 + }, + { + "name": "model.layers.20.mlp.gate_up_proj.q_scale", + "shape": [ + 18432, + 72 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2654208, + "byteOffset": 22579200 + }, + { + "name": "model.layers.20.post_attention_layernorm.weight", + "shape": [ + 2304 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4608, + "byteOffset": 25233408 + }, + { + "name": "model.layers.20.post_feedforward_layernorm.weight", + "shape": [ + 2304 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4608, + "byteOffset": 25238016 + }, + { + "name": "model.layers.20.pre_feedforward_layernorm.weight", + "shape": [ + 2304 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4608, + "byteOffset": 25242624 + }, + { + "name": "model.layers.20.self_attn.qkv_proj.q_weight", + "shape": [ + 4096, + 288 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4718592, + "byteOffset": 25247232 + }, + { + "name": "model.layers.20.self_attn.qkv_proj.q_scale", + "shape": [ + 4096, + 72 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 589824, + "byteOffset": 29965824 + }, + { + "name": "model.layers.20.self_attn.o_proj.q_weight", + "shape": [ + 2304, + 256 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 30555648 + }, + { + "name": "model.layers.20.self_attn.o_proj.q_scale", + "shape": [ + 2304, + 64 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 294912, + "byteOffset": 32914944 + }, + { + "name": "model.layers.21.input_layernorm.weight", + "shape": [ + 2304 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4608, + "byteOffset": 33209856 + } + ], + "md5sum": "14694eefe69a69a9223867449c09d11c" + }, + { + "dataPath": "params_shard_23.bin", + "format": "raw-shard", + "nbytes": 33177600, + "records": [ + { + "name": "model.layers.21.mlp.down_proj.q_weight", + "shape": [ + 2304, + 1152 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 10616832, + "byteOffset": 0 + }, + { + "name": "model.layers.21.mlp.down_proj.q_scale", + "shape": [ + 2304, + 288 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1327104, + "byteOffset": 10616832 + }, + { + "name": "model.layers.21.mlp.gate_up_proj.q_weight", + "shape": [ + 18432, + 288 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 21233664, + "byteOffset": 11943936 + } + ], + "md5sum": "3fbc7ba0b3c4e0f20ae744e85561185e" + }, + { + "dataPath": "params_shard_24.bin", + "format": "raw-shard", + "nbytes": 21233664, + "records": [ + { + "name": "model.layers.22.mlp.gate_up_proj.q_weight", + "shape": [ + 18432, + 288 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 21233664, + "byteOffset": 0 + } + ], + "md5sum": "fb2183e1778ec2dfd9cd348116591da5" + }, + { + "dataPath": "params_shard_25.bin", + "format": "raw-shard", + "nbytes": 33214464, + "records": [ + { + "name": "model.layers.21.mlp.gate_up_proj.q_scale", + "shape": [ + 18432, + 72 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2654208, + "byteOffset": 0 + }, + { + "name": "model.layers.21.post_attention_layernorm.weight", + "shape": [ + 2304 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4608, + "byteOffset": 2654208 + }, + { + "name": "model.layers.21.post_feedforward_layernorm.weight", + "shape": [ + 2304 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4608, + "byteOffset": 2658816 + }, + { + "name": "model.layers.21.pre_feedforward_layernorm.weight", + "shape": [ + 2304 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4608, + "byteOffset": 2663424 + }, + { + "name": "model.layers.21.self_attn.qkv_proj.q_weight", + "shape": [ + 4096, + 288 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4718592, + "byteOffset": 2668032 + }, + { + "name": "model.layers.21.self_attn.qkv_proj.q_scale", + "shape": [ + 4096, + 72 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 589824, + "byteOffset": 7386624 + }, + { + "name": "model.layers.21.self_attn.o_proj.q_weight", + "shape": [ + 2304, + 256 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 7976448 + }, + { + "name": "model.layers.21.self_attn.o_proj.q_scale", + "shape": [ + 2304, + 64 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 294912, + "byteOffset": 10335744 + }, + { + "name": "model.layers.22.input_layernorm.weight", + "shape": [ + 2304 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4608, + "byteOffset": 10630656 + }, + { + "name": "model.layers.22.mlp.down_proj.q_weight", + "shape": [ + 2304, + 1152 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 10616832, + "byteOffset": 10635264 + }, + { + "name": "model.layers.22.mlp.down_proj.q_scale", + "shape": [ + 2304, + 288 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1327104, + "byteOffset": 21252096 + }, + { + "name": "model.layers.22.mlp.gate_up_proj.q_scale", + "shape": [ + 18432, + 72 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2654208, + "byteOffset": 22579200 + }, + { + "name": "model.layers.22.post_attention_layernorm.weight", + "shape": [ + 2304 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4608, + "byteOffset": 25233408 + }, + { + "name": "model.layers.22.post_feedforward_layernorm.weight", + "shape": [ + 2304 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4608, + "byteOffset": 25238016 + }, + { + "name": "model.layers.22.pre_feedforward_layernorm.weight", + "shape": [ + 2304 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4608, + "byteOffset": 25242624 + }, + { + "name": "model.layers.22.self_attn.qkv_proj.q_weight", + "shape": [ + 4096, + 288 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4718592, + "byteOffset": 25247232 + }, + { + "name": "model.layers.22.self_attn.qkv_proj.q_scale", + "shape": [ + 4096, + 72 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 589824, + "byteOffset": 29965824 + }, + { + "name": "model.layers.22.self_attn.o_proj.q_weight", + "shape": [ + 2304, + 256 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 30555648 + }, + { + "name": "model.layers.22.self_attn.o_proj.q_scale", + "shape": [ + 2304, + 64 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 294912, + "byteOffset": 32914944 + }, + { + "name": "model.layers.23.input_layernorm.weight", + "shape": [ + 2304 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4608, + "byteOffset": 33209856 + } + ], + "md5sum": "a76f404041e3f6647d86cbc041bb144e" + }, + { + "dataPath": "params_shard_26.bin", + "format": "raw-shard", + "nbytes": 33177600, + "records": [ + { + "name": "model.layers.23.mlp.down_proj.q_weight", + "shape": [ + 2304, + 1152 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 10616832, + "byteOffset": 0 + }, + { + "name": "model.layers.23.mlp.down_proj.q_scale", + "shape": [ + 2304, + 288 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1327104, + "byteOffset": 10616832 + }, + { + "name": "model.layers.23.mlp.gate_up_proj.q_weight", + "shape": [ + 18432, + 288 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 21233664, + "byteOffset": 11943936 + } + ], + "md5sum": "d30f7600b2debb90cf9851c84ea90ab4" + }, + { + "dataPath": "params_shard_27.bin", + "format": "raw-shard", + "nbytes": 31864320, + "records": [ + { + "name": "model.layers.23.mlp.gate_up_proj.q_scale", + "shape": [ + 18432, + 72 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2654208, + "byteOffset": 0 + }, + { + "name": "model.layers.23.post_attention_layernorm.weight", + "shape": [ + 2304 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4608, + "byteOffset": 2654208 + }, + { + "name": "model.layers.23.post_feedforward_layernorm.weight", + "shape": [ + 2304 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4608, + "byteOffset": 2658816 + }, + { + "name": "model.layers.23.pre_feedforward_layernorm.weight", + "shape": [ + 2304 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4608, + "byteOffset": 2663424 + }, + { + "name": "model.layers.23.self_attn.qkv_proj.q_weight", + "shape": [ + 4096, + 288 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4718592, + "byteOffset": 2668032 + }, + { + "name": "model.layers.23.self_attn.qkv_proj.q_scale", + "shape": [ + 4096, + 72 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 589824, + "byteOffset": 7386624 + }, + { + "name": "model.layers.23.self_attn.o_proj.q_weight", + "shape": [ + 2304, + 256 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 7976448 + }, + { + "name": "model.layers.23.self_attn.o_proj.q_scale", + "shape": [ + 2304, + 64 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 294912, + "byteOffset": 10335744 + }, + { + "name": "model.layers.24.mlp.gate_up_proj.q_weight", + "shape": [ + 18432, + 288 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 21233664, + "byteOffset": 10630656 + } + ], + "md5sum": "e33528541ac66995a65d4059e025adb4" + }, + { + "dataPath": "params_shard_28.bin", + "format": "raw-shard", + "nbytes": 21233664, + "records": [ + { + "name": "model.layers.3.mlp.gate_up_proj.q_weight", + "shape": [ + 18432, + 288 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 21233664, + "byteOffset": 0 + } + ], + "md5sum": "2334942f272bf7f3771df44bdc75a8f9" + }, + { + "dataPath": "params_shard_29.bin", + "format": "raw-shard", + "nbytes": 33200640, + "records": [ + { + "name": "model.layers.24.mlp.gate_up_proj.q_scale", + "shape": [ + 18432, + 72 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2654208, + "byteOffset": 0 + }, + { + "name": "model.layers.24.self_attn.qkv_proj.q_weight", + "shape": [ + 4096, + 288 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4718592, + "byteOffset": 2654208 + }, + { + "name": "model.layers.24.self_attn.qkv_proj.q_scale", + "shape": [ + 4096, + 72 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 589824, + "byteOffset": 7372800 + }, + { + "name": "model.layers.24.self_attn.o_proj.q_weight", + "shape": [ + 2304, + 256 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 7962624 + }, + { + "name": "model.layers.24.self_attn.o_proj.q_scale", + "shape": [ + 2304, + 64 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 294912, + "byteOffset": 10321920 + }, + { + "name": "model.layers.3.input_layernorm.weight", + "shape": [ + 2304 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4608, + "byteOffset": 10616832 + }, + { + "name": "model.layers.3.mlp.down_proj.q_weight", + "shape": [ + 2304, + 1152 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 10616832, + "byteOffset": 10621440 + }, + { + "name": "model.layers.3.mlp.down_proj.q_scale", + "shape": [ + 2304, + 288 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1327104, + "byteOffset": 21238272 + }, + { + "name": "model.layers.3.mlp.gate_up_proj.q_scale", + "shape": [ + 18432, + 72 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2654208, + "byteOffset": 22565376 + }, + { + "name": "model.layers.3.post_attention_layernorm.weight", + "shape": [ + 2304 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4608, + "byteOffset": 25219584 + }, + { + "name": "model.layers.3.post_feedforward_layernorm.weight", + "shape": [ + 2304 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4608, + "byteOffset": 25224192 + }, + { + "name": "model.layers.3.pre_feedforward_layernorm.weight", + "shape": [ + 2304 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4608, + "byteOffset": 25228800 + }, + { + "name": "model.layers.3.self_attn.qkv_proj.q_weight", + "shape": [ + 4096, + 288 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4718592, + "byteOffset": 25233408 + }, + { + "name": "model.layers.3.self_attn.qkv_proj.q_scale", + "shape": [ + 4096, + 72 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 589824, + "byteOffset": 29952000 + }, + { + "name": "model.layers.3.self_attn.o_proj.q_weight", + "shape": [ + 2304, + 256 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 30541824 + }, + { + "name": "model.layers.3.self_attn.o_proj.q_scale", + "shape": [ + 2304, + 64 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 294912, + "byteOffset": 32901120 + }, + { + "name": "model.layers.4.input_layernorm.weight", + "shape": [ + 2304 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4608, + "byteOffset": 33196032 + } + ], + "md5sum": "9ee5e7a7f0213b4d55deb8cea35b3a8e" + }, + { + "dataPath": "params_shard_30.bin", + "format": "raw-shard", + "nbytes": 33177600, + "records": [ + { + "name": "model.layers.4.mlp.down_proj.q_weight", + "shape": [ + 2304, + 1152 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 10616832, + "byteOffset": 0 + }, + { + "name": "model.layers.4.mlp.down_proj.q_scale", + "shape": [ + 2304, + 288 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1327104, + "byteOffset": 10616832 + }, + { + "name": "model.layers.4.mlp.gate_up_proj.q_weight", + "shape": [ + 18432, + 288 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 21233664, + "byteOffset": 11943936 + } + ], + "md5sum": "babbe4d246c14e00495e33536df1e291" + }, + { + "dataPath": "params_shard_31.bin", + "format": "raw-shard", + "nbytes": 21233664, + "records": [ + { + "name": "model.layers.5.mlp.gate_up_proj.q_weight", + "shape": [ + 18432, + 288 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 21233664, + "byteOffset": 0 + } + ], + "md5sum": "21f58b7fc7ca5c6dbf8d2953aa7abdb9" + }, + { + "dataPath": "params_shard_32.bin", + "format": "raw-shard", + "nbytes": 33214464, + "records": [ + { + "name": "model.layers.4.mlp.gate_up_proj.q_scale", + "shape": [ + 18432, + 72 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2654208, + "byteOffset": 0 + }, + { + "name": "model.layers.4.post_attention_layernorm.weight", + "shape": [ + 2304 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4608, + "byteOffset": 2654208 + }, + { + "name": "model.layers.4.post_feedforward_layernorm.weight", + "shape": [ + 2304 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4608, + "byteOffset": 2658816 + }, + { + "name": "model.layers.4.pre_feedforward_layernorm.weight", + "shape": [ + 2304 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4608, + "byteOffset": 2663424 + }, + { + "name": "model.layers.4.self_attn.qkv_proj.q_weight", + "shape": [ + 4096, + 288 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4718592, + "byteOffset": 2668032 + }, + { + "name": "model.layers.4.self_attn.qkv_proj.q_scale", + "shape": [ + 4096, + 72 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 589824, + "byteOffset": 7386624 + }, + { + "name": "model.layers.4.self_attn.o_proj.q_weight", + "shape": [ + 2304, + 256 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 7976448 + }, + { + "name": "model.layers.4.self_attn.o_proj.q_scale", + "shape": [ + 2304, + 64 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 294912, + "byteOffset": 10335744 + }, + { + "name": "model.layers.5.input_layernorm.weight", + "shape": [ + 2304 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4608, + "byteOffset": 10630656 + }, + { + "name": "model.layers.5.mlp.down_proj.q_weight", + "shape": [ + 2304, + 1152 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 10616832, + "byteOffset": 10635264 + }, + { + "name": "model.layers.5.mlp.down_proj.q_scale", + "shape": [ + 2304, + 288 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1327104, + "byteOffset": 21252096 + }, + { + "name": "model.layers.5.mlp.gate_up_proj.q_scale", + "shape": [ + 18432, + 72 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2654208, + "byteOffset": 22579200 + }, + { + "name": "model.layers.5.post_attention_layernorm.weight", + "shape": [ + 2304 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4608, + "byteOffset": 25233408 + }, + { + "name": "model.layers.5.post_feedforward_layernorm.weight", + "shape": [ + 2304 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4608, + "byteOffset": 25238016 + }, + { + "name": "model.layers.5.pre_feedforward_layernorm.weight", + "shape": [ + 2304 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4608, + "byteOffset": 25242624 + }, + { + "name": "model.layers.5.self_attn.qkv_proj.q_weight", + "shape": [ + 4096, + 288 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4718592, + "byteOffset": 25247232 + }, + { + "name": "model.layers.5.self_attn.qkv_proj.q_scale", + "shape": [ + 4096, + 72 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 589824, + "byteOffset": 29965824 + }, + { + "name": "model.layers.5.self_attn.o_proj.q_weight", + "shape": [ + 2304, + 256 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 30555648 + }, + { + "name": "model.layers.5.self_attn.o_proj.q_scale", + "shape": [ + 2304, + 64 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 294912, + "byteOffset": 32914944 + }, + { + "name": "model.layers.6.input_layernorm.weight", + "shape": [ + 2304 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4608, + "byteOffset": 33209856 + } + ], + "md5sum": "44ed88825a210c8e2033400e70300767" + }, + { + "dataPath": "params_shard_33.bin", + "format": "raw-shard", + "nbytes": 33177600, + "records": [ + { + "name": "model.layers.6.mlp.down_proj.q_weight", + "shape": [ + 2304, + 1152 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 10616832, + "byteOffset": 0 + }, + { + "name": "model.layers.6.mlp.down_proj.q_scale", + "shape": [ + 2304, + 288 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1327104, + "byteOffset": 10616832 + }, + { + "name": "model.layers.6.mlp.gate_up_proj.q_weight", + "shape": [ + 18432, + 288 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 21233664, + "byteOffset": 11943936 + } + ], + "md5sum": "93e69ae37c7383b0004a29cc419c8726" + }, + { + "dataPath": "params_shard_34.bin", + "format": "raw-shard", + "nbytes": 21233664, + "records": [ + { + "name": "model.layers.7.mlp.gate_up_proj.q_weight", + "shape": [ + 18432, + 288 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 21233664, + "byteOffset": 0 + } + ], + "md5sum": "29656ba96b5514b3401910b2cf1b2093" + }, + { + "dataPath": "params_shard_35.bin", + "format": "raw-shard", + "nbytes": 33214464, + "records": [ + { + "name": "model.layers.6.mlp.gate_up_proj.q_scale", + "shape": [ + 18432, + 72 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2654208, + "byteOffset": 0 + }, + { + "name": "model.layers.6.post_attention_layernorm.weight", + "shape": [ + 2304 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4608, + "byteOffset": 2654208 + }, + { + "name": "model.layers.6.post_feedforward_layernorm.weight", + "shape": [ + 2304 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4608, + "byteOffset": 2658816 + }, + { + "name": "model.layers.6.pre_feedforward_layernorm.weight", + "shape": [ + 2304 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4608, + "byteOffset": 2663424 + }, + { + "name": "model.layers.6.self_attn.qkv_proj.q_weight", + "shape": [ + 4096, + 288 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4718592, + "byteOffset": 2668032 + }, + { + "name": "model.layers.6.self_attn.qkv_proj.q_scale", + "shape": [ + 4096, + 72 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 589824, + "byteOffset": 7386624 + }, + { + "name": "model.layers.6.self_attn.o_proj.q_weight", + "shape": [ + 2304, + 256 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 7976448 + }, + { + "name": "model.layers.6.self_attn.o_proj.q_scale", + "shape": [ + 2304, + 64 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 294912, + "byteOffset": 10335744 + }, + { + "name": "model.layers.7.input_layernorm.weight", + "shape": [ + 2304 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4608, + "byteOffset": 10630656 + }, + { + "name": "model.layers.7.mlp.down_proj.q_weight", + "shape": [ + 2304, + 1152 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 10616832, + "byteOffset": 10635264 + }, + { + "name": "model.layers.7.mlp.down_proj.q_scale", + "shape": [ + 2304, + 288 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1327104, + "byteOffset": 21252096 + }, + { + "name": "model.layers.7.mlp.gate_up_proj.q_scale", + "shape": [ + 18432, + 72 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2654208, + "byteOffset": 22579200 + }, + { + "name": "model.layers.7.post_attention_layernorm.weight", + "shape": [ + 2304 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4608, + "byteOffset": 25233408 + }, + { + "name": "model.layers.7.post_feedforward_layernorm.weight", + "shape": [ + 2304 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4608, + "byteOffset": 25238016 + }, + { + "name": "model.layers.7.pre_feedforward_layernorm.weight", + "shape": [ + 2304 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4608, + "byteOffset": 25242624 + }, + { + "name": "model.layers.7.self_attn.qkv_proj.q_weight", + "shape": [ + 4096, + 288 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4718592, + "byteOffset": 25247232 + }, + { + "name": "model.layers.7.self_attn.qkv_proj.q_scale", + "shape": [ + 4096, + 72 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 589824, + "byteOffset": 29965824 + }, + { + "name": "model.layers.7.self_attn.o_proj.q_weight", + "shape": [ + 2304, + 256 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 30555648 + }, + { + "name": "model.layers.7.self_attn.o_proj.q_scale", + "shape": [ + 2304, + 64 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 294912, + "byteOffset": 32914944 + }, + { + "name": "model.layers.8.input_layernorm.weight", + "shape": [ + 2304 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4608, + "byteOffset": 33209856 + } + ], + "md5sum": "550983d43939b0d3d752d29349cefe9c" + }, + { + "dataPath": "params_shard_36.bin", + "format": "raw-shard", + "nbytes": 33177600, + "records": [ + { + "name": "model.layers.8.mlp.down_proj.q_weight", + "shape": [ + 2304, + 1152 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 10616832, + "byteOffset": 0 + }, + { + "name": "model.layers.8.mlp.down_proj.q_scale", + "shape": [ + 2304, + 288 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1327104, + "byteOffset": 10616832 + }, + { + "name": "model.layers.8.mlp.gate_up_proj.q_weight", + "shape": [ + 18432, + 288 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 21233664, + "byteOffset": 11943936 + } + ], + "md5sum": "e041c162985da23150d03713fec452c5" + }, + { + "dataPath": "params_shard_37.bin", + "format": "raw-shard", + "nbytes": 21233664, + "records": [ + { + "name": "model.layers.9.mlp.gate_up_proj.q_weight", + "shape": [ + 18432, + 288 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 21233664, + "byteOffset": 0 + } + ], + "md5sum": "c816f0c057f977aef0d917927c495952" + }, + { + "dataPath": "params_shard_38.bin", + "format": "raw-shard", + "nbytes": 33214464, + "records": [ + { + "name": "model.layers.8.mlp.gate_up_proj.q_scale", + "shape": [ + 18432, + 72 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2654208, + "byteOffset": 0 + }, + { + "name": "model.layers.8.post_attention_layernorm.weight", + "shape": [ + 2304 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4608, + "byteOffset": 2654208 + }, + { + "name": "model.layers.8.post_feedforward_layernorm.weight", + "shape": [ + 2304 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4608, + "byteOffset": 2658816 + }, + { + "name": "model.layers.8.pre_feedforward_layernorm.weight", + "shape": [ + 2304 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4608, + "byteOffset": 2663424 + }, + { + "name": "model.layers.8.self_attn.qkv_proj.q_weight", + "shape": [ + 4096, + 288 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4718592, + "byteOffset": 2668032 + }, + { + "name": "model.layers.8.self_attn.qkv_proj.q_scale", + "shape": [ + 4096, + 72 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 589824, + "byteOffset": 7386624 + }, + { + "name": "model.layers.8.self_attn.o_proj.q_weight", + "shape": [ + 2304, + 256 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 7976448 + }, + { + "name": "model.layers.8.self_attn.o_proj.q_scale", + "shape": [ + 2304, + 64 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 294912, + "byteOffset": 10335744 + }, + { + "name": "model.layers.9.input_layernorm.weight", + "shape": [ + 2304 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4608, + "byteOffset": 10630656 + }, + { + "name": "model.layers.9.mlp.down_proj.q_weight", + "shape": [ + 2304, + 1152 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 10616832, + "byteOffset": 10635264 + }, + { + "name": "model.layers.9.mlp.down_proj.q_scale", + "shape": [ + 2304, + 288 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1327104, + "byteOffset": 21252096 + }, + { + "name": "model.layers.9.mlp.gate_up_proj.q_scale", + "shape": [ + 18432, + 72 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2654208, + "byteOffset": 22579200 + }, + { + "name": "model.layers.9.post_attention_layernorm.weight", + "shape": [ + 2304 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4608, + "byteOffset": 25233408 + }, + { + "name": "model.layers.9.post_feedforward_layernorm.weight", + "shape": [ + 2304 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4608, + "byteOffset": 25238016 + }, + { + "name": "model.layers.9.pre_feedforward_layernorm.weight", + "shape": [ + 2304 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4608, + "byteOffset": 25242624 + }, + { + "name": "model.layers.9.self_attn.qkv_proj.q_weight", + "shape": [ + 4096, + 288 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4718592, + "byteOffset": 25247232 + }, + { + "name": "model.layers.9.self_attn.qkv_proj.q_scale", + "shape": [ + 4096, + 72 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 589824, + "byteOffset": 29965824 + }, + { + "name": "model.layers.9.self_attn.o_proj.q_weight", + "shape": [ + 2304, + 256 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 30555648 + }, + { + "name": "model.layers.9.self_attn.o_proj.q_scale", + "shape": [ + 2304, + 64 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 294912, + "byteOffset": 32914944 + }, + { + "name": "model.layers.24.input_layernorm.weight", + "shape": [ + 2304 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4608, + "byteOffset": 33209856 + } + ], + "md5sum": "d5cce7273b21673fa7394f92f0b7e53d" + }, + { + "dataPath": "params_shard_39.bin", + "format": "raw-shard", + "nbytes": 21233664, + "records": [ + { + "name": "model.layers.25.mlp.gate_up_proj.q_weight", + "shape": [ + 18432, + 288 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 21233664, + "byteOffset": 0 + } + ], + "md5sum": "7e6faa752093ea3d32faa7e8226b3dd1" + }, + { + "dataPath": "params_shard_40.bin", + "format": "raw-shard", + "nbytes": 31882752, + "records": [ + { + "name": "model.layers.24.mlp.down_proj.q_weight", + "shape": [ + 2304, + 1152 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 10616832, + "byteOffset": 0 + }, + { + "name": "model.layers.24.mlp.down_proj.q_scale", + "shape": [ + 2304, + 288 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1327104, + "byteOffset": 10616832 + }, + { + "name": "model.layers.24.post_attention_layernorm.weight", + "shape": [ + 2304 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4608, + "byteOffset": 11943936 + }, + { + "name": "model.layers.24.post_feedforward_layernorm.weight", + "shape": [ + 2304 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4608, + "byteOffset": 11948544 + }, + { + "name": "model.layers.24.pre_feedforward_layernorm.weight", + "shape": [ + 2304 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4608, + "byteOffset": 11953152 + }, + { + "name": "model.layers.25.input_layernorm.weight", + "shape": [ + 2304 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4608, + "byteOffset": 11957760 + }, + { + "name": "model.layers.25.mlp.down_proj.q_weight", + "shape": [ + 2304, + 1152 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 10616832, + "byteOffset": 11962368 + }, + { + "name": "model.layers.25.mlp.down_proj.q_scale", + "shape": [ + 2304, + 288 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1327104, + "byteOffset": 22579200 + }, + { + "name": "model.layers.25.mlp.gate_up_proj.q_scale", + "shape": [ + 18432, + 72 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2654208, + "byteOffset": 23906304 + }, + { + "name": "model.layers.25.post_attention_layernorm.weight", + "shape": [ + 2304 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4608, + "byteOffset": 26560512 + }, + { + "name": "model.layers.25.post_feedforward_layernorm.weight", + "shape": [ + 2304 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4608, + "byteOffset": 26565120 + }, + { + "name": "model.layers.25.pre_feedforward_layernorm.weight", + "shape": [ + 2304 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4608, + "byteOffset": 26569728 + }, + { + "name": "model.layers.25.self_attn.qkv_proj.q_weight", + "shape": [ + 4096, + 288 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4718592, + "byteOffset": 26574336 + }, + { + "name": "model.layers.25.self_attn.qkv_proj.q_scale", + "shape": [ + 4096, + 72 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 589824, + "byteOffset": 31292928 + } + ], + "md5sum": "30870903e1e4927fff8d281221db6cf9" + }, + { + "dataPath": "params_shard_41.bin", + "format": "raw-shard", + "nbytes": 2658816, + "records": [ + { + "name": "model.layers.25.self_attn.o_proj.q_weight", + "shape": [ + 2304, + 256 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 0 + }, + { + "name": "model.layers.25.self_attn.o_proj.q_scale", + "shape": [ + 2304, + 64 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 294912, + "byteOffset": 2359296 + }, + { + "name": "model.norm.weight", + "shape": [ + 2304 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4608, + "byteOffset": 2654208 + } + ], + "md5sum": "460980e964a2321290ce2641bdadbf75" + } + ] +} \ No newline at end of file diff --git a/tokenizer.json b/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..ec00aeef134de00b78295136491b39e4efa0a26a --- /dev/null +++ b/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6c36fea8c2ad69ed08c6180b465a70bf23e9bde2453632f9d3ba827383154def +size 34362885 diff --git a/tokenizer.model b/tokenizer.model new file mode 100644 index 0000000000000000000000000000000000000000..0fb99fadba40bda6e559565d903f62cfde63b88a --- /dev/null +++ b/tokenizer.model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6969e64047744a44bb3abfb5c50f8de0f7ed8b571d5444426ef931f651d1a0ef +size 4241111 diff --git a/tokenizer_config.json b/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..7b780e84c47f53ea44180c2729027c55a1d369d0 --- /dev/null +++ b/tokenizer_config.json @@ -0,0 +1,2009 @@ +{ + "add_bos_token": true, + "add_eos_token": false, + "added_tokens_decoder": { + "0": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "2": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "3": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "4": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "5": { + "content": "<2mass>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "6": { + "content": "[@BOS@]", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "7": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "8": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "9": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "10": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "11": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "12": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "13": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "14": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "15": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "16": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "17": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "18": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "19": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "20": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "21": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "22": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "23": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "24": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "25": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "26": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "27": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "28": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "29": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "30": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "31": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "32": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "33": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "34": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "35": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "36": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "37": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "38": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "39": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "40": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "41": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "42": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "43": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "44": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "45": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "46": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "47": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "48": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "49": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "50": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "51": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "52": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "53": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "54": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "55": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "56": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "57": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "58": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "59": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "60": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "61": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "62": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "63": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "64": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "65": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "66": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "67": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "68": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "69": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "70": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "71": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "72": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "73": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "74": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "75": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "76": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "77": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "78": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "79": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "80": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "81": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "82": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "83": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "84": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "85": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "86": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "87": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "88": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "89": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "90": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "91": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "92": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "93": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "94": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "95": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "96": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "97": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "98": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "99": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "100": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "101": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "102": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "103": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "104": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "105": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "106": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "107": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "108": { + "content": "\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "109": { + "content": "\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "110": { + "content": "\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "111": { + "content": "\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "112": { + "content": "\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "113": { + "content": "\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "114": { + "content": "\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "115": { + "content": "\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "116": { + "content": "\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "117": { + "content": "\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "118": { + "content": "\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "119": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "120": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "121": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "122": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "123": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "124": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "125": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "126": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "127": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "128": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "129": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "130": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "131": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "132": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "133": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "134": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "135": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "136": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "137": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "138": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "139": { + "content": "▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "140": { + "content": "▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "141": { + "content": "▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "142": { + "content": "▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "143": { + "content": "▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "144": { + "content": "▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "145": { + "content": "▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "146": { + "content": "▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "147": { + "content": "▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "148": { + "content": "▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "149": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "150": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "152": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "153": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "154": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "155": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "156": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "157": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "158": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "159": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "160": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "161": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "162": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "163": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "164": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "165": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "166": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "167": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "168": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "169": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "170": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "172": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "173": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "174": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "175": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "171": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "176": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "177": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "178": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "179": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "180": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "181": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "182": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "183": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "184": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "185": { + "content": "

", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "186": { + "content": "

", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "187": { + "content": "

", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "188": { + "content": "

", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "189": { + "content": "

", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "190": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "191": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "192": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "193": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "194": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "195": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "196": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "197": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "198": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "199": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "200": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "201": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "202": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "203": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "204": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "205": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "206": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "207": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "208": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "209": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "210": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "211": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "212": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "213": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "214": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "215": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "216": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255968": { + "content": "[toxicity=0]", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255969": { + "content": "\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255970": { + "content": "\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255971": { + "content": "\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255972": { + "content": "\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255973": { + "content": "\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255974": { + "content": "\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255975": { + "content": "\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255976": { + "content": "\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255977": { + "content": "\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255978": { + "content": "\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255979": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255980": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255981": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255982": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255983": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255984": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255985": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255986": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255987": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255988": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255989": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255990": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255991": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255992": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255993": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255994": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255995": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255996": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255997": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255998": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255999": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + } + }, + "bos_token": "", + "chat_template": "{{ bos_token }}{% if messages[0]['role'] == 'system' %}{{ raise_exception('System role not supported') }}{% endif %}{% for message in messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% if (message['role'] == 'assistant') %}{% set role = 'model' %}{% else %}{% set role = message['role'] %}{% endif %}{{ '' + role + '\n' + message['content'] | trim + '\n' }}{% endfor %}{% if add_generation_prompt %}{{'model\n'}}{% endif %}", + "clean_up_tokenization_spaces": false, + "eos_token": "", + "model_max_length": 2048, + "pad_token": "", + "sp_model_kwargs": {}, + "spaces_between_special_tokens": false, + "tokenizer_class": "GemmaTokenizer", + "unk_token": "", + "use_default_system_prompt": false +}