|
{
|
|
"metadata": {
|
|
"ParamSize": 325,
|
|
"ParamBytes": 4517404672.0,
|
|
"BitsPerParam": 4.500381277757404
|
|
},
|
|
"records": [
|
|
{
|
|
"dataPath": "params_shard_0.bin",
|
|
"format": "raw-shard",
|
|
"nbytes": 262668288,
|
|
"records": [
|
|
{
|
|
"name": "lm_head.q_weight",
|
|
"shape": [
|
|
128256,
|
|
512
|
|
],
|
|
"dtype": "uint32",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 262668288,
|
|
"byteOffset": 0
|
|
}
|
|
],
|
|
"md5sum": "a1b4f2c6e52c9198a5b9409a4c36bfdd"
|
|
},
|
|
{
|
|
"dataPath": "params_shard_1.bin",
|
|
"format": "raw-shard",
|
|
"nbytes": 29360128,
|
|
"records": [
|
|
{
|
|
"name": "model.layers.31.mlp.down_proj.q_weight",
|
|
"shape": [
|
|
4096,
|
|
1792
|
|
],
|
|
"dtype": "uint32",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 29360128,
|
|
"byteOffset": 0
|
|
}
|
|
],
|
|
"md5sum": "840a03087c6c627fe592c5cd8b2c4ed1"
|
|
},
|
|
{
|
|
"dataPath": "params_shard_2.bin",
|
|
"format": "raw-shard",
|
|
"nbytes": 32841728,
|
|
"records": [
|
|
{
|
|
"name": "lm_head.q_scale",
|
|
"shape": [
|
|
128256,
|
|
128
|
|
],
|
|
"dtype": "float16",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 32833536,
|
|
"byteOffset": 0
|
|
},
|
|
{
|
|
"name": "model.layers.31.input_layernorm.weight",
|
|
"shape": [
|
|
4096
|
|
],
|
|
"dtype": "float16",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 8192,
|
|
"byteOffset": 32833536
|
|
}
|
|
],
|
|
"md5sum": "b273fa59cdfdd1f4d5487378775cb170"
|
|
},
|
|
{
|
|
"dataPath": "params_shard_3.bin",
|
|
"format": "raw-shard",
|
|
"nbytes": 262668288,
|
|
"records": [
|
|
{
|
|
"name": "model.embed_tokens.q_weight",
|
|
"shape": [
|
|
128256,
|
|
512
|
|
],
|
|
"dtype": "uint32",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 262668288,
|
|
"byteOffset": 0
|
|
}
|
|
],
|
|
"md5sum": "0b8017cde37f54fd2073c464e0212643"
|
|
},
|
|
{
|
|
"dataPath": "params_shard_4.bin",
|
|
"format": "raw-shard",
|
|
"nbytes": 32833536,
|
|
"records": [
|
|
{
|
|
"name": "model.embed_tokens.q_scale",
|
|
"shape": [
|
|
128256,
|
|
128
|
|
],
|
|
"dtype": "float16",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 32833536,
|
|
"byteOffset": 0
|
|
}
|
|
],
|
|
"md5sum": "ec1f5c954a5962ff7a038ec0f8e110e2"
|
|
},
|
|
{
|
|
"dataPath": "params_shard_5.bin",
|
|
"format": "raw-shard",
|
|
"nbytes": 33054720,
|
|
"records": [
|
|
{
|
|
"name": "model.layers.31.mlp.down_proj.q_scale",
|
|
"shape": [
|
|
4096,
|
|
448
|
|
],
|
|
"dtype": "float16",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 3670016,
|
|
"byteOffset": 0
|
|
},
|
|
{
|
|
"name": "model.layers.31.post_attention_layernorm.weight",
|
|
"shape": [
|
|
4096
|
|
],
|
|
"dtype": "float16",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 8192,
|
|
"byteOffset": 3670016
|
|
},
|
|
{
|
|
"name": "model.norm.weight",
|
|
"shape": [
|
|
4096
|
|
],
|
|
"dtype": "float16",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 8192,
|
|
"byteOffset": 3678208
|
|
},
|
|
{
|
|
"name": "model.layers.0.input_layernorm.weight",
|
|
"shape": [
|
|
4096
|
|
],
|
|
"dtype": "float16",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 8192,
|
|
"byteOffset": 3686400
|
|
},
|
|
{
|
|
"name": "model.layers.0.mlp.down_proj.q_weight",
|
|
"shape": [
|
|
4096,
|
|
1792
|
|
],
|
|
"dtype": "uint32",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 29360128,
|
|
"byteOffset": 3694592
|
|
}
|
|
],
|
|
"md5sum": "2ae973efd1b91dc425033f1f075853e2"
|
|
},
|
|
{
|
|
"dataPath": "params_shard_6.bin",
|
|
"format": "raw-shard",
|
|
"nbytes": 58720256,
|
|
"records": [
|
|
{
|
|
"name": "model.layers.0.mlp.gate_up_proj.q_weight",
|
|
"shape": [
|
|
28672,
|
|
512
|
|
],
|
|
"dtype": "uint32",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 58720256,
|
|
"byteOffset": 0
|
|
}
|
|
],
|
|
"md5sum": "53ea69b633080c6e349e07762a821695"
|
|
},
|
|
{
|
|
"dataPath": "params_shard_7.bin",
|
|
"format": "raw-shard",
|
|
"nbytes": 25174016,
|
|
"records": [
|
|
{
|
|
"name": "model.layers.0.mlp.down_proj.q_scale",
|
|
"shape": [
|
|
4096,
|
|
448
|
|
],
|
|
"dtype": "float16",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 3670016,
|
|
"byteOffset": 0
|
|
},
|
|
{
|
|
"name": "model.layers.0.mlp.gate_up_proj.q_scale",
|
|
"shape": [
|
|
28672,
|
|
128
|
|
],
|
|
"dtype": "float16",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 7340032,
|
|
"byteOffset": 3670016
|
|
},
|
|
{
|
|
"name": "model.layers.0.post_attention_layernorm.weight",
|
|
"shape": [
|
|
4096
|
|
],
|
|
"dtype": "float16",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 8192,
|
|
"byteOffset": 11010048
|
|
},
|
|
{
|
|
"name": "model.layers.0.self_attn.qkv_proj.q_weight",
|
|
"shape": [
|
|
6144,
|
|
512
|
|
],
|
|
"dtype": "uint32",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 12582912,
|
|
"byteOffset": 11018240
|
|
},
|
|
{
|
|
"name": "model.layers.0.self_attn.qkv_proj.q_scale",
|
|
"shape": [
|
|
6144,
|
|
128
|
|
],
|
|
"dtype": "float16",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 1572864,
|
|
"byteOffset": 23601152
|
|
}
|
|
],
|
|
"md5sum": "9fb6dfbfe7e2d37bb59966557930e884"
|
|
},
|
|
{
|
|
"dataPath": "params_shard_8.bin",
|
|
"format": "raw-shard",
|
|
"nbytes": 29360128,
|
|
"records": [
|
|
{
|
|
"name": "model.layers.1.mlp.down_proj.q_weight",
|
|
"shape": [
|
|
4096,
|
|
1792
|
|
],
|
|
"dtype": "uint32",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 29360128,
|
|
"byteOffset": 0
|
|
}
|
|
],
|
|
"md5sum": "0d9de937f7a6bc9cc2e8687f1cede573"
|
|
},
|
|
{
|
|
"dataPath": "params_shard_9.bin",
|
|
"format": "raw-shard",
|
|
"nbytes": 58720256,
|
|
"records": [
|
|
{
|
|
"name": "model.layers.1.mlp.gate_up_proj.q_weight",
|
|
"shape": [
|
|
28672,
|
|
512
|
|
],
|
|
"dtype": "uint32",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 58720256,
|
|
"byteOffset": 0
|
|
}
|
|
],
|
|
"md5sum": "7fcf3860fd53182d98e3a18e3922b7c2"
|
|
},
|
|
{
|
|
"dataPath": "params_shard_10.bin",
|
|
"format": "raw-shard",
|
|
"nbytes": 33046528,
|
|
"records": [
|
|
{
|
|
"name": "model.layers.0.self_attn.o_proj.q_weight",
|
|
"shape": [
|
|
4096,
|
|
512
|
|
],
|
|
"dtype": "uint32",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 8388608,
|
|
"byteOffset": 0
|
|
},
|
|
{
|
|
"name": "model.layers.0.self_attn.o_proj.q_scale",
|
|
"shape": [
|
|
4096,
|
|
128
|
|
],
|
|
"dtype": "float16",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 1048576,
|
|
"byteOffset": 8388608
|
|
},
|
|
{
|
|
"name": "model.layers.1.input_layernorm.weight",
|
|
"shape": [
|
|
4096
|
|
],
|
|
"dtype": "float16",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 8192,
|
|
"byteOffset": 9437184
|
|
},
|
|
{
|
|
"name": "model.layers.1.mlp.down_proj.q_scale",
|
|
"shape": [
|
|
4096,
|
|
448
|
|
],
|
|
"dtype": "float16",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 3670016,
|
|
"byteOffset": 9445376
|
|
},
|
|
{
|
|
"name": "model.layers.1.mlp.gate_up_proj.q_scale",
|
|
"shape": [
|
|
28672,
|
|
128
|
|
],
|
|
"dtype": "float16",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 7340032,
|
|
"byteOffset": 13115392
|
|
},
|
|
{
|
|
"name": "model.layers.1.post_attention_layernorm.weight",
|
|
"shape": [
|
|
4096
|
|
],
|
|
"dtype": "float16",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 8192,
|
|
"byteOffset": 20455424
|
|
},
|
|
{
|
|
"name": "model.layers.1.self_attn.qkv_proj.q_weight",
|
|
"shape": [
|
|
6144,
|
|
512
|
|
],
|
|
"dtype": "uint32",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 12582912,
|
|
"byteOffset": 20463616
|
|
}
|
|
],
|
|
"md5sum": "fcde7a73e8add8334e4063f8d1f342f4"
|
|
},
|
|
{
|
|
"dataPath": "params_shard_11.bin",
|
|
"format": "raw-shard",
|
|
"nbytes": 29360128,
|
|
"records": [
|
|
{
|
|
"name": "model.layers.2.mlp.down_proj.q_weight",
|
|
"shape": [
|
|
4096,
|
|
1792
|
|
],
|
|
"dtype": "uint32",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 29360128,
|
|
"byteOffset": 0
|
|
}
|
|
],
|
|
"md5sum": "60e788c13d7e600886d5259ba3dcd359"
|
|
},
|
|
{
|
|
"dataPath": "params_shard_12.bin",
|
|
"format": "raw-shard",
|
|
"nbytes": 58720256,
|
|
"records": [
|
|
{
|
|
"name": "model.layers.2.mlp.gate_up_proj.q_weight",
|
|
"shape": [
|
|
28672,
|
|
512
|
|
],
|
|
"dtype": "uint32",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 58720256,
|
|
"byteOffset": 0
|
|
}
|
|
],
|
|
"md5sum": "0beee69ff1c286064e9f4888772e691d"
|
|
},
|
|
{
|
|
"dataPath": "params_shard_13.bin",
|
|
"format": "raw-shard",
|
|
"nbytes": 22036480,
|
|
"records": [
|
|
{
|
|
"name": "model.layers.1.self_attn.qkv_proj.q_scale",
|
|
"shape": [
|
|
6144,
|
|
128
|
|
],
|
|
"dtype": "float16",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 1572864,
|
|
"byteOffset": 0
|
|
},
|
|
{
|
|
"name": "model.layers.1.self_attn.o_proj.q_weight",
|
|
"shape": [
|
|
4096,
|
|
512
|
|
],
|
|
"dtype": "uint32",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 8388608,
|
|
"byteOffset": 1572864
|
|
},
|
|
{
|
|
"name": "model.layers.1.self_attn.o_proj.q_scale",
|
|
"shape": [
|
|
4096,
|
|
128
|
|
],
|
|
"dtype": "float16",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 1048576,
|
|
"byteOffset": 9961472
|
|
},
|
|
{
|
|
"name": "model.layers.2.input_layernorm.weight",
|
|
"shape": [
|
|
4096
|
|
],
|
|
"dtype": "float16",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 8192,
|
|
"byteOffset": 11010048
|
|
},
|
|
{
|
|
"name": "model.layers.2.mlp.down_proj.q_scale",
|
|
"shape": [
|
|
4096,
|
|
448
|
|
],
|
|
"dtype": "float16",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 3670016,
|
|
"byteOffset": 11018240
|
|
},
|
|
{
|
|
"name": "model.layers.2.mlp.gate_up_proj.q_scale",
|
|
"shape": [
|
|
28672,
|
|
128
|
|
],
|
|
"dtype": "float16",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 7340032,
|
|
"byteOffset": 14688256
|
|
},
|
|
{
|
|
"name": "model.layers.2.post_attention_layernorm.weight",
|
|
"shape": [
|
|
4096
|
|
],
|
|
"dtype": "float16",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 8192,
|
|
"byteOffset": 22028288
|
|
}
|
|
],
|
|
"md5sum": "5ad80a11a8f01a3e6a04d2a5b4da05a7"
|
|
},
|
|
{
|
|
"dataPath": "params_shard_14.bin",
|
|
"format": "raw-shard",
|
|
"nbytes": 29360128,
|
|
"records": [
|
|
{
|
|
"name": "model.layers.3.mlp.down_proj.q_weight",
|
|
"shape": [
|
|
4096,
|
|
1792
|
|
],
|
|
"dtype": "uint32",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 29360128,
|
|
"byteOffset": 0
|
|
}
|
|
],
|
|
"md5sum": "82df641dfc781bdf0e3c9c41084fab96"
|
|
},
|
|
{
|
|
"dataPath": "params_shard_15.bin",
|
|
"format": "raw-shard",
|
|
"nbytes": 58720256,
|
|
"records": [
|
|
{
|
|
"name": "model.layers.3.mlp.gate_up_proj.q_weight",
|
|
"shape": [
|
|
28672,
|
|
512
|
|
],
|
|
"dtype": "uint32",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 58720256,
|
|
"byteOffset": 0
|
|
}
|
|
],
|
|
"md5sum": "ea36e7479478306c64a2ec7ccbf6c9fe"
|
|
},
|
|
{
|
|
"dataPath": "params_shard_16.bin",
|
|
"format": "raw-shard",
|
|
"nbytes": 27271168,
|
|
"records": [
|
|
{
|
|
"name": "model.layers.2.self_attn.qkv_proj.q_weight",
|
|
"shape": [
|
|
6144,
|
|
512
|
|
],
|
|
"dtype": "uint32",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 12582912,
|
|
"byteOffset": 0
|
|
},
|
|
{
|
|
"name": "model.layers.2.self_attn.qkv_proj.q_scale",
|
|
"shape": [
|
|
6144,
|
|
128
|
|
],
|
|
"dtype": "float16",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 1572864,
|
|
"byteOffset": 12582912
|
|
},
|
|
{
|
|
"name": "model.layers.2.self_attn.o_proj.q_weight",
|
|
"shape": [
|
|
4096,
|
|
512
|
|
],
|
|
"dtype": "uint32",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 8388608,
|
|
"byteOffset": 14155776
|
|
},
|
|
{
|
|
"name": "model.layers.2.self_attn.o_proj.q_scale",
|
|
"shape": [
|
|
4096,
|
|
128
|
|
],
|
|
"dtype": "float16",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 1048576,
|
|
"byteOffset": 22544384
|
|
},
|
|
{
|
|
"name": "model.layers.3.input_layernorm.weight",
|
|
"shape": [
|
|
4096
|
|
],
|
|
"dtype": "float16",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 8192,
|
|
"byteOffset": 23592960
|
|
},
|
|
{
|
|
"name": "model.layers.3.mlp.down_proj.q_scale",
|
|
"shape": [
|
|
4096,
|
|
448
|
|
],
|
|
"dtype": "float16",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 3670016,
|
|
"byteOffset": 23601152
|
|
}
|
|
],
|
|
"md5sum": "dbfd1fd343072927c7e30f7d0159dd59"
|
|
},
|
|
{
|
|
"dataPath": "params_shard_17.bin",
|
|
"format": "raw-shard",
|
|
"nbytes": 29360128,
|
|
"records": [
|
|
{
|
|
"name": "model.layers.4.mlp.down_proj.q_weight",
|
|
"shape": [
|
|
4096,
|
|
1792
|
|
],
|
|
"dtype": "uint32",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 29360128,
|
|
"byteOffset": 0
|
|
}
|
|
],
|
|
"md5sum": "58f24e7e5c32b0e23f9c6abfe34f160d"
|
|
},
|
|
{
|
|
"dataPath": "params_shard_18.bin",
|
|
"format": "raw-shard",
|
|
"nbytes": 30949376,
|
|
"records": [
|
|
{
|
|
"name": "model.layers.3.mlp.gate_up_proj.q_scale",
|
|
"shape": [
|
|
28672,
|
|
128
|
|
],
|
|
"dtype": "float16",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 7340032,
|
|
"byteOffset": 0
|
|
},
|
|
{
|
|
"name": "model.layers.3.post_attention_layernorm.weight",
|
|
"shape": [
|
|
4096
|
|
],
|
|
"dtype": "float16",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 8192,
|
|
"byteOffset": 7340032
|
|
},
|
|
{
|
|
"name": "model.layers.3.self_attn.qkv_proj.q_weight",
|
|
"shape": [
|
|
6144,
|
|
512
|
|
],
|
|
"dtype": "uint32",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 12582912,
|
|
"byteOffset": 7348224
|
|
},
|
|
{
|
|
"name": "model.layers.3.self_attn.qkv_proj.q_scale",
|
|
"shape": [
|
|
6144,
|
|
128
|
|
],
|
|
"dtype": "float16",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 1572864,
|
|
"byteOffset": 19931136
|
|
},
|
|
{
|
|
"name": "model.layers.3.self_attn.o_proj.q_weight",
|
|
"shape": [
|
|
4096,
|
|
512
|
|
],
|
|
"dtype": "uint32",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 8388608,
|
|
"byteOffset": 21504000
|
|
},
|
|
{
|
|
"name": "model.layers.3.self_attn.o_proj.q_scale",
|
|
"shape": [
|
|
4096,
|
|
128
|
|
],
|
|
"dtype": "float16",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 1048576,
|
|
"byteOffset": 29892608
|
|
},
|
|
{
|
|
"name": "model.layers.4.input_layernorm.weight",
|
|
"shape": [
|
|
4096
|
|
],
|
|
"dtype": "float16",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 8192,
|
|
"byteOffset": 30941184
|
|
}
|
|
],
|
|
"md5sum": "be66924f93d6912f3aa80bc4a50f0145"
|
|
},
|
|
{
|
|
"dataPath": "params_shard_19.bin",
|
|
"format": "raw-shard",
|
|
"nbytes": 58720256,
|
|
"records": [
|
|
{
|
|
"name": "model.layers.4.mlp.gate_up_proj.q_weight",
|
|
"shape": [
|
|
28672,
|
|
512
|
|
],
|
|
"dtype": "uint32",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 58720256,
|
|
"byteOffset": 0
|
|
}
|
|
],
|
|
"md5sum": "9100ba813e8f542f6e62fa271c2e9637"
|
|
},
|
|
{
|
|
"dataPath": "params_shard_20.bin",
|
|
"format": "raw-shard",
|
|
"nbytes": 25174016,
|
|
"records": [
|
|
{
|
|
"name": "model.layers.4.mlp.down_proj.q_scale",
|
|
"shape": [
|
|
4096,
|
|
448
|
|
],
|
|
"dtype": "float16",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 3670016,
|
|
"byteOffset": 0
|
|
},
|
|
{
|
|
"name": "model.layers.4.mlp.gate_up_proj.q_scale",
|
|
"shape": [
|
|
28672,
|
|
128
|
|
],
|
|
"dtype": "float16",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 7340032,
|
|
"byteOffset": 3670016
|
|
},
|
|
{
|
|
"name": "model.layers.4.post_attention_layernorm.weight",
|
|
"shape": [
|
|
4096
|
|
],
|
|
"dtype": "float16",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 8192,
|
|
"byteOffset": 11010048
|
|
},
|
|
{
|
|
"name": "model.layers.4.self_attn.qkv_proj.q_weight",
|
|
"shape": [
|
|
6144,
|
|
512
|
|
],
|
|
"dtype": "uint32",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 12582912,
|
|
"byteOffset": 11018240
|
|
},
|
|
{
|
|
"name": "model.layers.4.self_attn.qkv_proj.q_scale",
|
|
"shape": [
|
|
6144,
|
|
128
|
|
],
|
|
"dtype": "float16",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 1572864,
|
|
"byteOffset": 23601152
|
|
}
|
|
],
|
|
"md5sum": "e7ea9b0ae7aa79132195446c7e08996b"
|
|
},
|
|
{
|
|
"dataPath": "params_shard_21.bin",
|
|
"format": "raw-shard",
|
|
"nbytes": 29360128,
|
|
"records": [
|
|
{
|
|
"name": "model.layers.5.mlp.down_proj.q_weight",
|
|
"shape": [
|
|
4096,
|
|
1792
|
|
],
|
|
"dtype": "uint32",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 29360128,
|
|
"byteOffset": 0
|
|
}
|
|
],
|
|
"md5sum": "2983f44460186bda1656b81e41b84570"
|
|
},
|
|
{
|
|
"dataPath": "params_shard_22.bin",
|
|
"format": "raw-shard",
|
|
"nbytes": 58720256,
|
|
"records": [
|
|
{
|
|
"name": "model.layers.5.mlp.gate_up_proj.q_weight",
|
|
"shape": [
|
|
28672,
|
|
512
|
|
],
|
|
"dtype": "uint32",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 58720256,
|
|
"byteOffset": 0
|
|
}
|
|
],
|
|
"md5sum": "8d6963ab3581021c97d358f77d400f4b"
|
|
},
|
|
{
|
|
"dataPath": "params_shard_23.bin",
|
|
"format": "raw-shard",
|
|
"nbytes": 33046528,
|
|
"records": [
|
|
{
|
|
"name": "model.layers.4.self_attn.o_proj.q_weight",
|
|
"shape": [
|
|
4096,
|
|
512
|
|
],
|
|
"dtype": "uint32",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 8388608,
|
|
"byteOffset": 0
|
|
},
|
|
{
|
|
"name": "model.layers.4.self_attn.o_proj.q_scale",
|
|
"shape": [
|
|
4096,
|
|
128
|
|
],
|
|
"dtype": "float16",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 1048576,
|
|
"byteOffset": 8388608
|
|
},
|
|
{
|
|
"name": "model.layers.5.input_layernorm.weight",
|
|
"shape": [
|
|
4096
|
|
],
|
|
"dtype": "float16",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 8192,
|
|
"byteOffset": 9437184
|
|
},
|
|
{
|
|
"name": "model.layers.5.mlp.down_proj.q_scale",
|
|
"shape": [
|
|
4096,
|
|
448
|
|
],
|
|
"dtype": "float16",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 3670016,
|
|
"byteOffset": 9445376
|
|
},
|
|
{
|
|
"name": "model.layers.5.mlp.gate_up_proj.q_scale",
|
|
"shape": [
|
|
28672,
|
|
128
|
|
],
|
|
"dtype": "float16",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 7340032,
|
|
"byteOffset": 13115392
|
|
},
|
|
{
|
|
"name": "model.layers.5.post_attention_layernorm.weight",
|
|
"shape": [
|
|
4096
|
|
],
|
|
"dtype": "float16",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 8192,
|
|
"byteOffset": 20455424
|
|
},
|
|
{
|
|
"name": "model.layers.5.self_attn.qkv_proj.q_weight",
|
|
"shape": [
|
|
6144,
|
|
512
|
|
],
|
|
"dtype": "uint32",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 12582912,
|
|
"byteOffset": 20463616
|
|
}
|
|
],
|
|
"md5sum": "cd5c936c005c0e0270b85d243bb23576"
|
|
},
|
|
{
|
|
"dataPath": "params_shard_24.bin",
|
|
"format": "raw-shard",
|
|
"nbytes": 29360128,
|
|
"records": [
|
|
{
|
|
"name": "model.layers.6.mlp.down_proj.q_weight",
|
|
"shape": [
|
|
4096,
|
|
1792
|
|
],
|
|
"dtype": "uint32",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 29360128,
|
|
"byteOffset": 0
|
|
}
|
|
],
|
|
"md5sum": "6aba021d47a972128aaa11b6147261ea"
|
|
},
|
|
{
|
|
"dataPath": "params_shard_25.bin",
|
|
"format": "raw-shard",
|
|
"nbytes": 58720256,
|
|
"records": [
|
|
{
|
|
"name": "model.layers.6.mlp.gate_up_proj.q_weight",
|
|
"shape": [
|
|
28672,
|
|
512
|
|
],
|
|
"dtype": "uint32",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 58720256,
|
|
"byteOffset": 0
|
|
}
|
|
],
|
|
"md5sum": "29ff24d0e7ab6210be3f529a2e7796b9"
|
|
},
|
|
{
|
|
"dataPath": "params_shard_26.bin",
|
|
"format": "raw-shard",
|
|
"nbytes": 22036480,
|
|
"records": [
|
|
{
|
|
"name": "model.layers.5.self_attn.qkv_proj.q_scale",
|
|
"shape": [
|
|
6144,
|
|
128
|
|
],
|
|
"dtype": "float16",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 1572864,
|
|
"byteOffset": 0
|
|
},
|
|
{
|
|
"name": "model.layers.5.self_attn.o_proj.q_weight",
|
|
"shape": [
|
|
4096,
|
|
512
|
|
],
|
|
"dtype": "uint32",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 8388608,
|
|
"byteOffset": 1572864
|
|
},
|
|
{
|
|
"name": "model.layers.5.self_attn.o_proj.q_scale",
|
|
"shape": [
|
|
4096,
|
|
128
|
|
],
|
|
"dtype": "float16",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 1048576,
|
|
"byteOffset": 9961472
|
|
},
|
|
{
|
|
"name": "model.layers.6.input_layernorm.weight",
|
|
"shape": [
|
|
4096
|
|
],
|
|
"dtype": "float16",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 8192,
|
|
"byteOffset": 11010048
|
|
},
|
|
{
|
|
"name": "model.layers.6.mlp.down_proj.q_scale",
|
|
"shape": [
|
|
4096,
|
|
448
|
|
],
|
|
"dtype": "float16",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 3670016,
|
|
"byteOffset": 11018240
|
|
},
|
|
{
|
|
"name": "model.layers.6.mlp.gate_up_proj.q_scale",
|
|
"shape": [
|
|
28672,
|
|
128
|
|
],
|
|
"dtype": "float16",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 7340032,
|
|
"byteOffset": 14688256
|
|
},
|
|
{
|
|
"name": "model.layers.6.post_attention_layernorm.weight",
|
|
"shape": [
|
|
4096
|
|
],
|
|
"dtype": "float16",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 8192,
|
|
"byteOffset": 22028288
|
|
}
|
|
],
|
|
"md5sum": "c473c98e969eb64b56a42f1a03d99986"
|
|
},
|
|
{
|
|
"dataPath": "params_shard_27.bin",
|
|
"format": "raw-shard",
|
|
"nbytes": 29360128,
|
|
"records": [
|
|
{
|
|
"name": "model.layers.7.mlp.down_proj.q_weight",
|
|
"shape": [
|
|
4096,
|
|
1792
|
|
],
|
|
"dtype": "uint32",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 29360128,
|
|
"byteOffset": 0
|
|
}
|
|
],
|
|
"md5sum": "819bc23b0ec456ab164c2ccb8312fd25"
|
|
},
|
|
{
|
|
"dataPath": "params_shard_28.bin",
|
|
"format": "raw-shard",
|
|
"nbytes": 58720256,
|
|
"records": [
|
|
{
|
|
"name": "model.layers.7.mlp.gate_up_proj.q_weight",
|
|
"shape": [
|
|
28672,
|
|
512
|
|
],
|
|
"dtype": "uint32",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 58720256,
|
|
"byteOffset": 0
|
|
}
|
|
],
|
|
"md5sum": "ab312269d3b2a609d4292b1dc5e2831a"
|
|
},
|
|
{
|
|
"dataPath": "params_shard_29.bin",
|
|
"format": "raw-shard",
|
|
"nbytes": 27271168,
|
|
"records": [
|
|
{
|
|
"name": "model.layers.6.self_attn.qkv_proj.q_weight",
|
|
"shape": [
|
|
6144,
|
|
512
|
|
],
|
|
"dtype": "uint32",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 12582912,
|
|
"byteOffset": 0
|
|
},
|
|
{
|
|
"name": "model.layers.6.self_attn.qkv_proj.q_scale",
|
|
"shape": [
|
|
6144,
|
|
128
|
|
],
|
|
"dtype": "float16",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 1572864,
|
|
"byteOffset": 12582912
|
|
},
|
|
{
|
|
"name": "model.layers.6.self_attn.o_proj.q_weight",
|
|
"shape": [
|
|
4096,
|
|
512
|
|
],
|
|
"dtype": "uint32",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 8388608,
|
|
"byteOffset": 14155776
|
|
},
|
|
{
|
|
"name": "model.layers.6.self_attn.o_proj.q_scale",
|
|
"shape": [
|
|
4096,
|
|
128
|
|
],
|
|
"dtype": "float16",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 1048576,
|
|
"byteOffset": 22544384
|
|
},
|
|
{
|
|
"name": "model.layers.7.input_layernorm.weight",
|
|
"shape": [
|
|
4096
|
|
],
|
|
"dtype": "float16",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 8192,
|
|
"byteOffset": 23592960
|
|
},
|
|
{
|
|
"name": "model.layers.7.mlp.down_proj.q_scale",
|
|
"shape": [
|
|
4096,
|
|
448
|
|
],
|
|
"dtype": "float16",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 3670016,
|
|
"byteOffset": 23601152
|
|
}
|
|
],
|
|
"md5sum": "fc7b1e684c5a185c2a270cdec88850ee"
|
|
},
|
|
{
|
|
"dataPath": "params_shard_30.bin",
|
|
"format": "raw-shard",
|
|
"nbytes": 29360128,
|
|
"records": [
|
|
{
|
|
"name": "model.layers.8.mlp.down_proj.q_weight",
|
|
"shape": [
|
|
4096,
|
|
1792
|
|
],
|
|
"dtype": "uint32",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 29360128,
|
|
"byteOffset": 0
|
|
}
|
|
],
|
|
"md5sum": "688f8f61073d66c4ca106e4ae433fb93"
|
|
},
|
|
{
|
|
"dataPath": "params_shard_31.bin",
|
|
"format": "raw-shard",
|
|
"nbytes": 30949376,
|
|
"records": [
|
|
{
|
|
"name": "model.layers.7.mlp.gate_up_proj.q_scale",
|
|
"shape": [
|
|
28672,
|
|
128
|
|
],
|
|
"dtype": "float16",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 7340032,
|
|
"byteOffset": 0
|
|
},
|
|
{
|
|
"name": "model.layers.7.post_attention_layernorm.weight",
|
|
"shape": [
|
|
4096
|
|
],
|
|
"dtype": "float16",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 8192,
|
|
"byteOffset": 7340032
|
|
},
|
|
{
|
|
"name": "model.layers.7.self_attn.qkv_proj.q_weight",
|
|
"shape": [
|
|
6144,
|
|
512
|
|
],
|
|
"dtype": "uint32",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 12582912,
|
|
"byteOffset": 7348224
|
|
},
|
|
{
|
|
"name": "model.layers.7.self_attn.qkv_proj.q_scale",
|
|
"shape": [
|
|
6144,
|
|
128
|
|
],
|
|
"dtype": "float16",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 1572864,
|
|
"byteOffset": 19931136
|
|
},
|
|
{
|
|
"name": "model.layers.7.self_attn.o_proj.q_weight",
|
|
"shape": [
|
|
4096,
|
|
512
|
|
],
|
|
"dtype": "uint32",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 8388608,
|
|
"byteOffset": 21504000
|
|
},
|
|
{
|
|
"name": "model.layers.7.self_attn.o_proj.q_scale",
|
|
"shape": [
|
|
4096,
|
|
128
|
|
],
|
|
"dtype": "float16",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 1048576,
|
|
"byteOffset": 29892608
|
|
},
|
|
{
|
|
"name": "model.layers.8.input_layernorm.weight",
|
|
"shape": [
|
|
4096
|
|
],
|
|
"dtype": "float16",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 8192,
|
|
"byteOffset": 30941184
|
|
}
|
|
],
|
|
"md5sum": "4e3e335f5e321834074371a22c982c72"
|
|
},
|
|
{
|
|
"dataPath": "params_shard_32.bin",
|
|
"format": "raw-shard",
|
|
"nbytes": 58720256,
|
|
"records": [
|
|
{
|
|
"name": "model.layers.8.mlp.gate_up_proj.q_weight",
|
|
"shape": [
|
|
28672,
|
|
512
|
|
],
|
|
"dtype": "uint32",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 58720256,
|
|
"byteOffset": 0
|
|
}
|
|
],
|
|
"md5sum": "6c07d79523b925436c50f89d0583d579"
|
|
},
|
|
{
|
|
"dataPath": "params_shard_33.bin",
|
|
"format": "raw-shard",
|
|
"nbytes": 25174016,
|
|
"records": [
|
|
{
|
|
"name": "model.layers.8.mlp.down_proj.q_scale",
|
|
"shape": [
|
|
4096,
|
|
448
|
|
],
|
|
"dtype": "float16",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 3670016,
|
|
"byteOffset": 0
|
|
},
|
|
{
|
|
"name": "model.layers.8.mlp.gate_up_proj.q_scale",
|
|
"shape": [
|
|
28672,
|
|
128
|
|
],
|
|
"dtype": "float16",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 7340032,
|
|
"byteOffset": 3670016
|
|
},
|
|
{
|
|
"name": "model.layers.8.post_attention_layernorm.weight",
|
|
"shape": [
|
|
4096
|
|
],
|
|
"dtype": "float16",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 8192,
|
|
"byteOffset": 11010048
|
|
},
|
|
{
|
|
"name": "model.layers.8.self_attn.qkv_proj.q_weight",
|
|
"shape": [
|
|
6144,
|
|
512
|
|
],
|
|
"dtype": "uint32",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 12582912,
|
|
"byteOffset": 11018240
|
|
},
|
|
{
|
|
"name": "model.layers.8.self_attn.qkv_proj.q_scale",
|
|
"shape": [
|
|
6144,
|
|
128
|
|
],
|
|
"dtype": "float16",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 1572864,
|
|
"byteOffset": 23601152
|
|
}
|
|
],
|
|
"md5sum": "1aa09a1994137eba73f90f0ec7b9acc7"
|
|
},
|
|
{
|
|
"dataPath": "params_shard_34.bin",
|
|
"format": "raw-shard",
|
|
"nbytes": 29360128,
|
|
"records": [
|
|
{
|
|
"name": "model.layers.10.mlp.down_proj.q_weight",
|
|
"shape": [
|
|
4096,
|
|
1792
|
|
],
|
|
"dtype": "uint32",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 29360128,
|
|
"byteOffset": 0
|
|
}
|
|
],
|
|
"md5sum": "b5f872553249faf0827808720d86fd2f"
|
|
},
|
|
{
|
|
"dataPath": "params_shard_35.bin",
|
|
"format": "raw-shard",
|
|
"nbytes": 58720256,
|
|
"records": [
|
|
{
|
|
"name": "model.layers.10.mlp.gate_up_proj.q_weight",
|
|
"shape": [
|
|
28672,
|
|
512
|
|
],
|
|
"dtype": "uint32",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 58720256,
|
|
"byteOffset": 0
|
|
}
|
|
],
|
|
"md5sum": "cde7fb13a5912f5a1a61e12ed7c64110"
|
|
},
|
|
{
|
|
"dataPath": "params_shard_36.bin",
|
|
"format": "raw-shard",
|
|
"nbytes": 33046528,
|
|
"records": [
|
|
{
|
|
"name": "model.layers.8.self_attn.o_proj.q_weight",
|
|
"shape": [
|
|
4096,
|
|
512
|
|
],
|
|
"dtype": "uint32",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 8388608,
|
|
"byteOffset": 0
|
|
},
|
|
{
|
|
"name": "model.layers.8.self_attn.o_proj.q_scale",
|
|
"shape": [
|
|
4096,
|
|
128
|
|
],
|
|
"dtype": "float16",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 1048576,
|
|
"byteOffset": 8388608
|
|
},
|
|
{
|
|
"name": "model.layers.10.input_layernorm.weight",
|
|
"shape": [
|
|
4096
|
|
],
|
|
"dtype": "float16",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 8192,
|
|
"byteOffset": 9437184
|
|
},
|
|
{
|
|
"name": "model.layers.10.mlp.down_proj.q_scale",
|
|
"shape": [
|
|
4096,
|
|
448
|
|
],
|
|
"dtype": "float16",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 3670016,
|
|
"byteOffset": 9445376
|
|
},
|
|
{
|
|
"name": "model.layers.10.mlp.gate_up_proj.q_scale",
|
|
"shape": [
|
|
28672,
|
|
128
|
|
],
|
|
"dtype": "float16",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 7340032,
|
|
"byteOffset": 13115392
|
|
},
|
|
{
|
|
"name": "model.layers.10.post_attention_layernorm.weight",
|
|
"shape": [
|
|
4096
|
|
],
|
|
"dtype": "float16",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 8192,
|
|
"byteOffset": 20455424
|
|
},
|
|
{
|
|
"name": "model.layers.10.self_attn.qkv_proj.q_weight",
|
|
"shape": [
|
|
6144,
|
|
512
|
|
],
|
|
"dtype": "uint32",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 12582912,
|
|
"byteOffset": 20463616
|
|
}
|
|
],
|
|
"md5sum": "0e422e021ca43a524bf7cb5702050e42"
|
|
},
|
|
{
|
|
"dataPath": "params_shard_37.bin",
|
|
"format": "raw-shard",
|
|
"nbytes": 29360128,
|
|
"records": [
|
|
{
|
|
"name": "model.layers.11.mlp.down_proj.q_weight",
|
|
"shape": [
|
|
4096,
|
|
1792
|
|
],
|
|
"dtype": "uint32",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 29360128,
|
|
"byteOffset": 0
|
|
}
|
|
],
|
|
"md5sum": "e0a6ee7bef730f6acdad810e2216bb0f"
|
|
},
|
|
{
|
|
"dataPath": "params_shard_38.bin",
|
|
"format": "raw-shard",
|
|
"nbytes": 58720256,
|
|
"records": [
|
|
{
|
|
"name": "model.layers.11.mlp.gate_up_proj.q_weight",
|
|
"shape": [
|
|
28672,
|
|
512
|
|
],
|
|
"dtype": "uint32",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 58720256,
|
|
"byteOffset": 0
|
|
}
|
|
],
|
|
"md5sum": "5c6a8f346fee906d0baa2b5d7881ec55"
|
|
},
|
|
{
|
|
"dataPath": "params_shard_39.bin",
|
|
"format": "raw-shard",
|
|
"nbytes": 22036480,
|
|
"records": [
|
|
{
|
|
"name": "model.layers.10.self_attn.qkv_proj.q_scale",
|
|
"shape": [
|
|
6144,
|
|
128
|
|
],
|
|
"dtype": "float16",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 1572864,
|
|
"byteOffset": 0
|
|
},
|
|
{
|
|
"name": "model.layers.10.self_attn.o_proj.q_weight",
|
|
"shape": [
|
|
4096,
|
|
512
|
|
],
|
|
"dtype": "uint32",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 8388608,
|
|
"byteOffset": 1572864
|
|
},
|
|
{
|
|
"name": "model.layers.10.self_attn.o_proj.q_scale",
|
|
"shape": [
|
|
4096,
|
|
128
|
|
],
|
|
"dtype": "float16",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 1048576,
|
|
"byteOffset": 9961472
|
|
},
|
|
{
|
|
"name": "model.layers.11.input_layernorm.weight",
|
|
"shape": [
|
|
4096
|
|
],
|
|
"dtype": "float16",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 8192,
|
|
"byteOffset": 11010048
|
|
},
|
|
{
|
|
"name": "model.layers.11.mlp.down_proj.q_scale",
|
|
"shape": [
|
|
4096,
|
|
448
|
|
],
|
|
"dtype": "float16",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 3670016,
|
|
"byteOffset": 11018240
|
|
},
|
|
{
|
|
"name": "model.layers.11.mlp.gate_up_proj.q_scale",
|
|
"shape": [
|
|
28672,
|
|
128
|
|
],
|
|
"dtype": "float16",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 7340032,
|
|
"byteOffset": 14688256
|
|
},
|
|
{
|
|
"name": "model.layers.11.post_attention_layernorm.weight",
|
|
"shape": [
|
|
4096
|
|
],
|
|
"dtype": "float16",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 8192,
|
|
"byteOffset": 22028288
|
|
}
|
|
],
|
|
"md5sum": "a9115dfc0bd89c32cd5079f5f24a45b0"
|
|
},
|
|
{
|
|
"dataPath": "params_shard_40.bin",
|
|
"format": "raw-shard",
|
|
"nbytes": 29360128,
|
|
"records": [
|
|
{
|
|
"name": "model.layers.12.mlp.down_proj.q_weight",
|
|
"shape": [
|
|
4096,
|
|
1792
|
|
],
|
|
"dtype": "uint32",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 29360128,
|
|
"byteOffset": 0
|
|
}
|
|
],
|
|
"md5sum": "683495cc946d96e434a24019f7b4a83b"
|
|
},
|
|
{
|
|
"dataPath": "params_shard_41.bin",
|
|
"format": "raw-shard",
|
|
"nbytes": 58720256,
|
|
"records": [
|
|
{
|
|
"name": "model.layers.12.mlp.gate_up_proj.q_weight",
|
|
"shape": [
|
|
28672,
|
|
512
|
|
],
|
|
"dtype": "uint32",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 58720256,
|
|
"byteOffset": 0
|
|
}
|
|
],
|
|
"md5sum": "9a99086080f65b6416b69e4ef2fd414b"
|
|
},
|
|
{
|
|
"dataPath": "params_shard_42.bin",
|
|
"format": "raw-shard",
|
|
"nbytes": 27271168,
|
|
"records": [
|
|
{
|
|
"name": "model.layers.11.self_attn.qkv_proj.q_weight",
|
|
"shape": [
|
|
6144,
|
|
512
|
|
],
|
|
"dtype": "uint32",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 12582912,
|
|
"byteOffset": 0
|
|
},
|
|
{
|
|
"name": "model.layers.11.self_attn.qkv_proj.q_scale",
|
|
"shape": [
|
|
6144,
|
|
128
|
|
],
|
|
"dtype": "float16",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 1572864,
|
|
"byteOffset": 12582912
|
|
},
|
|
{
|
|
"name": "model.layers.11.self_attn.o_proj.q_weight",
|
|
"shape": [
|
|
4096,
|
|
512
|
|
],
|
|
"dtype": "uint32",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 8388608,
|
|
"byteOffset": 14155776
|
|
},
|
|
{
|
|
"name": "model.layers.11.self_attn.o_proj.q_scale",
|
|
"shape": [
|
|
4096,
|
|
128
|
|
],
|
|
"dtype": "float16",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 1048576,
|
|
"byteOffset": 22544384
|
|
},
|
|
{
|
|
"name": "model.layers.12.input_layernorm.weight",
|
|
"shape": [
|
|
4096
|
|
],
|
|
"dtype": "float16",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 8192,
|
|
"byteOffset": 23592960
|
|
},
|
|
{
|
|
"name": "model.layers.12.mlp.down_proj.q_scale",
|
|
"shape": [
|
|
4096,
|
|
448
|
|
],
|
|
"dtype": "float16",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 3670016,
|
|
"byteOffset": 23601152
|
|
}
|
|
],
|
|
"md5sum": "816c2be20388f4c7948002c60f49939d"
|
|
},
|
|
{
|
|
"dataPath": "params_shard_43.bin",
|
|
"format": "raw-shard",
|
|
"nbytes": 29360128,
|
|
"records": [
|
|
{
|
|
"name": "model.layers.13.mlp.down_proj.q_weight",
|
|
"shape": [
|
|
4096,
|
|
1792
|
|
],
|
|
"dtype": "uint32",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 29360128,
|
|
"byteOffset": 0
|
|
}
|
|
],
|
|
"md5sum": "59f2d948426883c5ce6d22850f70294a"
|
|
},
|
|
{
|
|
"dataPath": "params_shard_44.bin",
|
|
"format": "raw-shard",
|
|
"nbytes": 30949376,
|
|
"records": [
|
|
{
|
|
"name": "model.layers.12.mlp.gate_up_proj.q_scale",
|
|
"shape": [
|
|
28672,
|
|
128
|
|
],
|
|
"dtype": "float16",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 7340032,
|
|
"byteOffset": 0
|
|
},
|
|
{
|
|
"name": "model.layers.12.post_attention_layernorm.weight",
|
|
"shape": [
|
|
4096
|
|
],
|
|
"dtype": "float16",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 8192,
|
|
"byteOffset": 7340032
|
|
},
|
|
{
|
|
"name": "model.layers.12.self_attn.qkv_proj.q_weight",
|
|
"shape": [
|
|
6144,
|
|
512
|
|
],
|
|
"dtype": "uint32",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 12582912,
|
|
"byteOffset": 7348224
|
|
},
|
|
{
|
|
"name": "model.layers.12.self_attn.qkv_proj.q_scale",
|
|
"shape": [
|
|
6144,
|
|
128
|
|
],
|
|
"dtype": "float16",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 1572864,
|
|
"byteOffset": 19931136
|
|
},
|
|
{
|
|
"name": "model.layers.12.self_attn.o_proj.q_weight",
|
|
"shape": [
|
|
4096,
|
|
512
|
|
],
|
|
"dtype": "uint32",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 8388608,
|
|
"byteOffset": 21504000
|
|
},
|
|
{
|
|
"name": "model.layers.12.self_attn.o_proj.q_scale",
|
|
"shape": [
|
|
4096,
|
|
128
|
|
],
|
|
"dtype": "float16",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 1048576,
|
|
"byteOffset": 29892608
|
|
},
|
|
{
|
|
"name": "model.layers.13.input_layernorm.weight",
|
|
"shape": [
|
|
4096
|
|
],
|
|
"dtype": "float16",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 8192,
|
|
"byteOffset": 30941184
|
|
}
|
|
],
|
|
"md5sum": "4c711048d290eb3c3c0d9a51892c4005"
|
|
},
|
|
{
|
|
"dataPath": "params_shard_45.bin",
|
|
"format": "raw-shard",
|
|
"nbytes": 58720256,
|
|
"records": [
|
|
{
|
|
"name": "model.layers.13.mlp.gate_up_proj.q_weight",
|
|
"shape": [
|
|
28672,
|
|
512
|
|
],
|
|
"dtype": "uint32",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 58720256,
|
|
"byteOffset": 0
|
|
}
|
|
],
|
|
"md5sum": "4b29ef58db3750c13b28108b9a3af72c"
|
|
},
|
|
{
|
|
"dataPath": "params_shard_46.bin",
|
|
"format": "raw-shard",
|
|
"nbytes": 25174016,
|
|
"records": [
|
|
{
|
|
"name": "model.layers.13.mlp.down_proj.q_scale",
|
|
"shape": [
|
|
4096,
|
|
448
|
|
],
|
|
"dtype": "float16",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 3670016,
|
|
"byteOffset": 0
|
|
},
|
|
{
|
|
"name": "model.layers.13.mlp.gate_up_proj.q_scale",
|
|
"shape": [
|
|
28672,
|
|
128
|
|
],
|
|
"dtype": "float16",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 7340032,
|
|
"byteOffset": 3670016
|
|
},
|
|
{
|
|
"name": "model.layers.13.post_attention_layernorm.weight",
|
|
"shape": [
|
|
4096
|
|
],
|
|
"dtype": "float16",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 8192,
|
|
"byteOffset": 11010048
|
|
},
|
|
{
|
|
"name": "model.layers.13.self_attn.qkv_proj.q_weight",
|
|
"shape": [
|
|
6144,
|
|
512
|
|
],
|
|
"dtype": "uint32",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 12582912,
|
|
"byteOffset": 11018240
|
|
},
|
|
{
|
|
"name": "model.layers.13.self_attn.qkv_proj.q_scale",
|
|
"shape": [
|
|
6144,
|
|
128
|
|
],
|
|
"dtype": "float16",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 1572864,
|
|
"byteOffset": 23601152
|
|
}
|
|
],
|
|
"md5sum": "c976e7c3d841107081767b19a2e095da"
|
|
},
|
|
{
|
|
"dataPath": "params_shard_47.bin",
|
|
"format": "raw-shard",
|
|
"nbytes": 29360128,
|
|
"records": [
|
|
{
|
|
"name": "model.layers.14.mlp.down_proj.q_weight",
|
|
"shape": [
|
|
4096,
|
|
1792
|
|
],
|
|
"dtype": "uint32",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 29360128,
|
|
"byteOffset": 0
|
|
}
|
|
],
|
|
"md5sum": "e9acccaac4528bd92e0277133796be97"
|
|
},
|
|
{
|
|
"dataPath": "params_shard_48.bin",
|
|
"format": "raw-shard",
|
|
"nbytes": 58720256,
|
|
"records": [
|
|
{
|
|
"name": "model.layers.14.mlp.gate_up_proj.q_weight",
|
|
"shape": [
|
|
28672,
|
|
512
|
|
],
|
|
"dtype": "uint32",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 58720256,
|
|
"byteOffset": 0
|
|
}
|
|
],
|
|
"md5sum": "52ffd063981f3141aacbd9a167f5d11e"
|
|
},
|
|
{
|
|
"dataPath": "params_shard_49.bin",
|
|
"format": "raw-shard",
|
|
"nbytes": 33046528,
|
|
"records": [
|
|
{
|
|
"name": "model.layers.13.self_attn.o_proj.q_weight",
|
|
"shape": [
|
|
4096,
|
|
512
|
|
],
|
|
"dtype": "uint32",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 8388608,
|
|
"byteOffset": 0
|
|
},
|
|
{
|
|
"name": "model.layers.13.self_attn.o_proj.q_scale",
|
|
"shape": [
|
|
4096,
|
|
128
|
|
],
|
|
"dtype": "float16",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 1048576,
|
|
"byteOffset": 8388608
|
|
},
|
|
{
|
|
"name": "model.layers.14.input_layernorm.weight",
|
|
"shape": [
|
|
4096
|
|
],
|
|
"dtype": "float16",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 8192,
|
|
"byteOffset": 9437184
|
|
},
|
|
{
|
|
"name": "model.layers.14.mlp.down_proj.q_scale",
|
|
"shape": [
|
|
4096,
|
|
448
|
|
],
|
|
"dtype": "float16",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 3670016,
|
|
"byteOffset": 9445376
|
|
},
|
|
{
|
|
"name": "model.layers.14.mlp.gate_up_proj.q_scale",
|
|
"shape": [
|
|
28672,
|
|
128
|
|
],
|
|
"dtype": "float16",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 7340032,
|
|
"byteOffset": 13115392
|
|
},
|
|
{
|
|
"name": "model.layers.14.post_attention_layernorm.weight",
|
|
"shape": [
|
|
4096
|
|
],
|
|
"dtype": "float16",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 8192,
|
|
"byteOffset": 20455424
|
|
},
|
|
{
|
|
"name": "model.layers.14.self_attn.qkv_proj.q_weight",
|
|
"shape": [
|
|
6144,
|
|
512
|
|
],
|
|
"dtype": "uint32",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 12582912,
|
|
"byteOffset": 20463616
|
|
}
|
|
],
|
|
"md5sum": "7cf71f3903202cbe7753086dde01134f"
|
|
},
|
|
{
|
|
"dataPath": "params_shard_50.bin",
|
|
"format": "raw-shard",
|
|
"nbytes": 29360128,
|
|
"records": [
|
|
{
|
|
"name": "model.layers.15.mlp.down_proj.q_weight",
|
|
"shape": [
|
|
4096,
|
|
1792
|
|
],
|
|
"dtype": "uint32",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 29360128,
|
|
"byteOffset": 0
|
|
}
|
|
],
|
|
"md5sum": "f1fe8ab15507d6e37d2aadeb81ee34d0"
|
|
},
|
|
{
|
|
"dataPath": "params_shard_51.bin",
|
|
"format": "raw-shard",
|
|
"nbytes": 58720256,
|
|
"records": [
|
|
{
|
|
"name": "model.layers.15.mlp.gate_up_proj.q_weight",
|
|
"shape": [
|
|
28672,
|
|
512
|
|
],
|
|
"dtype": "uint32",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 58720256,
|
|
"byteOffset": 0
|
|
}
|
|
],
|
|
"md5sum": "f01052af3446a8d4fd4d07055d245c04"
|
|
},
|
|
{
|
|
"dataPath": "params_shard_52.bin",
|
|
"format": "raw-shard",
|
|
"nbytes": 22036480,
|
|
"records": [
|
|
{
|
|
"name": "model.layers.14.self_attn.qkv_proj.q_scale",
|
|
"shape": [
|
|
6144,
|
|
128
|
|
],
|
|
"dtype": "float16",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 1572864,
|
|
"byteOffset": 0
|
|
},
|
|
{
|
|
"name": "model.layers.14.self_attn.o_proj.q_weight",
|
|
"shape": [
|
|
4096,
|
|
512
|
|
],
|
|
"dtype": "uint32",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 8388608,
|
|
"byteOffset": 1572864
|
|
},
|
|
{
|
|
"name": "model.layers.14.self_attn.o_proj.q_scale",
|
|
"shape": [
|
|
4096,
|
|
128
|
|
],
|
|
"dtype": "float16",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 1048576,
|
|
"byteOffset": 9961472
|
|
},
|
|
{
|
|
"name": "model.layers.15.input_layernorm.weight",
|
|
"shape": [
|
|
4096
|
|
],
|
|
"dtype": "float16",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 8192,
|
|
"byteOffset": 11010048
|
|
},
|
|
{
|
|
"name": "model.layers.15.mlp.down_proj.q_scale",
|
|
"shape": [
|
|
4096,
|
|
448
|
|
],
|
|
"dtype": "float16",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 3670016,
|
|
"byteOffset": 11018240
|
|
},
|
|
{
|
|
"name": "model.layers.15.mlp.gate_up_proj.q_scale",
|
|
"shape": [
|
|
28672,
|
|
128
|
|
],
|
|
"dtype": "float16",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 7340032,
|
|
"byteOffset": 14688256
|
|
},
|
|
{
|
|
"name": "model.layers.15.post_attention_layernorm.weight",
|
|
"shape": [
|
|
4096
|
|
],
|
|
"dtype": "float16",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 8192,
|
|
"byteOffset": 22028288
|
|
}
|
|
],
|
|
"md5sum": "8040a91782f2da4664aab46cec3016f3"
|
|
},
|
|
{
|
|
"dataPath": "params_shard_53.bin",
|
|
"format": "raw-shard",
|
|
"nbytes": 29360128,
|
|
"records": [
|
|
{
|
|
"name": "model.layers.16.mlp.down_proj.q_weight",
|
|
"shape": [
|
|
4096,
|
|
1792
|
|
],
|
|
"dtype": "uint32",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 29360128,
|
|
"byteOffset": 0
|
|
}
|
|
],
|
|
"md5sum": "5524d4ccfd272c240b1884c7e12aa5ab"
|
|
},
|
|
{
|
|
"dataPath": "params_shard_54.bin",
|
|
"format": "raw-shard",
|
|
"nbytes": 58720256,
|
|
"records": [
|
|
{
|
|
"name": "model.layers.16.mlp.gate_up_proj.q_weight",
|
|
"shape": [
|
|
28672,
|
|
512
|
|
],
|
|
"dtype": "uint32",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 58720256,
|
|
"byteOffset": 0
|
|
}
|
|
],
|
|
"md5sum": "bd4b085cfe7764e8d5ce7a1d25eb630b"
|
|
},
|
|
{
|
|
"dataPath": "params_shard_55.bin",
|
|
"format": "raw-shard",
|
|
"nbytes": 27271168,
|
|
"records": [
|
|
{
|
|
"name": "model.layers.15.self_attn.qkv_proj.q_weight",
|
|
"shape": [
|
|
6144,
|
|
512
|
|
],
|
|
"dtype": "uint32",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 12582912,
|
|
"byteOffset": 0
|
|
},
|
|
{
|
|
"name": "model.layers.15.self_attn.qkv_proj.q_scale",
|
|
"shape": [
|
|
6144,
|
|
128
|
|
],
|
|
"dtype": "float16",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 1572864,
|
|
"byteOffset": 12582912
|
|
},
|
|
{
|
|
"name": "model.layers.15.self_attn.o_proj.q_weight",
|
|
"shape": [
|
|
4096,
|
|
512
|
|
],
|
|
"dtype": "uint32",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 8388608,
|
|
"byteOffset": 14155776
|
|
},
|
|
{
|
|
"name": "model.layers.15.self_attn.o_proj.q_scale",
|
|
"shape": [
|
|
4096,
|
|
128
|
|
],
|
|
"dtype": "float16",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 1048576,
|
|
"byteOffset": 22544384
|
|
},
|
|
{
|
|
"name": "model.layers.16.input_layernorm.weight",
|
|
"shape": [
|
|
4096
|
|
],
|
|
"dtype": "float16",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 8192,
|
|
"byteOffset": 23592960
|
|
},
|
|
{
|
|
"name": "model.layers.16.mlp.down_proj.q_scale",
|
|
"shape": [
|
|
4096,
|
|
448
|
|
],
|
|
"dtype": "float16",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 3670016,
|
|
"byteOffset": 23601152
|
|
}
|
|
],
|
|
"md5sum": "1d7bbf1ba53fab17292134ba2abefefd"
|
|
},
|
|
{
|
|
"dataPath": "params_shard_56.bin",
|
|
"format": "raw-shard",
|
|
"nbytes": 29360128,
|
|
"records": [
|
|
{
|
|
"name": "model.layers.17.mlp.down_proj.q_weight",
|
|
"shape": [
|
|
4096,
|
|
1792
|
|
],
|
|
"dtype": "uint32",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 29360128,
|
|
"byteOffset": 0
|
|
}
|
|
],
|
|
"md5sum": "14f4c9fe5cc6e1d25727da1ef8ea90fb"
|
|
},
|
|
{
|
|
"dataPath": "params_shard_57.bin",
|
|
"format": "raw-shard",
|
|
"nbytes": 30949376,
|
|
"records": [
|
|
{
|
|
"name": "model.layers.16.mlp.gate_up_proj.q_scale",
|
|
"shape": [
|
|
28672,
|
|
128
|
|
],
|
|
"dtype": "float16",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 7340032,
|
|
"byteOffset": 0
|
|
},
|
|
{
|
|
"name": "model.layers.16.post_attention_layernorm.weight",
|
|
"shape": [
|
|
4096
|
|
],
|
|
"dtype": "float16",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 8192,
|
|
"byteOffset": 7340032
|
|
},
|
|
{
|
|
"name": "model.layers.16.self_attn.qkv_proj.q_weight",
|
|
"shape": [
|
|
6144,
|
|
512
|
|
],
|
|
"dtype": "uint32",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 12582912,
|
|
"byteOffset": 7348224
|
|
},
|
|
{
|
|
"name": "model.layers.16.self_attn.qkv_proj.q_scale",
|
|
"shape": [
|
|
6144,
|
|
128
|
|
],
|
|
"dtype": "float16",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 1572864,
|
|
"byteOffset": 19931136
|
|
},
|
|
{
|
|
"name": "model.layers.16.self_attn.o_proj.q_weight",
|
|
"shape": [
|
|
4096,
|
|
512
|
|
],
|
|
"dtype": "uint32",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 8388608,
|
|
"byteOffset": 21504000
|
|
},
|
|
{
|
|
"name": "model.layers.16.self_attn.o_proj.q_scale",
|
|
"shape": [
|
|
4096,
|
|
128
|
|
],
|
|
"dtype": "float16",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 1048576,
|
|
"byteOffset": 29892608
|
|
},
|
|
{
|
|
"name": "model.layers.17.input_layernorm.weight",
|
|
"shape": [
|
|
4096
|
|
],
|
|
"dtype": "float16",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 8192,
|
|
"byteOffset": 30941184
|
|
}
|
|
],
|
|
"md5sum": "2910348f3e3ec8e28bf304f6ea948a15"
|
|
},
|
|
{
|
|
"dataPath": "params_shard_58.bin",
|
|
"format": "raw-shard",
|
|
"nbytes": 58720256,
|
|
"records": [
|
|
{
|
|
"name": "model.layers.17.mlp.gate_up_proj.q_weight",
|
|
"shape": [
|
|
28672,
|
|
512
|
|
],
|
|
"dtype": "uint32",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 58720256,
|
|
"byteOffset": 0
|
|
}
|
|
],
|
|
"md5sum": "f9ddc3178d6eb23ff4b5e0a6166419ee"
|
|
},
|
|
{
|
|
"dataPath": "params_shard_59.bin",
|
|
"format": "raw-shard",
|
|
"nbytes": 25174016,
|
|
"records": [
|
|
{
|
|
"name": "model.layers.17.mlp.down_proj.q_scale",
|
|
"shape": [
|
|
4096,
|
|
448
|
|
],
|
|
"dtype": "float16",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 3670016,
|
|
"byteOffset": 0
|
|
},
|
|
{
|
|
"name": "model.layers.17.mlp.gate_up_proj.q_scale",
|
|
"shape": [
|
|
28672,
|
|
128
|
|
],
|
|
"dtype": "float16",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 7340032,
|
|
"byteOffset": 3670016
|
|
},
|
|
{
|
|
"name": "model.layers.17.post_attention_layernorm.weight",
|
|
"shape": [
|
|
4096
|
|
],
|
|
"dtype": "float16",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 8192,
|
|
"byteOffset": 11010048
|
|
},
|
|
{
|
|
"name": "model.layers.17.self_attn.qkv_proj.q_weight",
|
|
"shape": [
|
|
6144,
|
|
512
|
|
],
|
|
"dtype": "uint32",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 12582912,
|
|
"byteOffset": 11018240
|
|
},
|
|
{
|
|
"name": "model.layers.17.self_attn.qkv_proj.q_scale",
|
|
"shape": [
|
|
6144,
|
|
128
|
|
],
|
|
"dtype": "float16",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 1572864,
|
|
"byteOffset": 23601152
|
|
}
|
|
],
|
|
"md5sum": "ec1fbf24ff637572fe34be188d3216b3"
|
|
},
|
|
{
|
|
"dataPath": "params_shard_60.bin",
|
|
"format": "raw-shard",
|
|
"nbytes": 29360128,
|
|
"records": [
|
|
{
|
|
"name": "model.layers.18.mlp.down_proj.q_weight",
|
|
"shape": [
|
|
4096,
|
|
1792
|
|
],
|
|
"dtype": "uint32",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 29360128,
|
|
"byteOffset": 0
|
|
}
|
|
],
|
|
"md5sum": "6fa20b8dc7fd47adf0329ee866bf3a9d"
|
|
},
|
|
{
|
|
"dataPath": "params_shard_61.bin",
|
|
"format": "raw-shard",
|
|
"nbytes": 58720256,
|
|
"records": [
|
|
{
|
|
"name": "model.layers.18.mlp.gate_up_proj.q_weight",
|
|
"shape": [
|
|
28672,
|
|
512
|
|
],
|
|
"dtype": "uint32",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 58720256,
|
|
"byteOffset": 0
|
|
}
|
|
],
|
|
"md5sum": "aea1308bc4ee681992733f2407cc065f"
|
|
},
|
|
{
|
|
"dataPath": "params_shard_62.bin",
|
|
"format": "raw-shard",
|
|
"nbytes": 33046528,
|
|
"records": [
|
|
{
|
|
"name": "model.layers.17.self_attn.o_proj.q_weight",
|
|
"shape": [
|
|
4096,
|
|
512
|
|
],
|
|
"dtype": "uint32",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 8388608,
|
|
"byteOffset": 0
|
|
},
|
|
{
|
|
"name": "model.layers.17.self_attn.o_proj.q_scale",
|
|
"shape": [
|
|
4096,
|
|
128
|
|
],
|
|
"dtype": "float16",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 1048576,
|
|
"byteOffset": 8388608
|
|
},
|
|
{
|
|
"name": "model.layers.18.input_layernorm.weight",
|
|
"shape": [
|
|
4096
|
|
],
|
|
"dtype": "float16",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 8192,
|
|
"byteOffset": 9437184
|
|
},
|
|
{
|
|
"name": "model.layers.18.mlp.down_proj.q_scale",
|
|
"shape": [
|
|
4096,
|
|
448
|
|
],
|
|
"dtype": "float16",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 3670016,
|
|
"byteOffset": 9445376
|
|
},
|
|
{
|
|
"name": "model.layers.18.mlp.gate_up_proj.q_scale",
|
|
"shape": [
|
|
28672,
|
|
128
|
|
],
|
|
"dtype": "float16",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 7340032,
|
|
"byteOffset": 13115392
|
|
},
|
|
{
|
|
"name": "model.layers.18.post_attention_layernorm.weight",
|
|
"shape": [
|
|
4096
|
|
],
|
|
"dtype": "float16",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 8192,
|
|
"byteOffset": 20455424
|
|
},
|
|
{
|
|
"name": "model.layers.18.self_attn.qkv_proj.q_weight",
|
|
"shape": [
|
|
6144,
|
|
512
|
|
],
|
|
"dtype": "uint32",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 12582912,
|
|
"byteOffset": 20463616
|
|
}
|
|
],
|
|
"md5sum": "8e216ab4afebc3bc725f701e563b907c"
|
|
},
|
|
{
|
|
"dataPath": "params_shard_63.bin",
|
|
"format": "raw-shard",
|
|
"nbytes": 29360128,
|
|
"records": [
|
|
{
|
|
"name": "model.layers.19.mlp.down_proj.q_weight",
|
|
"shape": [
|
|
4096,
|
|
1792
|
|
],
|
|
"dtype": "uint32",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 29360128,
|
|
"byteOffset": 0
|
|
}
|
|
],
|
|
"md5sum": "51b8c1fe471248ebd402f224557b77fe"
|
|
},
|
|
{
|
|
"dataPath": "params_shard_64.bin",
|
|
"format": "raw-shard",
|
|
"nbytes": 58720256,
|
|
"records": [
|
|
{
|
|
"name": "model.layers.19.mlp.gate_up_proj.q_weight",
|
|
"shape": [
|
|
28672,
|
|
512
|
|
],
|
|
"dtype": "uint32",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 58720256,
|
|
"byteOffset": 0
|
|
}
|
|
],
|
|
"md5sum": "3f224b93a88a5020616cc7c431b2e93d"
|
|
},
|
|
{
|
|
"dataPath": "params_shard_65.bin",
|
|
"format": "raw-shard",
|
|
"nbytes": 22036480,
|
|
"records": [
|
|
{
|
|
"name": "model.layers.18.self_attn.qkv_proj.q_scale",
|
|
"shape": [
|
|
6144,
|
|
128
|
|
],
|
|
"dtype": "float16",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 1572864,
|
|
"byteOffset": 0
|
|
},
|
|
{
|
|
"name": "model.layers.18.self_attn.o_proj.q_weight",
|
|
"shape": [
|
|
4096,
|
|
512
|
|
],
|
|
"dtype": "uint32",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 8388608,
|
|
"byteOffset": 1572864
|
|
},
|
|
{
|
|
"name": "model.layers.18.self_attn.o_proj.q_scale",
|
|
"shape": [
|
|
4096,
|
|
128
|
|
],
|
|
"dtype": "float16",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 1048576,
|
|
"byteOffset": 9961472
|
|
},
|
|
{
|
|
"name": "model.layers.19.input_layernorm.weight",
|
|
"shape": [
|
|
4096
|
|
],
|
|
"dtype": "float16",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 8192,
|
|
"byteOffset": 11010048
|
|
},
|
|
{
|
|
"name": "model.layers.19.mlp.down_proj.q_scale",
|
|
"shape": [
|
|
4096,
|
|
448
|
|
],
|
|
"dtype": "float16",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 3670016,
|
|
"byteOffset": 11018240
|
|
},
|
|
{
|
|
"name": "model.layers.19.mlp.gate_up_proj.q_scale",
|
|
"shape": [
|
|
28672,
|
|
128
|
|
],
|
|
"dtype": "float16",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 7340032,
|
|
"byteOffset": 14688256
|
|
},
|
|
{
|
|
"name": "model.layers.19.post_attention_layernorm.weight",
|
|
"shape": [
|
|
4096
|
|
],
|
|
"dtype": "float16",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 8192,
|
|
"byteOffset": 22028288
|
|
}
|
|
],
|
|
"md5sum": "551889ae6444dddc2ad09ee565253941"
|
|
},
|
|
{
|
|
"dataPath": "params_shard_66.bin",
|
|
"format": "raw-shard",
|
|
"nbytes": 58720256,
|
|
"records": [
|
|
{
|
|
"name": "model.layers.20.mlp.gate_up_proj.q_weight",
|
|
"shape": [
|
|
28672,
|
|
512
|
|
],
|
|
"dtype": "uint32",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 58720256,
|
|
"byteOffset": 0
|
|
}
|
|
],
|
|
"md5sum": "4fb48687d405d5f25e8e39eaf17e972f"
|
|
},
|
|
{
|
|
"dataPath": "params_shard_67.bin",
|
|
"format": "raw-shard",
|
|
"nbytes": 30932992,
|
|
"records": [
|
|
{
|
|
"name": "model.layers.19.self_attn.qkv_proj.q_weight",
|
|
"shape": [
|
|
6144,
|
|
512
|
|
],
|
|
"dtype": "uint32",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 12582912,
|
|
"byteOffset": 0
|
|
},
|
|
{
|
|
"name": "model.layers.19.self_attn.qkv_proj.q_scale",
|
|
"shape": [
|
|
6144,
|
|
128
|
|
],
|
|
"dtype": "float16",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 1572864,
|
|
"byteOffset": 12582912
|
|
},
|
|
{
|
|
"name": "model.layers.19.self_attn.o_proj.q_weight",
|
|
"shape": [
|
|
4096,
|
|
512
|
|
],
|
|
"dtype": "uint32",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 8388608,
|
|
"byteOffset": 14155776
|
|
},
|
|
{
|
|
"name": "model.layers.19.self_attn.o_proj.q_scale",
|
|
"shape": [
|
|
4096,
|
|
128
|
|
],
|
|
"dtype": "float16",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 1048576,
|
|
"byteOffset": 22544384
|
|
},
|
|
{
|
|
"name": "model.layers.20.mlp.gate_up_proj.q_scale",
|
|
"shape": [
|
|
28672,
|
|
128
|
|
],
|
|
"dtype": "float16",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 7340032,
|
|
"byteOffset": 23592960
|
|
}
|
|
],
|
|
"md5sum": "b7495792582b6c6c35475603316812ea"
|
|
},
|
|
{
|
|
"dataPath": "params_shard_68.bin",
|
|
"format": "raw-shard",
|
|
"nbytes": 29360128,
|
|
"records": [
|
|
{
|
|
"name": "model.layers.9.mlp.down_proj.q_weight",
|
|
"shape": [
|
|
4096,
|
|
1792
|
|
],
|
|
"dtype": "uint32",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 29360128,
|
|
"byteOffset": 0
|
|
}
|
|
],
|
|
"md5sum": "9d65b62e5bc63d7f6d33b99dc2666fea"
|
|
},
|
|
{
|
|
"dataPath": "params_shard_69.bin",
|
|
"format": "raw-shard",
|
|
"nbytes": 58720256,
|
|
"records": [
|
|
{
|
|
"name": "model.layers.9.mlp.gate_up_proj.q_weight",
|
|
"shape": [
|
|
28672,
|
|
512
|
|
],
|
|
"dtype": "uint32",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 58720256,
|
|
"byteOffset": 0
|
|
}
|
|
],
|
|
"md5sum": "d94f64c209fb226d7f83e24325fdc2dc"
|
|
},
|
|
{
|
|
"dataPath": "params_shard_70.bin",
|
|
"format": "raw-shard",
|
|
"nbytes": 27271168,
|
|
"records": [
|
|
{
|
|
"name": "model.layers.20.self_attn.qkv_proj.q_weight",
|
|
"shape": [
|
|
6144,
|
|
512
|
|
],
|
|
"dtype": "uint32",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 12582912,
|
|
"byteOffset": 0
|
|
},
|
|
{
|
|
"name": "model.layers.20.self_attn.qkv_proj.q_scale",
|
|
"shape": [
|
|
6144,
|
|
128
|
|
],
|
|
"dtype": "float16",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 1572864,
|
|
"byteOffset": 12582912
|
|
},
|
|
{
|
|
"name": "model.layers.20.self_attn.o_proj.q_weight",
|
|
"shape": [
|
|
4096,
|
|
512
|
|
],
|
|
"dtype": "uint32",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 8388608,
|
|
"byteOffset": 14155776
|
|
},
|
|
{
|
|
"name": "model.layers.20.self_attn.o_proj.q_scale",
|
|
"shape": [
|
|
4096,
|
|
128
|
|
],
|
|
"dtype": "float16",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 1048576,
|
|
"byteOffset": 22544384
|
|
},
|
|
{
|
|
"name": "model.layers.9.input_layernorm.weight",
|
|
"shape": [
|
|
4096
|
|
],
|
|
"dtype": "float16",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 8192,
|
|
"byteOffset": 23592960
|
|
},
|
|
{
|
|
"name": "model.layers.9.mlp.down_proj.q_scale",
|
|
"shape": [
|
|
4096,
|
|
448
|
|
],
|
|
"dtype": "float16",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 3670016,
|
|
"byteOffset": 23601152
|
|
}
|
|
],
|
|
"md5sum": "ae599e066309f5bf9a8780fcd9b91742"
|
|
},
|
|
{
|
|
"dataPath": "params_shard_71.bin",
|
|
"format": "raw-shard",
|
|
"nbytes": 29360128,
|
|
"records": [
|
|
{
|
|
"name": "model.layers.20.mlp.down_proj.q_weight",
|
|
"shape": [
|
|
4096,
|
|
1792
|
|
],
|
|
"dtype": "uint32",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 29360128,
|
|
"byteOffset": 0
|
|
}
|
|
],
|
|
"md5sum": "0f6e3d9ddf13c7ff3f04380c2ccfc5e8"
|
|
},
|
|
{
|
|
"dataPath": "params_shard_72.bin",
|
|
"format": "raw-shard",
|
|
"nbytes": 30949376,
|
|
"records": [
|
|
{
|
|
"name": "model.layers.9.mlp.gate_up_proj.q_scale",
|
|
"shape": [
|
|
28672,
|
|
128
|
|
],
|
|
"dtype": "float16",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 7340032,
|
|
"byteOffset": 0
|
|
},
|
|
{
|
|
"name": "model.layers.9.post_attention_layernorm.weight",
|
|
"shape": [
|
|
4096
|
|
],
|
|
"dtype": "float16",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 8192,
|
|
"byteOffset": 7340032
|
|
},
|
|
{
|
|
"name": "model.layers.9.self_attn.qkv_proj.q_weight",
|
|
"shape": [
|
|
6144,
|
|
512
|
|
],
|
|
"dtype": "uint32",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 12582912,
|
|
"byteOffset": 7348224
|
|
},
|
|
{
|
|
"name": "model.layers.9.self_attn.qkv_proj.q_scale",
|
|
"shape": [
|
|
6144,
|
|
128
|
|
],
|
|
"dtype": "float16",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 1572864,
|
|
"byteOffset": 19931136
|
|
},
|
|
{
|
|
"name": "model.layers.9.self_attn.o_proj.q_weight",
|
|
"shape": [
|
|
4096,
|
|
512
|
|
],
|
|
"dtype": "uint32",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 8388608,
|
|
"byteOffset": 21504000
|
|
},
|
|
{
|
|
"name": "model.layers.9.self_attn.o_proj.q_scale",
|
|
"shape": [
|
|
4096,
|
|
128
|
|
],
|
|
"dtype": "float16",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 1048576,
|
|
"byteOffset": 29892608
|
|
},
|
|
{
|
|
"name": "model.layers.20.input_layernorm.weight",
|
|
"shape": [
|
|
4096
|
|
],
|
|
"dtype": "float16",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 8192,
|
|
"byteOffset": 30941184
|
|
}
|
|
],
|
|
"md5sum": "296b7c58538beba09b7e52d6daa9ee93"
|
|
},
|
|
{
|
|
"dataPath": "params_shard_73.bin",
|
|
"format": "raw-shard",
|
|
"nbytes": 33046528,
|
|
"records": [
|
|
{
|
|
"name": "model.layers.20.mlp.down_proj.q_scale",
|
|
"shape": [
|
|
4096,
|
|
448
|
|
],
|
|
"dtype": "float16",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 3670016,
|
|
"byteOffset": 0
|
|
},
|
|
{
|
|
"name": "model.layers.20.post_attention_layernorm.weight",
|
|
"shape": [
|
|
4096
|
|
],
|
|
"dtype": "float16",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 8192,
|
|
"byteOffset": 3670016
|
|
},
|
|
{
|
|
"name": "model.layers.21.input_layernorm.weight",
|
|
"shape": [
|
|
4096
|
|
],
|
|
"dtype": "float16",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 8192,
|
|
"byteOffset": 3678208
|
|
},
|
|
{
|
|
"name": "model.layers.21.mlp.down_proj.q_weight",
|
|
"shape": [
|
|
4096,
|
|
1792
|
|
],
|
|
"dtype": "uint32",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 29360128,
|
|
"byteOffset": 3686400
|
|
}
|
|
],
|
|
"md5sum": "a2f9bed488cbf5c0edcab142230f16d8"
|
|
},
|
|
{
|
|
"dataPath": "params_shard_74.bin",
|
|
"format": "raw-shard",
|
|
"nbytes": 58720256,
|
|
"records": [
|
|
{
|
|
"name": "model.layers.21.mlp.gate_up_proj.q_weight",
|
|
"shape": [
|
|
28672,
|
|
512
|
|
],
|
|
"dtype": "uint32",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 58720256,
|
|
"byteOffset": 0
|
|
}
|
|
],
|
|
"md5sum": "4f43f42b2a1b8aa0cf110a913b2ec2f9"
|
|
},
|
|
{
|
|
"dataPath": "params_shard_75.bin",
|
|
"format": "raw-shard",
|
|
"nbytes": 25174016,
|
|
"records": [
|
|
{
|
|
"name": "model.layers.21.mlp.down_proj.q_scale",
|
|
"shape": [
|
|
4096,
|
|
448
|
|
],
|
|
"dtype": "float16",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 3670016,
|
|
"byteOffset": 0
|
|
},
|
|
{
|
|
"name": "model.layers.21.mlp.gate_up_proj.q_scale",
|
|
"shape": [
|
|
28672,
|
|
128
|
|
],
|
|
"dtype": "float16",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 7340032,
|
|
"byteOffset": 3670016
|
|
},
|
|
{
|
|
"name": "model.layers.21.post_attention_layernorm.weight",
|
|
"shape": [
|
|
4096
|
|
],
|
|
"dtype": "float16",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 8192,
|
|
"byteOffset": 11010048
|
|
},
|
|
{
|
|
"name": "model.layers.21.self_attn.qkv_proj.q_weight",
|
|
"shape": [
|
|
6144,
|
|
512
|
|
],
|
|
"dtype": "uint32",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 12582912,
|
|
"byteOffset": 11018240
|
|
},
|
|
{
|
|
"name": "model.layers.21.self_attn.qkv_proj.q_scale",
|
|
"shape": [
|
|
6144,
|
|
128
|
|
],
|
|
"dtype": "float16",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 1572864,
|
|
"byteOffset": 23601152
|
|
}
|
|
],
|
|
"md5sum": "bcca9a58fa67755b46fb566600e37700"
|
|
},
|
|
{
|
|
"dataPath": "params_shard_76.bin",
|
|
"format": "raw-shard",
|
|
"nbytes": 29360128,
|
|
"records": [
|
|
{
|
|
"name": "model.layers.22.mlp.down_proj.q_weight",
|
|
"shape": [
|
|
4096,
|
|
1792
|
|
],
|
|
"dtype": "uint32",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 29360128,
|
|
"byteOffset": 0
|
|
}
|
|
],
|
|
"md5sum": "83b13975ccbc23a92c9249291afbdb57"
|
|
},
|
|
{
|
|
"dataPath": "params_shard_77.bin",
|
|
"format": "raw-shard",
|
|
"nbytes": 58720256,
|
|
"records": [
|
|
{
|
|
"name": "model.layers.22.mlp.gate_up_proj.q_weight",
|
|
"shape": [
|
|
28672,
|
|
512
|
|
],
|
|
"dtype": "uint32",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 58720256,
|
|
"byteOffset": 0
|
|
}
|
|
],
|
|
"md5sum": "623d6320445b62f536f438e1160d0725"
|
|
},
|
|
{
|
|
"dataPath": "params_shard_78.bin",
|
|
"format": "raw-shard",
|
|
"nbytes": 33046528,
|
|
"records": [
|
|
{
|
|
"name": "model.layers.21.self_attn.o_proj.q_weight",
|
|
"shape": [
|
|
4096,
|
|
512
|
|
],
|
|
"dtype": "uint32",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 8388608,
|
|
"byteOffset": 0
|
|
},
|
|
{
|
|
"name": "model.layers.21.self_attn.o_proj.q_scale",
|
|
"shape": [
|
|
4096,
|
|
128
|
|
],
|
|
"dtype": "float16",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 1048576,
|
|
"byteOffset": 8388608
|
|
},
|
|
{
|
|
"name": "model.layers.22.input_layernorm.weight",
|
|
"shape": [
|
|
4096
|
|
],
|
|
"dtype": "float16",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 8192,
|
|
"byteOffset": 9437184
|
|
},
|
|
{
|
|
"name": "model.layers.22.mlp.down_proj.q_scale",
|
|
"shape": [
|
|
4096,
|
|
448
|
|
],
|
|
"dtype": "float16",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 3670016,
|
|
"byteOffset": 9445376
|
|
},
|
|
{
|
|
"name": "model.layers.22.mlp.gate_up_proj.q_scale",
|
|
"shape": [
|
|
28672,
|
|
128
|
|
],
|
|
"dtype": "float16",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 7340032,
|
|
"byteOffset": 13115392
|
|
},
|
|
{
|
|
"name": "model.layers.22.post_attention_layernorm.weight",
|
|
"shape": [
|
|
4096
|
|
],
|
|
"dtype": "float16",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 8192,
|
|
"byteOffset": 20455424
|
|
},
|
|
{
|
|
"name": "model.layers.22.self_attn.qkv_proj.q_weight",
|
|
"shape": [
|
|
6144,
|
|
512
|
|
],
|
|
"dtype": "uint32",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 12582912,
|
|
"byteOffset": 20463616
|
|
}
|
|
],
|
|
"md5sum": "db69a1e2f70ce3d63eedd7faa7452f9c"
|
|
},
|
|
{
|
|
"dataPath": "params_shard_79.bin",
|
|
"format": "raw-shard",
|
|
"nbytes": 29360128,
|
|
"records": [
|
|
{
|
|
"name": "model.layers.23.mlp.down_proj.q_weight",
|
|
"shape": [
|
|
4096,
|
|
1792
|
|
],
|
|
"dtype": "uint32",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 29360128,
|
|
"byteOffset": 0
|
|
}
|
|
],
|
|
"md5sum": "baebd2f288fe3ea512b252f52409aecd"
|
|
},
|
|
{
|
|
"dataPath": "params_shard_80.bin",
|
|
"format": "raw-shard",
|
|
"nbytes": 58720256,
|
|
"records": [
|
|
{
|
|
"name": "model.layers.23.mlp.gate_up_proj.q_weight",
|
|
"shape": [
|
|
28672,
|
|
512
|
|
],
|
|
"dtype": "uint32",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 58720256,
|
|
"byteOffset": 0
|
|
}
|
|
],
|
|
"md5sum": "36fa518e2a2d73f5ff7391a40b98678d"
|
|
},
|
|
{
|
|
"dataPath": "params_shard_81.bin",
|
|
"format": "raw-shard",
|
|
"nbytes": 22036480,
|
|
"records": [
|
|
{
|
|
"name": "model.layers.22.self_attn.qkv_proj.q_scale",
|
|
"shape": [
|
|
6144,
|
|
128
|
|
],
|
|
"dtype": "float16",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 1572864,
|
|
"byteOffset": 0
|
|
},
|
|
{
|
|
"name": "model.layers.22.self_attn.o_proj.q_weight",
|
|
"shape": [
|
|
4096,
|
|
512
|
|
],
|
|
"dtype": "uint32",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 8388608,
|
|
"byteOffset": 1572864
|
|
},
|
|
{
|
|
"name": "model.layers.22.self_attn.o_proj.q_scale",
|
|
"shape": [
|
|
4096,
|
|
128
|
|
],
|
|
"dtype": "float16",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 1048576,
|
|
"byteOffset": 9961472
|
|
},
|
|
{
|
|
"name": "model.layers.23.input_layernorm.weight",
|
|
"shape": [
|
|
4096
|
|
],
|
|
"dtype": "float16",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 8192,
|
|
"byteOffset": 11010048
|
|
},
|
|
{
|
|
"name": "model.layers.23.mlp.down_proj.q_scale",
|
|
"shape": [
|
|
4096,
|
|
448
|
|
],
|
|
"dtype": "float16",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 3670016,
|
|
"byteOffset": 11018240
|
|
},
|
|
{
|
|
"name": "model.layers.23.mlp.gate_up_proj.q_scale",
|
|
"shape": [
|
|
28672,
|
|
128
|
|
],
|
|
"dtype": "float16",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 7340032,
|
|
"byteOffset": 14688256
|
|
},
|
|
{
|
|
"name": "model.layers.23.post_attention_layernorm.weight",
|
|
"shape": [
|
|
4096
|
|
],
|
|
"dtype": "float16",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 8192,
|
|
"byteOffset": 22028288
|
|
}
|
|
],
|
|
"md5sum": "49d14acff2a86f34be87e92fbca27107"
|
|
},
|
|
{
|
|
"dataPath": "params_shard_82.bin",
|
|
"format": "raw-shard",
|
|
"nbytes": 29360128,
|
|
"records": [
|
|
{
|
|
"name": "model.layers.24.mlp.down_proj.q_weight",
|
|
"shape": [
|
|
4096,
|
|
1792
|
|
],
|
|
"dtype": "uint32",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 29360128,
|
|
"byteOffset": 0
|
|
}
|
|
],
|
|
"md5sum": "d75fddf1f671d59c9af04e8a8e870216"
|
|
},
|
|
{
|
|
"dataPath": "params_shard_83.bin",
|
|
"format": "raw-shard",
|
|
"nbytes": 58720256,
|
|
"records": [
|
|
{
|
|
"name": "model.layers.24.mlp.gate_up_proj.q_weight",
|
|
"shape": [
|
|
28672,
|
|
512
|
|
],
|
|
"dtype": "uint32",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 58720256,
|
|
"byteOffset": 0
|
|
}
|
|
],
|
|
"md5sum": "f64856d57ec02e6251e470428f7a9291"
|
|
},
|
|
{
|
|
"dataPath": "params_shard_84.bin",
|
|
"format": "raw-shard",
|
|
"nbytes": 27271168,
|
|
"records": [
|
|
{
|
|
"name": "model.layers.23.self_attn.qkv_proj.q_weight",
|
|
"shape": [
|
|
6144,
|
|
512
|
|
],
|
|
"dtype": "uint32",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 12582912,
|
|
"byteOffset": 0
|
|
},
|
|
{
|
|
"name": "model.layers.23.self_attn.qkv_proj.q_scale",
|
|
"shape": [
|
|
6144,
|
|
128
|
|
],
|
|
"dtype": "float16",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 1572864,
|
|
"byteOffset": 12582912
|
|
},
|
|
{
|
|
"name": "model.layers.23.self_attn.o_proj.q_weight",
|
|
"shape": [
|
|
4096,
|
|
512
|
|
],
|
|
"dtype": "uint32",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 8388608,
|
|
"byteOffset": 14155776
|
|
},
|
|
{
|
|
"name": "model.layers.23.self_attn.o_proj.q_scale",
|
|
"shape": [
|
|
4096,
|
|
128
|
|
],
|
|
"dtype": "float16",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 1048576,
|
|
"byteOffset": 22544384
|
|
},
|
|
{
|
|
"name": "model.layers.24.input_layernorm.weight",
|
|
"shape": [
|
|
4096
|
|
],
|
|
"dtype": "float16",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 8192,
|
|
"byteOffset": 23592960
|
|
},
|
|
{
|
|
"name": "model.layers.24.mlp.down_proj.q_scale",
|
|
"shape": [
|
|
4096,
|
|
448
|
|
],
|
|
"dtype": "float16",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 3670016,
|
|
"byteOffset": 23601152
|
|
}
|
|
],
|
|
"md5sum": "13bbb5ca06e47f7ba6b6c6453b5508b9"
|
|
},
|
|
{
|
|
"dataPath": "params_shard_85.bin",
|
|
"format": "raw-shard",
|
|
"nbytes": 29360128,
|
|
"records": [
|
|
{
|
|
"name": "model.layers.25.mlp.down_proj.q_weight",
|
|
"shape": [
|
|
4096,
|
|
1792
|
|
],
|
|
"dtype": "uint32",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 29360128,
|
|
"byteOffset": 0
|
|
}
|
|
],
|
|
"md5sum": "4ff5b39b4c15d4daa369f9651dccdc63"
|
|
},
|
|
{
|
|
"dataPath": "params_shard_86.bin",
|
|
"format": "raw-shard",
|
|
"nbytes": 30949376,
|
|
"records": [
|
|
{
|
|
"name": "model.layers.24.mlp.gate_up_proj.q_scale",
|
|
"shape": [
|
|
28672,
|
|
128
|
|
],
|
|
"dtype": "float16",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 7340032,
|
|
"byteOffset": 0
|
|
},
|
|
{
|
|
"name": "model.layers.24.post_attention_layernorm.weight",
|
|
"shape": [
|
|
4096
|
|
],
|
|
"dtype": "float16",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 8192,
|
|
"byteOffset": 7340032
|
|
},
|
|
{
|
|
"name": "model.layers.24.self_attn.qkv_proj.q_weight",
|
|
"shape": [
|
|
6144,
|
|
512
|
|
],
|
|
"dtype": "uint32",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 12582912,
|
|
"byteOffset": 7348224
|
|
},
|
|
{
|
|
"name": "model.layers.24.self_attn.qkv_proj.q_scale",
|
|
"shape": [
|
|
6144,
|
|
128
|
|
],
|
|
"dtype": "float16",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 1572864,
|
|
"byteOffset": 19931136
|
|
},
|
|
{
|
|
"name": "model.layers.24.self_attn.o_proj.q_weight",
|
|
"shape": [
|
|
4096,
|
|
512
|
|
],
|
|
"dtype": "uint32",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 8388608,
|
|
"byteOffset": 21504000
|
|
},
|
|
{
|
|
"name": "model.layers.24.self_attn.o_proj.q_scale",
|
|
"shape": [
|
|
4096,
|
|
128
|
|
],
|
|
"dtype": "float16",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 1048576,
|
|
"byteOffset": 29892608
|
|
},
|
|
{
|
|
"name": "model.layers.25.input_layernorm.weight",
|
|
"shape": [
|
|
4096
|
|
],
|
|
"dtype": "float16",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 8192,
|
|
"byteOffset": 30941184
|
|
}
|
|
],
|
|
"md5sum": "a962b7ce298228debb135b9aa22ee5c3"
|
|
},
|
|
{
|
|
"dataPath": "params_shard_87.bin",
|
|
"format": "raw-shard",
|
|
"nbytes": 58720256,
|
|
"records": [
|
|
{
|
|
"name": "model.layers.25.mlp.gate_up_proj.q_weight",
|
|
"shape": [
|
|
28672,
|
|
512
|
|
],
|
|
"dtype": "uint32",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 58720256,
|
|
"byteOffset": 0
|
|
}
|
|
],
|
|
"md5sum": "f80f60b68c5530560ad24a177580083d"
|
|
},
|
|
{
|
|
"dataPath": "params_shard_88.bin",
|
|
"format": "raw-shard",
|
|
"nbytes": 25174016,
|
|
"records": [
|
|
{
|
|
"name": "model.layers.25.mlp.down_proj.q_scale",
|
|
"shape": [
|
|
4096,
|
|
448
|
|
],
|
|
"dtype": "float16",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 3670016,
|
|
"byteOffset": 0
|
|
},
|
|
{
|
|
"name": "model.layers.25.mlp.gate_up_proj.q_scale",
|
|
"shape": [
|
|
28672,
|
|
128
|
|
],
|
|
"dtype": "float16",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 7340032,
|
|
"byteOffset": 3670016
|
|
},
|
|
{
|
|
"name": "model.layers.25.post_attention_layernorm.weight",
|
|
"shape": [
|
|
4096
|
|
],
|
|
"dtype": "float16",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 8192,
|
|
"byteOffset": 11010048
|
|
},
|
|
{
|
|
"name": "model.layers.25.self_attn.qkv_proj.q_weight",
|
|
"shape": [
|
|
6144,
|
|
512
|
|
],
|
|
"dtype": "uint32",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 12582912,
|
|
"byteOffset": 11018240
|
|
},
|
|
{
|
|
"name": "model.layers.25.self_attn.qkv_proj.q_scale",
|
|
"shape": [
|
|
6144,
|
|
128
|
|
],
|
|
"dtype": "float16",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 1572864,
|
|
"byteOffset": 23601152
|
|
}
|
|
],
|
|
"md5sum": "d22ec1d0525b8baad09b953eb547dc53"
|
|
},
|
|
{
|
|
"dataPath": "params_shard_89.bin",
|
|
"format": "raw-shard",
|
|
"nbytes": 29360128,
|
|
"records": [
|
|
{
|
|
"name": "model.layers.26.mlp.down_proj.q_weight",
|
|
"shape": [
|
|
4096,
|
|
1792
|
|
],
|
|
"dtype": "uint32",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 29360128,
|
|
"byteOffset": 0
|
|
}
|
|
],
|
|
"md5sum": "ae2aa368b950add9127a6e0b1c0430fa"
|
|
},
|
|
{
|
|
"dataPath": "params_shard_90.bin",
|
|
"format": "raw-shard",
|
|
"nbytes": 58720256,
|
|
"records": [
|
|
{
|
|
"name": "model.layers.26.mlp.gate_up_proj.q_weight",
|
|
"shape": [
|
|
28672,
|
|
512
|
|
],
|
|
"dtype": "uint32",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 58720256,
|
|
"byteOffset": 0
|
|
}
|
|
],
|
|
"md5sum": "35329ab435260f46d2b0b667af97d4ab"
|
|
},
|
|
{
|
|
"dataPath": "params_shard_91.bin",
|
|
"format": "raw-shard",
|
|
"nbytes": 33046528,
|
|
"records": [
|
|
{
|
|
"name": "model.layers.25.self_attn.o_proj.q_weight",
|
|
"shape": [
|
|
4096,
|
|
512
|
|
],
|
|
"dtype": "uint32",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 8388608,
|
|
"byteOffset": 0
|
|
},
|
|
{
|
|
"name": "model.layers.25.self_attn.o_proj.q_scale",
|
|
"shape": [
|
|
4096,
|
|
128
|
|
],
|
|
"dtype": "float16",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 1048576,
|
|
"byteOffset": 8388608
|
|
},
|
|
{
|
|
"name": "model.layers.26.input_layernorm.weight",
|
|
"shape": [
|
|
4096
|
|
],
|
|
"dtype": "float16",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 8192,
|
|
"byteOffset": 9437184
|
|
},
|
|
{
|
|
"name": "model.layers.26.mlp.down_proj.q_scale",
|
|
"shape": [
|
|
4096,
|
|
448
|
|
],
|
|
"dtype": "float16",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 3670016,
|
|
"byteOffset": 9445376
|
|
},
|
|
{
|
|
"name": "model.layers.26.mlp.gate_up_proj.q_scale",
|
|
"shape": [
|
|
28672,
|
|
128
|
|
],
|
|
"dtype": "float16",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 7340032,
|
|
"byteOffset": 13115392
|
|
},
|
|
{
|
|
"name": "model.layers.26.post_attention_layernorm.weight",
|
|
"shape": [
|
|
4096
|
|
],
|
|
"dtype": "float16",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 8192,
|
|
"byteOffset": 20455424
|
|
},
|
|
{
|
|
"name": "model.layers.26.self_attn.qkv_proj.q_weight",
|
|
"shape": [
|
|
6144,
|
|
512
|
|
],
|
|
"dtype": "uint32",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 12582912,
|
|
"byteOffset": 20463616
|
|
}
|
|
],
|
|
"md5sum": "7920af8eea8c83f77c717b1e51976ff0"
|
|
},
|
|
{
|
|
"dataPath": "params_shard_92.bin",
|
|
"format": "raw-shard",
|
|
"nbytes": 29360128,
|
|
"records": [
|
|
{
|
|
"name": "model.layers.27.mlp.down_proj.q_weight",
|
|
"shape": [
|
|
4096,
|
|
1792
|
|
],
|
|
"dtype": "uint32",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 29360128,
|
|
"byteOffset": 0
|
|
}
|
|
],
|
|
"md5sum": "863774552010027b0ba269dcca9ab270"
|
|
},
|
|
{
|
|
"dataPath": "params_shard_93.bin",
|
|
"format": "raw-shard",
|
|
"nbytes": 58720256,
|
|
"records": [
|
|
{
|
|
"name": "model.layers.27.mlp.gate_up_proj.q_weight",
|
|
"shape": [
|
|
28672,
|
|
512
|
|
],
|
|
"dtype": "uint32",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 58720256,
|
|
"byteOffset": 0
|
|
}
|
|
],
|
|
"md5sum": "1398099f9e81dfb9300e9cbe76c980b4"
|
|
},
|
|
{
|
|
"dataPath": "params_shard_94.bin",
|
|
"format": "raw-shard",
|
|
"nbytes": 22036480,
|
|
"records": [
|
|
{
|
|
"name": "model.layers.26.self_attn.qkv_proj.q_scale",
|
|
"shape": [
|
|
6144,
|
|
128
|
|
],
|
|
"dtype": "float16",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 1572864,
|
|
"byteOffset": 0
|
|
},
|
|
{
|
|
"name": "model.layers.26.self_attn.o_proj.q_weight",
|
|
"shape": [
|
|
4096,
|
|
512
|
|
],
|
|
"dtype": "uint32",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 8388608,
|
|
"byteOffset": 1572864
|
|
},
|
|
{
|
|
"name": "model.layers.26.self_attn.o_proj.q_scale",
|
|
"shape": [
|
|
4096,
|
|
128
|
|
],
|
|
"dtype": "float16",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 1048576,
|
|
"byteOffset": 9961472
|
|
},
|
|
{
|
|
"name": "model.layers.27.input_layernorm.weight",
|
|
"shape": [
|
|
4096
|
|
],
|
|
"dtype": "float16",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 8192,
|
|
"byteOffset": 11010048
|
|
},
|
|
{
|
|
"name": "model.layers.27.mlp.down_proj.q_scale",
|
|
"shape": [
|
|
4096,
|
|
448
|
|
],
|
|
"dtype": "float16",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 3670016,
|
|
"byteOffset": 11018240
|
|
},
|
|
{
|
|
"name": "model.layers.27.mlp.gate_up_proj.q_scale",
|
|
"shape": [
|
|
28672,
|
|
128
|
|
],
|
|
"dtype": "float16",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 7340032,
|
|
"byteOffset": 14688256
|
|
},
|
|
{
|
|
"name": "model.layers.27.post_attention_layernorm.weight",
|
|
"shape": [
|
|
4096
|
|
],
|
|
"dtype": "float16",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 8192,
|
|
"byteOffset": 22028288
|
|
}
|
|
],
|
|
"md5sum": "32cc689e7d2f4c5af963c2697400132f"
|
|
},
|
|
{
|
|
"dataPath": "params_shard_95.bin",
|
|
"format": "raw-shard",
|
|
"nbytes": 29360128,
|
|
"records": [
|
|
{
|
|
"name": "model.layers.28.mlp.down_proj.q_weight",
|
|
"shape": [
|
|
4096,
|
|
1792
|
|
],
|
|
"dtype": "uint32",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 29360128,
|
|
"byteOffset": 0
|
|
}
|
|
],
|
|
"md5sum": "cabdbbc2e77c006016dfcfd77a76a6e7"
|
|
},
|
|
{
|
|
"dataPath": "params_shard_96.bin",
|
|
"format": "raw-shard",
|
|
"nbytes": 58720256,
|
|
"records": [
|
|
{
|
|
"name": "model.layers.28.mlp.gate_up_proj.q_weight",
|
|
"shape": [
|
|
28672,
|
|
512
|
|
],
|
|
"dtype": "uint32",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 58720256,
|
|
"byteOffset": 0
|
|
}
|
|
],
|
|
"md5sum": "28a8c25ff34ece8b6d408d258f247995"
|
|
},
|
|
{
|
|
"dataPath": "params_shard_97.bin",
|
|
"format": "raw-shard",
|
|
"nbytes": 27271168,
|
|
"records": [
|
|
{
|
|
"name": "model.layers.27.self_attn.qkv_proj.q_weight",
|
|
"shape": [
|
|
6144,
|
|
512
|
|
],
|
|
"dtype": "uint32",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 12582912,
|
|
"byteOffset": 0
|
|
},
|
|
{
|
|
"name": "model.layers.27.self_attn.qkv_proj.q_scale",
|
|
"shape": [
|
|
6144,
|
|
128
|
|
],
|
|
"dtype": "float16",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 1572864,
|
|
"byteOffset": 12582912
|
|
},
|
|
{
|
|
"name": "model.layers.27.self_attn.o_proj.q_weight",
|
|
"shape": [
|
|
4096,
|
|
512
|
|
],
|
|
"dtype": "uint32",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 8388608,
|
|
"byteOffset": 14155776
|
|
},
|
|
{
|
|
"name": "model.layers.27.self_attn.o_proj.q_scale",
|
|
"shape": [
|
|
4096,
|
|
128
|
|
],
|
|
"dtype": "float16",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 1048576,
|
|
"byteOffset": 22544384
|
|
},
|
|
{
|
|
"name": "model.layers.28.input_layernorm.weight",
|
|
"shape": [
|
|
4096
|
|
],
|
|
"dtype": "float16",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 8192,
|
|
"byteOffset": 23592960
|
|
},
|
|
{
|
|
"name": "model.layers.28.mlp.down_proj.q_scale",
|
|
"shape": [
|
|
4096,
|
|
448
|
|
],
|
|
"dtype": "float16",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 3670016,
|
|
"byteOffset": 23601152
|
|
}
|
|
],
|
|
"md5sum": "0515dedfaa3353da90d198e7d893c1a9"
|
|
},
|
|
{
|
|
"dataPath": "params_shard_98.bin",
|
|
"format": "raw-shard",
|
|
"nbytes": 29360128,
|
|
"records": [
|
|
{
|
|
"name": "model.layers.29.mlp.down_proj.q_weight",
|
|
"shape": [
|
|
4096,
|
|
1792
|
|
],
|
|
"dtype": "uint32",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 29360128,
|
|
"byteOffset": 0
|
|
}
|
|
],
|
|
"md5sum": "169b8823ddc147472ac4898a3d4c6f11"
|
|
},
|
|
{
|
|
"dataPath": "params_shard_99.bin",
|
|
"format": "raw-shard",
|
|
"nbytes": 30949376,
|
|
"records": [
|
|
{
|
|
"name": "model.layers.28.mlp.gate_up_proj.q_scale",
|
|
"shape": [
|
|
28672,
|
|
128
|
|
],
|
|
"dtype": "float16",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 7340032,
|
|
"byteOffset": 0
|
|
},
|
|
{
|
|
"name": "model.layers.28.post_attention_layernorm.weight",
|
|
"shape": [
|
|
4096
|
|
],
|
|
"dtype": "float16",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 8192,
|
|
"byteOffset": 7340032
|
|
},
|
|
{
|
|
"name": "model.layers.28.self_attn.qkv_proj.q_weight",
|
|
"shape": [
|
|
6144,
|
|
512
|
|
],
|
|
"dtype": "uint32",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 12582912,
|
|
"byteOffset": 7348224
|
|
},
|
|
{
|
|
"name": "model.layers.28.self_attn.qkv_proj.q_scale",
|
|
"shape": [
|
|
6144,
|
|
128
|
|
],
|
|
"dtype": "float16",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 1572864,
|
|
"byteOffset": 19931136
|
|
},
|
|
{
|
|
"name": "model.layers.28.self_attn.o_proj.q_weight",
|
|
"shape": [
|
|
4096,
|
|
512
|
|
],
|
|
"dtype": "uint32",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 8388608,
|
|
"byteOffset": 21504000
|
|
},
|
|
{
|
|
"name": "model.layers.28.self_attn.o_proj.q_scale",
|
|
"shape": [
|
|
4096,
|
|
128
|
|
],
|
|
"dtype": "float16",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 1048576,
|
|
"byteOffset": 29892608
|
|
},
|
|
{
|
|
"name": "model.layers.29.input_layernorm.weight",
|
|
"shape": [
|
|
4096
|
|
],
|
|
"dtype": "float16",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 8192,
|
|
"byteOffset": 30941184
|
|
}
|
|
],
|
|
"md5sum": "97296d9cc5074d20ac628f099778622a"
|
|
},
|
|
{
|
|
"dataPath": "params_shard_100.bin",
|
|
"format": "raw-shard",
|
|
"nbytes": 58720256,
|
|
"records": [
|
|
{
|
|
"name": "model.layers.29.mlp.gate_up_proj.q_weight",
|
|
"shape": [
|
|
28672,
|
|
512
|
|
],
|
|
"dtype": "uint32",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 58720256,
|
|
"byteOffset": 0
|
|
}
|
|
],
|
|
"md5sum": "b4b8f6b68fa3dbb668a85df357c52fc6"
|
|
},
|
|
{
|
|
"dataPath": "params_shard_101.bin",
|
|
"format": "raw-shard",
|
|
"nbytes": 25174016,
|
|
"records": [
|
|
{
|
|
"name": "model.layers.29.mlp.down_proj.q_scale",
|
|
"shape": [
|
|
4096,
|
|
448
|
|
],
|
|
"dtype": "float16",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 3670016,
|
|
"byteOffset": 0
|
|
},
|
|
{
|
|
"name": "model.layers.29.mlp.gate_up_proj.q_scale",
|
|
"shape": [
|
|
28672,
|
|
128
|
|
],
|
|
"dtype": "float16",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 7340032,
|
|
"byteOffset": 3670016
|
|
},
|
|
{
|
|
"name": "model.layers.29.post_attention_layernorm.weight",
|
|
"shape": [
|
|
4096
|
|
],
|
|
"dtype": "float16",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 8192,
|
|
"byteOffset": 11010048
|
|
},
|
|
{
|
|
"name": "model.layers.29.self_attn.qkv_proj.q_weight",
|
|
"shape": [
|
|
6144,
|
|
512
|
|
],
|
|
"dtype": "uint32",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 12582912,
|
|
"byteOffset": 11018240
|
|
},
|
|
{
|
|
"name": "model.layers.29.self_attn.qkv_proj.q_scale",
|
|
"shape": [
|
|
6144,
|
|
128
|
|
],
|
|
"dtype": "float16",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 1572864,
|
|
"byteOffset": 23601152
|
|
}
|
|
],
|
|
"md5sum": "14718b24da1a0587dc98b05db813a54f"
|
|
},
|
|
{
|
|
"dataPath": "params_shard_102.bin",
|
|
"format": "raw-shard",
|
|
"nbytes": 29360128,
|
|
"records": [
|
|
{
|
|
"name": "model.layers.30.mlp.down_proj.q_weight",
|
|
"shape": [
|
|
4096,
|
|
1792
|
|
],
|
|
"dtype": "uint32",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 29360128,
|
|
"byteOffset": 0
|
|
}
|
|
],
|
|
"md5sum": "25d0e6ab1a43cc7eeac1a333044995a5"
|
|
},
|
|
{
|
|
"dataPath": "params_shard_103.bin",
|
|
"format": "raw-shard",
|
|
"nbytes": 58720256,
|
|
"records": [
|
|
{
|
|
"name": "model.layers.30.mlp.gate_up_proj.q_weight",
|
|
"shape": [
|
|
28672,
|
|
512
|
|
],
|
|
"dtype": "uint32",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 58720256,
|
|
"byteOffset": 0
|
|
}
|
|
],
|
|
"md5sum": "039cd90ccd532b8c301e591a11114786"
|
|
},
|
|
{
|
|
"dataPath": "params_shard_104.bin",
|
|
"format": "raw-shard",
|
|
"nbytes": 33046528,
|
|
"records": [
|
|
{
|
|
"name": "model.layers.29.self_attn.o_proj.q_weight",
|
|
"shape": [
|
|
4096,
|
|
512
|
|
],
|
|
"dtype": "uint32",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 8388608,
|
|
"byteOffset": 0
|
|
},
|
|
{
|
|
"name": "model.layers.29.self_attn.o_proj.q_scale",
|
|
"shape": [
|
|
4096,
|
|
128
|
|
],
|
|
"dtype": "float16",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 1048576,
|
|
"byteOffset": 8388608
|
|
},
|
|
{
|
|
"name": "model.layers.30.input_layernorm.weight",
|
|
"shape": [
|
|
4096
|
|
],
|
|
"dtype": "float16",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 8192,
|
|
"byteOffset": 9437184
|
|
},
|
|
{
|
|
"name": "model.layers.30.mlp.down_proj.q_scale",
|
|
"shape": [
|
|
4096,
|
|
448
|
|
],
|
|
"dtype": "float16",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 3670016,
|
|
"byteOffset": 9445376
|
|
},
|
|
{
|
|
"name": "model.layers.30.mlp.gate_up_proj.q_scale",
|
|
"shape": [
|
|
28672,
|
|
128
|
|
],
|
|
"dtype": "float16",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 7340032,
|
|
"byteOffset": 13115392
|
|
},
|
|
{
|
|
"name": "model.layers.30.post_attention_layernorm.weight",
|
|
"shape": [
|
|
4096
|
|
],
|
|
"dtype": "float16",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 8192,
|
|
"byteOffset": 20455424
|
|
},
|
|
{
|
|
"name": "model.layers.30.self_attn.qkv_proj.q_weight",
|
|
"shape": [
|
|
6144,
|
|
512
|
|
],
|
|
"dtype": "uint32",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 12582912,
|
|
"byteOffset": 20463616
|
|
}
|
|
],
|
|
"md5sum": "67d4ca8730a6f29f4f068d1632fe8acd"
|
|
},
|
|
{
|
|
"dataPath": "params_shard_105.bin",
|
|
"format": "raw-shard",
|
|
"nbytes": 58720256,
|
|
"records": [
|
|
{
|
|
"name": "model.layers.31.mlp.gate_up_proj.q_weight",
|
|
"shape": [
|
|
28672,
|
|
512
|
|
],
|
|
"dtype": "uint32",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 58720256,
|
|
"byteOffset": 0
|
|
}
|
|
],
|
|
"md5sum": "04be16b84fbd8eb556d3cf5ea0cb470d"
|
|
},
|
|
{
|
|
"dataPath": "params_shard_106.bin",
|
|
"format": "raw-shard",
|
|
"nbytes": 32505856,
|
|
"records": [
|
|
{
|
|
"name": "model.layers.30.self_attn.qkv_proj.q_scale",
|
|
"shape": [
|
|
6144,
|
|
128
|
|
],
|
|
"dtype": "float16",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 1572864,
|
|
"byteOffset": 0
|
|
},
|
|
{
|
|
"name": "model.layers.30.self_attn.o_proj.q_weight",
|
|
"shape": [
|
|
4096,
|
|
512
|
|
],
|
|
"dtype": "uint32",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 8388608,
|
|
"byteOffset": 1572864
|
|
},
|
|
{
|
|
"name": "model.layers.30.self_attn.o_proj.q_scale",
|
|
"shape": [
|
|
4096,
|
|
128
|
|
],
|
|
"dtype": "float16",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 1048576,
|
|
"byteOffset": 9961472
|
|
},
|
|
{
|
|
"name": "model.layers.31.mlp.gate_up_proj.q_scale",
|
|
"shape": [
|
|
28672,
|
|
128
|
|
],
|
|
"dtype": "float16",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 7340032,
|
|
"byteOffset": 11010048
|
|
},
|
|
{
|
|
"name": "model.layers.31.self_attn.qkv_proj.q_weight",
|
|
"shape": [
|
|
6144,
|
|
512
|
|
],
|
|
"dtype": "uint32",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 12582912,
|
|
"byteOffset": 18350080
|
|
},
|
|
{
|
|
"name": "model.layers.31.self_attn.qkv_proj.q_scale",
|
|
"shape": [
|
|
6144,
|
|
128
|
|
],
|
|
"dtype": "float16",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 1572864,
|
|
"byteOffset": 30932992
|
|
}
|
|
],
|
|
"md5sum": "e08689ebe1b287aaf51dc28e49c10195"
|
|
},
|
|
{
|
|
"dataPath": "params_shard_107.bin",
|
|
"format": "raw-shard",
|
|
"nbytes": 9437184,
|
|
"records": [
|
|
{
|
|
"name": "model.layers.31.self_attn.o_proj.q_weight",
|
|
"shape": [
|
|
4096,
|
|
512
|
|
],
|
|
"dtype": "uint32",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 8388608,
|
|
"byteOffset": 0
|
|
},
|
|
{
|
|
"name": "model.layers.31.self_attn.o_proj.q_scale",
|
|
"shape": [
|
|
4096,
|
|
128
|
|
],
|
|
"dtype": "float16",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 1048576,
|
|
"byteOffset": 8388608
|
|
}
|
|
],
|
|
"md5sum": "828ddcc7bdb31ece5ef0995a85a16e0b"
|
|
}
|
|
]
|
|
} |