starcoder-q3f16_1-MLC / ndarray-cache.json
junrushao's picture
Initial commit
a4a8eff
Raw
History Blame Contribute Delete
270 kB
{
"metadata": {
"ParamSize": 648,
"ParamBytes": 7137087488.0,
"BitsPerParam": 3.6092729746843064
},
"records": [
{
"dataPath": "params_shard_0.bin",
"format": "raw-shard",
"nbytes": 121110528,
"records": [
{
"name": "lm_head.q_weight",
"shape": [
49152,
616
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 121110528,
"byteOffset": 0
}
],
"md5sum": "08e67fc89dfe9920284571c1ecce3580"
},
{
"dataPath": "params_shard_1.bin",
"format": "raw-shard",
"nbytes": 60555264,
"records": [
{
"name": "transformer.h.38.mlp.c_fc.q_weight",
"shape": [
24576,
616
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 60555264,
"byteOffset": 0
}
],
"md5sum": "65210ff0006702f826d9e3a8a95901d8"
},
{
"dataPath": "params_shard_2.bin",
"format": "raw-shard",
"nbytes": 32256000,
"records": [
{
"name": "lm_head.q_scale",
"shape": [
49152,
154
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 15138816,
"byteOffset": 0
},
{
"name": "transformer.h.38.attn.c_proj.bias",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 15138816
},
{
"name": "transformer.h.38.attn.c_proj.q_weight",
"shape": [
6144,
616
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 15138816,
"byteOffset": 15151104
},
{
"name": "transformer.h.38.attn.c_proj.q_scale",
"shape": [
6144,
154
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1892352,
"byteOffset": 30289920
},
{
"name": "transformer.h.38.ln_2.bias",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 32182272
},
{
"name": "transformer.h.38.ln_2.weight",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 32194560
},
{
"name": "transformer.h.38.mlp.c_fc.bias",
"shape": [
24576
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 49152,
"byteOffset": 32206848
}
],
"md5sum": "3f4381292568d434b3b0cbc576d40177"
},
{
"dataPath": "params_shard_3.bin",
"format": "raw-shard",
"nbytes": 60456960,
"records": [
{
"name": "transformer.h.38.mlp.c_proj.q_weight",
"shape": [
6144,
2460
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 60456960,
"byteOffset": 0
}
],
"md5sum": "9302396d5f345ae77ff7e1486608eea6"
},
{
"dataPath": "params_shard_4.bin",
"format": "raw-shard",
"nbytes": 32904704,
"records": [
{
"name": "transformer.h.38.mlp.c_fc.q_scale",
"shape": [
24576,
154
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7569408,
"byteOffset": 0
},
{
"name": "transformer.h.38.mlp.c_proj.bias",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 7569408
},
{
"name": "transformer.h.38.mlp.c_proj.q_scale",
"shape": [
6144,
615
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7557120,
"byteOffset": 7581696
},
{
"name": "transformer.h.39.attn.c_attn.bias",
"shape": [
6400
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12800,
"byteOffset": 15138816
},
{
"name": "transformer.h.39.attn.c_attn.q_weight",
"shape": [
6400,
616
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 15769600,
"byteOffset": 15151616
},
{
"name": "transformer.h.39.attn.c_attn.q_scale",
"shape": [
6400,
154
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1971200,
"byteOffset": 30921216
},
{
"name": "transformer.h.39.attn.c_proj.bias",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 32892416
}
],
"md5sum": "9b123944450d2d1314842dfc3c50d2c7"
},
{
"dataPath": "params_shard_5.bin",
"format": "raw-shard",
"nbytes": 60555264,
"records": [
{
"name": "transformer.h.39.mlp.c_fc.q_weight",
"shape": [
24576,
616
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 60555264,
"byteOffset": 0
}
],
"md5sum": "eee96aaa6d229a7d7df61d9e307ef9ef"
},
{
"dataPath": "params_shard_6.bin",
"format": "raw-shard",
"nbytes": 60456960,
"records": [
{
"name": "transformer.h.39.mlp.c_proj.q_weight",
"shape": [
6144,
2460
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 60456960,
"byteOffset": 0
}
],
"md5sum": "481ea66d938604bba0df984c78881b08"
},
{
"dataPath": "params_shard_7.bin",
"format": "raw-shard",
"nbytes": 32305664,
"records": [
{
"name": "transformer.h.39.attn.c_proj.q_weight",
"shape": [
6144,
616
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 15138816,
"byteOffset": 0
},
{
"name": "transformer.h.39.attn.c_proj.q_scale",
"shape": [
6144,
154
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1892352,
"byteOffset": 15138816
},
{
"name": "transformer.h.39.ln_1.bias",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 17031168
},
{
"name": "transformer.h.39.ln_1.weight",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 17043456
},
{
"name": "transformer.h.39.ln_2.bias",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 17055744
},
{
"name": "transformer.h.39.ln_2.weight",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 17068032
},
{
"name": "transformer.h.39.mlp.c_fc.bias",
"shape": [
24576
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 49152,
"byteOffset": 17080320
},
{
"name": "transformer.h.39.mlp.c_fc.q_scale",
"shape": [
24576,
154
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7569408,
"byteOffset": 17129472
},
{
"name": "transformer.h.39.mlp.c_proj.bias",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 24698880
},
{
"name": "transformer.h.39.mlp.c_proj.q_scale",
"shape": [
6144,
615
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7557120,
"byteOffset": 24711168
},
{
"name": "transformer.ln_f.bias",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 32268288
},
{
"name": "transformer.ln_f.weight",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 32280576
},
{
"name": "transformer.h.0.attn.c_attn.bias",
"shape": [
6400
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12800,
"byteOffset": 32292864
}
],
"md5sum": "3baf4c628a4e826ee73fa82bfb34ca22"
},
{
"dataPath": "params_shard_8.bin",
"format": "raw-shard",
"nbytes": 32891904,
"records": [
{
"name": "transformer.h.0.attn.c_attn.q_weight",
"shape": [
6400,
616
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 15769600,
"byteOffset": 0
},
{
"name": "transformer.h.0.attn.c_attn.q_scale",
"shape": [
6400,
154
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1971200,
"byteOffset": 15769600
},
{
"name": "transformer.h.0.attn.c_proj.bias",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 17740800
},
{
"name": "transformer.h.0.attn.c_proj.q_weight",
"shape": [
6144,
616
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 15138816,
"byteOffset": 17753088
}
],
"md5sum": "cf00311b0b7b987d43d8454c0db8fc59"
},
{
"dataPath": "params_shard_9.bin",
"format": "raw-shard",
"nbytes": 60555264,
"records": [
{
"name": "transformer.h.0.mlp.c_fc.q_weight",
"shape": [
24576,
616
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 60555264,
"byteOffset": 0
}
],
"md5sum": "0a62f5e7efe95721345de7d7be3078d8"
},
{
"dataPath": "params_shard_10.bin",
"format": "raw-shard",
"nbytes": 60456960,
"records": [
{
"name": "transformer.h.0.mlp.c_proj.q_weight",
"shape": [
6144,
2460
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 60456960,
"byteOffset": 0
}
],
"md5sum": "7fd33d8b9952e1edfb9f7987020859c7"
},
{
"dataPath": "params_shard_11.bin",
"format": "raw-shard",
"nbytes": 32911872,
"records": [
{
"name": "transformer.h.0.attn.c_proj.q_scale",
"shape": [
6144,
154
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1892352,
"byteOffset": 0
},
{
"name": "transformer.h.0.ln_1.bias",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 1892352
},
{
"name": "transformer.h.0.ln_1.weight",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 1904640
},
{
"name": "transformer.h.0.ln_2.bias",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 1916928
},
{
"name": "transformer.h.0.ln_2.weight",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 1929216
},
{
"name": "transformer.h.0.mlp.c_fc.bias",
"shape": [
24576
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 49152,
"byteOffset": 1941504
},
{
"name": "transformer.h.0.mlp.c_fc.q_scale",
"shape": [
24576,
154
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7569408,
"byteOffset": 1990656
},
{
"name": "transformer.h.0.mlp.c_proj.bias",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 9560064
},
{
"name": "transformer.h.0.mlp.c_proj.q_scale",
"shape": [
6144,
615
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7557120,
"byteOffset": 9572352
},
{
"name": "transformer.h.1.attn.c_attn.bias",
"shape": [
6400
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12800,
"byteOffset": 17129472
},
{
"name": "transformer.h.1.attn.c_attn.q_weight",
"shape": [
6400,
616
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 15769600,
"byteOffset": 17142272
}
],
"md5sum": "cf000b76899082167cbe5713f59251f4"
},
{
"dataPath": "params_shard_12.bin",
"format": "raw-shard",
"nbytes": 60555264,
"records": [
{
"name": "transformer.h.1.mlp.c_fc.q_weight",
"shape": [
24576,
616
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 60555264,
"byteOffset": 0
}
],
"md5sum": "d251ca3f170567e1ff0e2daa17bffe8b"
},
{
"dataPath": "params_shard_13.bin",
"format": "raw-shard",
"nbytes": 60456960,
"records": [
{
"name": "transformer.h.1.mlp.c_proj.q_weight",
"shape": [
6144,
2460
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 60456960,
"byteOffset": 0
}
],
"md5sum": "c413af1958ce2db5be0c5711613a5c40"
},
{
"dataPath": "params_shard_14.bin",
"format": "raw-shard",
"nbytes": 26694656,
"records": [
{
"name": "transformer.h.1.attn.c_attn.q_scale",
"shape": [
6400,
154
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1971200,
"byteOffset": 0
},
{
"name": "transformer.h.1.attn.c_proj.bias",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 1971200
},
{
"name": "transformer.h.1.attn.c_proj.q_weight",
"shape": [
6144,
616
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 15138816,
"byteOffset": 1983488
},
{
"name": "transformer.h.1.attn.c_proj.q_scale",
"shape": [
6144,
154
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1892352,
"byteOffset": 17122304
},
{
"name": "transformer.h.1.ln_1.bias",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 19014656
},
{
"name": "transformer.h.1.ln_1.weight",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 19026944
},
{
"name": "transformer.h.1.ln_2.bias",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 19039232
},
{
"name": "transformer.h.1.ln_2.weight",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 19051520
},
{
"name": "transformer.h.1.mlp.c_fc.bias",
"shape": [
24576
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 49152,
"byteOffset": 19063808
},
{
"name": "transformer.h.1.mlp.c_fc.q_scale",
"shape": [
24576,
154
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7569408,
"byteOffset": 19112960
},
{
"name": "transformer.h.1.mlp.c_proj.bias",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 26682368
}
],
"md5sum": "b2d947dbb88b4bb327f12e7ee76f317c"
},
{
"dataPath": "params_shard_15.bin",
"format": "raw-shard",
"nbytes": 25323008,
"records": [
{
"name": "transformer.h.1.mlp.c_proj.q_scale",
"shape": [
6144,
615
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7557120,
"byteOffset": 0
},
{
"name": "transformer.h.2.attn.c_attn.bias",
"shape": [
6400
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12800,
"byteOffset": 7557120
},
{
"name": "transformer.h.2.attn.c_attn.q_weight",
"shape": [
6400,
616
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 15769600,
"byteOffset": 7569920
},
{
"name": "transformer.h.2.attn.c_attn.q_scale",
"shape": [
6400,
154
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1971200,
"byteOffset": 23339520
},
{
"name": "transformer.h.2.attn.c_proj.bias",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 25310720
}
],
"md5sum": "c30db6ae55ef72ffa0d413448c12ebf3"
},
{
"dataPath": "params_shard_16.bin",
"format": "raw-shard",
"nbytes": 60555264,
"records": [
{
"name": "transformer.h.2.mlp.c_fc.q_weight",
"shape": [
24576,
616
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 60555264,
"byteOffset": 0
}
],
"md5sum": "0130f7b4535a8c0a303fc8ed55402229"
},
{
"dataPath": "params_shard_17.bin",
"format": "raw-shard",
"nbytes": 60456960,
"records": [
{
"name": "transformer.h.2.mlp.c_proj.q_weight",
"shape": [
6144,
2460
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 60456960,
"byteOffset": 0
}
],
"md5sum": "13fafa9488673685351b9d2a9c26bea1"
},
{
"dataPath": "params_shard_18.bin",
"format": "raw-shard",
"nbytes": 32281088,
"records": [
{
"name": "transformer.h.2.attn.c_proj.q_weight",
"shape": [
6144,
616
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 15138816,
"byteOffset": 0
},
{
"name": "transformer.h.2.attn.c_proj.q_scale",
"shape": [
6144,
154
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1892352,
"byteOffset": 15138816
},
{
"name": "transformer.h.2.ln_1.bias",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 17031168
},
{
"name": "transformer.h.2.ln_1.weight",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 17043456
},
{
"name": "transformer.h.2.ln_2.bias",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 17055744
},
{
"name": "transformer.h.2.ln_2.weight",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 17068032
},
{
"name": "transformer.h.2.mlp.c_fc.bias",
"shape": [
24576
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 49152,
"byteOffset": 17080320
},
{
"name": "transformer.h.2.mlp.c_fc.q_scale",
"shape": [
24576,
154
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7569408,
"byteOffset": 17129472
},
{
"name": "transformer.h.2.mlp.c_proj.bias",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 24698880
},
{
"name": "transformer.h.2.mlp.c_proj.q_scale",
"shape": [
6144,
615
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7557120,
"byteOffset": 24711168
},
{
"name": "transformer.h.3.attn.c_attn.bias",
"shape": [
6400
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12800,
"byteOffset": 32268288
}
],
"md5sum": "57764d4b11296567d374eb4d97a2dfe8"
},
{
"dataPath": "params_shard_19.bin",
"format": "raw-shard",
"nbytes": 32891904,
"records": [
{
"name": "transformer.h.3.attn.c_attn.q_weight",
"shape": [
6400,
616
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 15769600,
"byteOffset": 0
},
{
"name": "transformer.h.3.attn.c_attn.q_scale",
"shape": [
6400,
154
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1971200,
"byteOffset": 15769600
},
{
"name": "transformer.h.3.attn.c_proj.bias",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 17740800
},
{
"name": "transformer.h.3.attn.c_proj.q_weight",
"shape": [
6144,
616
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 15138816,
"byteOffset": 17753088
}
],
"md5sum": "fbdeb2a069b832ad1b4a3708a4bbdf34"
},
{
"dataPath": "params_shard_20.bin",
"format": "raw-shard",
"nbytes": 60555264,
"records": [
{
"name": "transformer.h.3.mlp.c_fc.q_weight",
"shape": [
24576,
616
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 60555264,
"byteOffset": 0
}
],
"md5sum": "3bca164a7abb780821ac51624aedda37"
},
{
"dataPath": "params_shard_21.bin",
"format": "raw-shard",
"nbytes": 60456960,
"records": [
{
"name": "transformer.h.3.mlp.c_proj.q_weight",
"shape": [
6144,
2460
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 60456960,
"byteOffset": 0
}
],
"md5sum": "eb82e855ec657c3817ceb78542bb306d"
},
{
"dataPath": "params_shard_22.bin",
"format": "raw-shard",
"nbytes": 32911872,
"records": [
{
"name": "transformer.h.3.attn.c_proj.q_scale",
"shape": [
6144,
154
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1892352,
"byteOffset": 0
},
{
"name": "transformer.h.3.ln_1.bias",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 1892352
},
{
"name": "transformer.h.3.ln_1.weight",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 1904640
},
{
"name": "transformer.h.3.ln_2.bias",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 1916928
},
{
"name": "transformer.h.3.ln_2.weight",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 1929216
},
{
"name": "transformer.h.3.mlp.c_fc.bias",
"shape": [
24576
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 49152,
"byteOffset": 1941504
},
{
"name": "transformer.h.3.mlp.c_fc.q_scale",
"shape": [
24576,
154
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7569408,
"byteOffset": 1990656
},
{
"name": "transformer.h.3.mlp.c_proj.bias",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 9560064
},
{
"name": "transformer.h.3.mlp.c_proj.q_scale",
"shape": [
6144,
615
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7557120,
"byteOffset": 9572352
},
{
"name": "transformer.h.4.attn.c_attn.bias",
"shape": [
6400
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12800,
"byteOffset": 17129472
},
{
"name": "transformer.h.4.attn.c_attn.q_weight",
"shape": [
6400,
616
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 15769600,
"byteOffset": 17142272
}
],
"md5sum": "06cef7c8c33ac0d9ca1497995f1dc0c4"
},
{
"dataPath": "params_shard_23.bin",
"format": "raw-shard",
"nbytes": 60555264,
"records": [
{
"name": "transformer.h.4.mlp.c_fc.q_weight",
"shape": [
24576,
616
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 60555264,
"byteOffset": 0
}
],
"md5sum": "63adac2abdad63f0c0a5066ab041b8a3"
},
{
"dataPath": "params_shard_24.bin",
"format": "raw-shard",
"nbytes": 60456960,
"records": [
{
"name": "transformer.h.4.mlp.c_proj.q_weight",
"shape": [
6144,
2460
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 60456960,
"byteOffset": 0
}
],
"md5sum": "15d8bd8e8c596a410051a79f2aa4bb41"
},
{
"dataPath": "params_shard_25.bin",
"format": "raw-shard",
"nbytes": 26694656,
"records": [
{
"name": "transformer.h.4.attn.c_attn.q_scale",
"shape": [
6400,
154
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1971200,
"byteOffset": 0
},
{
"name": "transformer.h.4.attn.c_proj.bias",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 1971200
},
{
"name": "transformer.h.4.attn.c_proj.q_weight",
"shape": [
6144,
616
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 15138816,
"byteOffset": 1983488
},
{
"name": "transformer.h.4.attn.c_proj.q_scale",
"shape": [
6144,
154
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1892352,
"byteOffset": 17122304
},
{
"name": "transformer.h.4.ln_1.bias",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 19014656
},
{
"name": "transformer.h.4.ln_1.weight",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 19026944
},
{
"name": "transformer.h.4.ln_2.bias",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 19039232
},
{
"name": "transformer.h.4.ln_2.weight",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 19051520
},
{
"name": "transformer.h.4.mlp.c_fc.bias",
"shape": [
24576
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 49152,
"byteOffset": 19063808
},
{
"name": "transformer.h.4.mlp.c_fc.q_scale",
"shape": [
24576,
154
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7569408,
"byteOffset": 19112960
},
{
"name": "transformer.h.4.mlp.c_proj.bias",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 26682368
}
],
"md5sum": "a495852193ac51223f11901e399a3138"
},
{
"dataPath": "params_shard_26.bin",
"format": "raw-shard",
"nbytes": 25323008,
"records": [
{
"name": "transformer.h.4.mlp.c_proj.q_scale",
"shape": [
6144,
615
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7557120,
"byteOffset": 0
},
{
"name": "transformer.h.5.attn.c_attn.bias",
"shape": [
6400
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12800,
"byteOffset": 7557120
},
{
"name": "transformer.h.5.attn.c_attn.q_weight",
"shape": [
6400,
616
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 15769600,
"byteOffset": 7569920
},
{
"name": "transformer.h.5.attn.c_attn.q_scale",
"shape": [
6400,
154
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1971200,
"byteOffset": 23339520
},
{
"name": "transformer.h.5.attn.c_proj.bias",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 25310720
}
],
"md5sum": "a00967be9d02bdadee85b1ab9fedc481"
},
{
"dataPath": "params_shard_27.bin",
"format": "raw-shard",
"nbytes": 60555264,
"records": [
{
"name": "transformer.h.5.mlp.c_fc.q_weight",
"shape": [
24576,
616
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 60555264,
"byteOffset": 0
}
],
"md5sum": "5f5374cf73e851b64801111dc7ee3453"
},
{
"dataPath": "params_shard_28.bin",
"format": "raw-shard",
"nbytes": 20185088,
"records": [
{
"name": "transformer.wpe.q_weight",
"shape": [
8192,
616
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 20185088,
"byteOffset": 0
}
],
"md5sum": "36446746451e93f6c5773e29204a954a"
},
{
"dataPath": "params_shard_29.bin",
"format": "raw-shard",
"nbytes": 121110528,
"records": [
{
"name": "transformer.wte.q_weight",
"shape": [
49152,
616
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 121110528,
"byteOffset": 0
}
],
"md5sum": "08e67fc89dfe9920284571c1ecce3580"
},
{
"dataPath": "params_shard_30.bin",
"format": "raw-shard",
"nbytes": 27222016,
"records": [
{
"name": "transformer.h.5.attn.c_proj.q_weight",
"shape": [
6144,
616
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 15138816,
"byteOffset": 0
},
{
"name": "transformer.h.5.attn.c_proj.q_scale",
"shape": [
6144,
154
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1892352,
"byteOffset": 15138816
},
{
"name": "transformer.h.5.ln_1.bias",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 17031168
},
{
"name": "transformer.h.5.ln_1.weight",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 17043456
},
{
"name": "transformer.h.5.ln_2.bias",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 17055744
},
{
"name": "transformer.h.5.ln_2.weight",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 17068032
},
{
"name": "transformer.h.5.mlp.c_fc.bias",
"shape": [
24576
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 49152,
"byteOffset": 17080320
},
{
"name": "transformer.h.5.mlp.c_fc.q_scale",
"shape": [
24576,
154
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7569408,
"byteOffset": 17129472
},
{
"name": "transformer.wpe.q_scale",
"shape": [
8192,
154
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2523136,
"byteOffset": 24698880
}
],
"md5sum": "605d30a700b38a7664228eafe33bb7df"
},
{
"dataPath": "params_shard_31.bin",
"format": "raw-shard",
"nbytes": 32904704,
"records": [
{
"name": "transformer.wte.q_scale",
"shape": [
49152,
154
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 15138816,
"byteOffset": 0
},
{
"name": "transformer.h.10.attn.c_attn.bias",
"shape": [
6400
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12800,
"byteOffset": 15138816
},
{
"name": "transformer.h.10.attn.c_attn.q_weight",
"shape": [
6400,
616
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 15769600,
"byteOffset": 15151616
},
{
"name": "transformer.h.10.attn.c_attn.q_scale",
"shape": [
6400,
154
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1971200,
"byteOffset": 30921216
},
{
"name": "transformer.h.10.attn.c_proj.bias",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 32892416
}
],
"md5sum": "ef927424e92db0adc9e6fa944e3bc219"
},
{
"dataPath": "params_shard_32.bin",
"format": "raw-shard",
"nbytes": 60555264,
"records": [
{
"name": "transformer.h.10.mlp.c_fc.q_weight",
"shape": [
24576,
616
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 60555264,
"byteOffset": 0
}
],
"md5sum": "303034166faa159a2e6d8812a4e88496"
},
{
"dataPath": "params_shard_33.bin",
"format": "raw-shard",
"nbytes": 60456960,
"records": [
{
"name": "transformer.h.10.mlp.c_proj.q_weight",
"shape": [
6144,
2460
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 60456960,
"byteOffset": 0
}
],
"md5sum": "21cec455be38bc891506201c7ca7725c"
},
{
"dataPath": "params_shard_34.bin",
"format": "raw-shard",
"nbytes": 32281088,
"records": [
{
"name": "transformer.h.10.attn.c_proj.q_weight",
"shape": [
6144,
616
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 15138816,
"byteOffset": 0
},
{
"name": "transformer.h.10.attn.c_proj.q_scale",
"shape": [
6144,
154
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1892352,
"byteOffset": 15138816
},
{
"name": "transformer.h.10.ln_1.bias",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 17031168
},
{
"name": "transformer.h.10.ln_1.weight",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 17043456
},
{
"name": "transformer.h.10.ln_2.bias",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 17055744
},
{
"name": "transformer.h.10.ln_2.weight",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 17068032
},
{
"name": "transformer.h.10.mlp.c_fc.bias",
"shape": [
24576
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 49152,
"byteOffset": 17080320
},
{
"name": "transformer.h.10.mlp.c_fc.q_scale",
"shape": [
24576,
154
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7569408,
"byteOffset": 17129472
},
{
"name": "transformer.h.10.mlp.c_proj.bias",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 24698880
},
{
"name": "transformer.h.10.mlp.c_proj.q_scale",
"shape": [
6144,
615
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7557120,
"byteOffset": 24711168
},
{
"name": "transformer.h.11.attn.c_attn.bias",
"shape": [
6400
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12800,
"byteOffset": 32268288
}
],
"md5sum": "5f8f0895b5319c72387395fe13c97f37"
},
{
"dataPath": "params_shard_35.bin",
"format": "raw-shard",
"nbytes": 32891904,
"records": [
{
"name": "transformer.h.11.attn.c_attn.q_weight",
"shape": [
6400,
616
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 15769600,
"byteOffset": 0
},
{
"name": "transformer.h.11.attn.c_attn.q_scale",
"shape": [
6400,
154
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1971200,
"byteOffset": 15769600
},
{
"name": "transformer.h.11.attn.c_proj.bias",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 17740800
},
{
"name": "transformer.h.11.attn.c_proj.q_weight",
"shape": [
6144,
616
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 15138816,
"byteOffset": 17753088
}
],
"md5sum": "0c474bddc4f9bca0bb4a2ae5ca6ed643"
},
{
"dataPath": "params_shard_36.bin",
"format": "raw-shard",
"nbytes": 60555264,
"records": [
{
"name": "transformer.h.11.mlp.c_fc.q_weight",
"shape": [
24576,
616
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 60555264,
"byteOffset": 0
}
],
"md5sum": "f9e22562f5d62fbc4a30e07602716236"
},
{
"dataPath": "params_shard_37.bin",
"format": "raw-shard",
"nbytes": 60456960,
"records": [
{
"name": "transformer.h.11.mlp.c_proj.q_weight",
"shape": [
6144,
2460
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 60456960,
"byteOffset": 0
}
],
"md5sum": "a58ee2cec1798905866ae80c70f108bf"
},
{
"dataPath": "params_shard_38.bin",
"format": "raw-shard",
"nbytes": 32911872,
"records": [
{
"name": "transformer.h.11.attn.c_proj.q_scale",
"shape": [
6144,
154
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1892352,
"byteOffset": 0
},
{
"name": "transformer.h.11.ln_1.bias",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 1892352
},
{
"name": "transformer.h.11.ln_1.weight",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 1904640
},
{
"name": "transformer.h.11.ln_2.bias",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 1916928
},
{
"name": "transformer.h.11.ln_2.weight",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 1929216
},
{
"name": "transformer.h.11.mlp.c_fc.bias",
"shape": [
24576
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 49152,
"byteOffset": 1941504
},
{
"name": "transformer.h.11.mlp.c_fc.q_scale",
"shape": [
24576,
154
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7569408,
"byteOffset": 1990656
},
{
"name": "transformer.h.11.mlp.c_proj.bias",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 9560064
},
{
"name": "transformer.h.11.mlp.c_proj.q_scale",
"shape": [
6144,
615
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7557120,
"byteOffset": 9572352
},
{
"name": "transformer.h.12.attn.c_attn.bias",
"shape": [
6400
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12800,
"byteOffset": 17129472
},
{
"name": "transformer.h.12.attn.c_attn.q_weight",
"shape": [
6400,
616
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 15769600,
"byteOffset": 17142272
}
],
"md5sum": "9f496ede88c1e9de42132bbd58fc275c"
},
{
"dataPath": "params_shard_39.bin",
"format": "raw-shard",
"nbytes": 60456960,
"records": [
{
"name": "transformer.h.5.mlp.c_proj.q_weight",
"shape": [
6144,
2460
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 60456960,
"byteOffset": 0
}
],
"md5sum": "629329e4869611f87cbc7bc746c2a9ae"
},
{
"dataPath": "params_shard_40.bin",
"format": "raw-shard",
"nbytes": 27331072,
"records": [
{
"name": "transformer.h.12.attn.c_attn.q_scale",
"shape": [
6400,
154
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1971200,
"byteOffset": 0
},
{
"name": "transformer.h.12.ln_1.bias",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 1971200
},
{
"name": "transformer.h.12.ln_1.weight",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 1983488
},
{
"name": "transformer.h.5.mlp.c_proj.bias",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 1995776
},
{
"name": "transformer.h.5.mlp.c_proj.q_scale",
"shape": [
6144,
615
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7557120,
"byteOffset": 2008064
},
{
"name": "transformer.h.6.attn.c_attn.bias",
"shape": [
6400
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12800,
"byteOffset": 9565184
},
{
"name": "transformer.h.6.attn.c_attn.q_weight",
"shape": [
6400,
616
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 15769600,
"byteOffset": 9577984
},
{
"name": "transformer.h.6.attn.c_attn.q_scale",
"shape": [
6400,
154
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1971200,
"byteOffset": 25347584
},
{
"name": "transformer.h.6.attn.c_proj.bias",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 27318784
}
],
"md5sum": "7a54768e494bace27b4690e9e63bdf13"
},
{
"dataPath": "params_shard_41.bin",
"format": "raw-shard",
"nbytes": 60555264,
"records": [
{
"name": "transformer.h.6.mlp.c_fc.q_weight",
"shape": [
24576,
616
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 60555264,
"byteOffset": 0
}
],
"md5sum": "8eace8bea59705d4a3687ab1e962acf6"
},
{
"dataPath": "params_shard_42.bin",
"format": "raw-shard",
"nbytes": 60456960,
"records": [
{
"name": "transformer.h.6.mlp.c_proj.q_weight",
"shape": [
6144,
2460
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 60456960,
"byteOffset": 0
}
],
"md5sum": "a68536821675630143e7d01931058199"
},
{
"dataPath": "params_shard_43.bin",
"format": "raw-shard",
"nbytes": 32281088,
"records": [
{
"name": "transformer.h.6.attn.c_proj.q_weight",
"shape": [
6144,
616
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 15138816,
"byteOffset": 0
},
{
"name": "transformer.h.6.attn.c_proj.q_scale",
"shape": [
6144,
154
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1892352,
"byteOffset": 15138816
},
{
"name": "transformer.h.6.ln_1.bias",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 17031168
},
{
"name": "transformer.h.6.ln_1.weight",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 17043456
},
{
"name": "transformer.h.6.ln_2.bias",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 17055744
},
{
"name": "transformer.h.6.ln_2.weight",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 17068032
},
{
"name": "transformer.h.6.mlp.c_fc.bias",
"shape": [
24576
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 49152,
"byteOffset": 17080320
},
{
"name": "transformer.h.6.mlp.c_fc.q_scale",
"shape": [
24576,
154
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7569408,
"byteOffset": 17129472
},
{
"name": "transformer.h.6.mlp.c_proj.bias",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 24698880
},
{
"name": "transformer.h.6.mlp.c_proj.q_scale",
"shape": [
6144,
615
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7557120,
"byteOffset": 24711168
},
{
"name": "transformer.h.7.attn.c_attn.bias",
"shape": [
6400
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12800,
"byteOffset": 32268288
}
],
"md5sum": "a5e278137ce001794ad695b4217c15d0"
},
{
"dataPath": "params_shard_44.bin",
"format": "raw-shard",
"nbytes": 32891904,
"records": [
{
"name": "transformer.h.7.attn.c_attn.q_weight",
"shape": [
6400,
616
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 15769600,
"byteOffset": 0
},
{
"name": "transformer.h.7.attn.c_attn.q_scale",
"shape": [
6400,
154
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1971200,
"byteOffset": 15769600
},
{
"name": "transformer.h.7.attn.c_proj.bias",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 17740800
},
{
"name": "transformer.h.7.attn.c_proj.q_weight",
"shape": [
6144,
616
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 15138816,
"byteOffset": 17753088
}
],
"md5sum": "7032e4aa8259748ce91f23e234b37544"
},
{
"dataPath": "params_shard_45.bin",
"format": "raw-shard",
"nbytes": 60555264,
"records": [
{
"name": "transformer.h.7.mlp.c_fc.q_weight",
"shape": [
24576,
616
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 60555264,
"byteOffset": 0
}
],
"md5sum": "53eec555e63f4446a8d75283681d8469"
},
{
"dataPath": "params_shard_46.bin",
"format": "raw-shard",
"nbytes": 60456960,
"records": [
{
"name": "transformer.h.7.mlp.c_proj.q_weight",
"shape": [
6144,
2460
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 60456960,
"byteOffset": 0
}
],
"md5sum": "27c9e6096454a2f1331468838fc32b63"
},
{
"dataPath": "params_shard_47.bin",
"format": "raw-shard",
"nbytes": 32911872,
"records": [
{
"name": "transformer.h.7.attn.c_proj.q_scale",
"shape": [
6144,
154
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1892352,
"byteOffset": 0
},
{
"name": "transformer.h.7.ln_1.bias",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 1892352
},
{
"name": "transformer.h.7.ln_1.weight",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 1904640
},
{
"name": "transformer.h.7.ln_2.bias",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 1916928
},
{
"name": "transformer.h.7.ln_2.weight",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 1929216
},
{
"name": "transformer.h.7.mlp.c_fc.bias",
"shape": [
24576
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 49152,
"byteOffset": 1941504
},
{
"name": "transformer.h.7.mlp.c_fc.q_scale",
"shape": [
24576,
154
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7569408,
"byteOffset": 1990656
},
{
"name": "transformer.h.7.mlp.c_proj.bias",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 9560064
},
{
"name": "transformer.h.7.mlp.c_proj.q_scale",
"shape": [
6144,
615
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7557120,
"byteOffset": 9572352
},
{
"name": "transformer.h.8.attn.c_attn.bias",
"shape": [
6400
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12800,
"byteOffset": 17129472
},
{
"name": "transformer.h.8.attn.c_attn.q_weight",
"shape": [
6400,
616
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 15769600,
"byteOffset": 17142272
}
],
"md5sum": "9c302990370d7748b31d476299a06490"
},
{
"dataPath": "params_shard_48.bin",
"format": "raw-shard",
"nbytes": 60555264,
"records": [
{
"name": "transformer.h.8.mlp.c_fc.q_weight",
"shape": [
24576,
616
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 60555264,
"byteOffset": 0
}
],
"md5sum": "e6483e4d2a68ba4ee0bc5bf3c24b79f1"
},
{
"dataPath": "params_shard_49.bin",
"format": "raw-shard",
"nbytes": 60456960,
"records": [
{
"name": "transformer.h.8.mlp.c_proj.q_weight",
"shape": [
6144,
2460
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 60456960,
"byteOffset": 0
}
],
"md5sum": "73fdccc86f2e69974d122617a8d3da87"
},
{
"dataPath": "params_shard_50.bin",
"format": "raw-shard",
"nbytes": 26694656,
"records": [
{
"name": "transformer.h.8.attn.c_attn.q_scale",
"shape": [
6400,
154
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1971200,
"byteOffset": 0
},
{
"name": "transformer.h.8.attn.c_proj.bias",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 1971200
},
{
"name": "transformer.h.8.attn.c_proj.q_weight",
"shape": [
6144,
616
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 15138816,
"byteOffset": 1983488
},
{
"name": "transformer.h.8.attn.c_proj.q_scale",
"shape": [
6144,
154
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1892352,
"byteOffset": 17122304
},
{
"name": "transformer.h.8.ln_1.bias",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 19014656
},
{
"name": "transformer.h.8.ln_1.weight",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 19026944
},
{
"name": "transformer.h.8.ln_2.bias",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 19039232
},
{
"name": "transformer.h.8.ln_2.weight",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 19051520
},
{
"name": "transformer.h.8.mlp.c_fc.bias",
"shape": [
24576
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 49152,
"byteOffset": 19063808
},
{
"name": "transformer.h.8.mlp.c_fc.q_scale",
"shape": [
24576,
154
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7569408,
"byteOffset": 19112960
},
{
"name": "transformer.h.8.mlp.c_proj.bias",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 26682368
}
],
"md5sum": "2b51f644ea7c0b74fe058358e056f5dc"
},
{
"dataPath": "params_shard_51.bin",
"format": "raw-shard",
"nbytes": 25323008,
"records": [
{
"name": "transformer.h.8.mlp.c_proj.q_scale",
"shape": [
6144,
615
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7557120,
"byteOffset": 0
},
{
"name": "transformer.h.9.attn.c_attn.bias",
"shape": [
6400
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12800,
"byteOffset": 7557120
},
{
"name": "transformer.h.9.attn.c_attn.q_weight",
"shape": [
6400,
616
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 15769600,
"byteOffset": 7569920
},
{
"name": "transformer.h.9.attn.c_attn.q_scale",
"shape": [
6400,
154
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1971200,
"byteOffset": 23339520
},
{
"name": "transformer.h.9.attn.c_proj.bias",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 25310720
}
],
"md5sum": "980e7f1c97fee44bc6024da75ebae364"
},
{
"dataPath": "params_shard_52.bin",
"format": "raw-shard",
"nbytes": 60555264,
"records": [
{
"name": "transformer.h.9.mlp.c_fc.q_weight",
"shape": [
24576,
616
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 60555264,
"byteOffset": 0
}
],
"md5sum": "884f3fe130e8b581c80b16ffe14ea550"
},
{
"dataPath": "params_shard_53.bin",
"format": "raw-shard",
"nbytes": 60456960,
"records": [
{
"name": "transformer.h.9.mlp.c_proj.q_weight",
"shape": [
6144,
2460
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 60456960,
"byteOffset": 0
}
],
"md5sum": "ef450e5893992cf8783c8624bc016da3"
},
{
"dataPath": "params_shard_54.bin",
"format": "raw-shard",
"nbytes": 32280576,
"records": [
{
"name": "transformer.h.9.attn.c_proj.q_weight",
"shape": [
6144,
616
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 15138816,
"byteOffset": 0
},
{
"name": "transformer.h.9.attn.c_proj.q_scale",
"shape": [
6144,
154
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1892352,
"byteOffset": 15138816
},
{
"name": "transformer.h.9.ln_1.bias",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 17031168
},
{
"name": "transformer.h.9.ln_1.weight",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 17043456
},
{
"name": "transformer.h.9.ln_2.bias",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 17055744
},
{
"name": "transformer.h.9.ln_2.weight",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 17068032
},
{
"name": "transformer.h.9.mlp.c_fc.bias",
"shape": [
24576
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 49152,
"byteOffset": 17080320
},
{
"name": "transformer.h.9.mlp.c_fc.q_scale",
"shape": [
24576,
154
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7569408,
"byteOffset": 17129472
},
{
"name": "transformer.h.9.mlp.c_proj.bias",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 24698880
},
{
"name": "transformer.h.9.mlp.c_proj.q_scale",
"shape": [
6144,
615
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7557120,
"byteOffset": 24711168
},
{
"name": "transformer.h.12.attn.c_proj.bias",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 32268288
}
],
"md5sum": "7d08a7dbb621cab02d492942e1388567"
},
{
"dataPath": "params_shard_55.bin",
"format": "raw-shard",
"nbytes": 60555264,
"records": [
{
"name": "transformer.h.12.mlp.c_fc.q_weight",
"shape": [
24576,
616
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 60555264,
"byteOffset": 0
}
],
"md5sum": "7d2e06586ff0028ead5c57c727a04bdc"
},
{
"dataPath": "params_shard_56.bin",
"format": "raw-shard",
"nbytes": 60456960,
"records": [
{
"name": "transformer.h.12.mlp.c_proj.q_weight",
"shape": [
6144,
2460
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 60456960,
"byteOffset": 0
}
],
"md5sum": "0d1e73a36e1ee974ce755dc6d3634303"
},
{
"dataPath": "params_shard_57.bin",
"format": "raw-shard",
"nbytes": 32256512,
"records": [
{
"name": "transformer.h.12.attn.c_proj.q_weight",
"shape": [
6144,
616
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 15138816,
"byteOffset": 0
},
{
"name": "transformer.h.12.attn.c_proj.q_scale",
"shape": [
6144,
154
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1892352,
"byteOffset": 15138816
},
{
"name": "transformer.h.12.ln_2.bias",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 17031168
},
{
"name": "transformer.h.12.ln_2.weight",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 17043456
},
{
"name": "transformer.h.12.mlp.c_fc.bias",
"shape": [
24576
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 49152,
"byteOffset": 17055744
},
{
"name": "transformer.h.12.mlp.c_fc.q_scale",
"shape": [
24576,
154
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7569408,
"byteOffset": 17104896
},
{
"name": "transformer.h.12.mlp.c_proj.bias",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 24674304
},
{
"name": "transformer.h.12.mlp.c_proj.q_scale",
"shape": [
6144,
615
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7557120,
"byteOffset": 24686592
},
{
"name": "transformer.h.13.attn.c_attn.bias",
"shape": [
6400
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12800,
"byteOffset": 32243712
}
],
"md5sum": "844458ea1005739ad3f7688b557320c7"
},
{
"dataPath": "params_shard_58.bin",
"format": "raw-shard",
"nbytes": 32891904,
"records": [
{
"name": "transformer.h.13.attn.c_attn.q_weight",
"shape": [
6400,
616
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 15769600,
"byteOffset": 0
},
{
"name": "transformer.h.13.attn.c_attn.q_scale",
"shape": [
6400,
154
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1971200,
"byteOffset": 15769600
},
{
"name": "transformer.h.13.attn.c_proj.bias",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 17740800
},
{
"name": "transformer.h.13.attn.c_proj.q_weight",
"shape": [
6144,
616
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 15138816,
"byteOffset": 17753088
}
],
"md5sum": "afe302b41fc888362ea47caeb9301bb2"
},
{
"dataPath": "params_shard_59.bin",
"format": "raw-shard",
"nbytes": 60555264,
"records": [
{
"name": "transformer.h.13.mlp.c_fc.q_weight",
"shape": [
24576,
616
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 60555264,
"byteOffset": 0
}
],
"md5sum": "3c3ec1f601d8e6b5cced7156061afc38"
},
{
"dataPath": "params_shard_60.bin",
"format": "raw-shard",
"nbytes": 60456960,
"records": [
{
"name": "transformer.h.13.mlp.c_proj.q_weight",
"shape": [
6144,
2460
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 60456960,
"byteOffset": 0
}
],
"md5sum": "bbe84ba60475fcb2d28e3ead98035a1d"
},
{
"dataPath": "params_shard_61.bin",
"format": "raw-shard",
"nbytes": 32911872,
"records": [
{
"name": "transformer.h.13.attn.c_proj.q_scale",
"shape": [
6144,
154
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1892352,
"byteOffset": 0
},
{
"name": "transformer.h.13.ln_1.bias",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 1892352
},
{
"name": "transformer.h.13.ln_1.weight",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 1904640
},
{
"name": "transformer.h.13.ln_2.bias",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 1916928
},
{
"name": "transformer.h.13.ln_2.weight",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 1929216
},
{
"name": "transformer.h.13.mlp.c_fc.bias",
"shape": [
24576
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 49152,
"byteOffset": 1941504
},
{
"name": "transformer.h.13.mlp.c_fc.q_scale",
"shape": [
24576,
154
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7569408,
"byteOffset": 1990656
},
{
"name": "transformer.h.13.mlp.c_proj.bias",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 9560064
},
{
"name": "transformer.h.13.mlp.c_proj.q_scale",
"shape": [
6144,
615
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7557120,
"byteOffset": 9572352
},
{
"name": "transformer.h.14.attn.c_attn.bias",
"shape": [
6400
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12800,
"byteOffset": 17129472
},
{
"name": "transformer.h.14.attn.c_attn.q_weight",
"shape": [
6400,
616
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 15769600,
"byteOffset": 17142272
}
],
"md5sum": "22dd9cda896be91e766d8db20801b62c"
},
{
"dataPath": "params_shard_62.bin",
"format": "raw-shard",
"nbytes": 60555264,
"records": [
{
"name": "transformer.h.14.mlp.c_fc.q_weight",
"shape": [
24576,
616
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 60555264,
"byteOffset": 0
}
],
"md5sum": "8c56a1fa2ff425534918da2c9f496698"
},
{
"dataPath": "params_shard_63.bin",
"format": "raw-shard",
"nbytes": 60456960,
"records": [
{
"name": "transformer.h.14.mlp.c_proj.q_weight",
"shape": [
6144,
2460
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 60456960,
"byteOffset": 0
}
],
"md5sum": "cf0888c26c4b9edbae3be0b7ae798be6"
},
{
"dataPath": "params_shard_64.bin",
"format": "raw-shard",
"nbytes": 26694656,
"records": [
{
"name": "transformer.h.14.attn.c_attn.q_scale",
"shape": [
6400,
154
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1971200,
"byteOffset": 0
},
{
"name": "transformer.h.14.attn.c_proj.bias",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 1971200
},
{
"name": "transformer.h.14.attn.c_proj.q_weight",
"shape": [
6144,
616
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 15138816,
"byteOffset": 1983488
},
{
"name": "transformer.h.14.attn.c_proj.q_scale",
"shape": [
6144,
154
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1892352,
"byteOffset": 17122304
},
{
"name": "transformer.h.14.ln_1.bias",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 19014656
},
{
"name": "transformer.h.14.ln_1.weight",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 19026944
},
{
"name": "transformer.h.14.ln_2.bias",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 19039232
},
{
"name": "transformer.h.14.ln_2.weight",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 19051520
},
{
"name": "transformer.h.14.mlp.c_fc.bias",
"shape": [
24576
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 49152,
"byteOffset": 19063808
},
{
"name": "transformer.h.14.mlp.c_fc.q_scale",
"shape": [
24576,
154
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7569408,
"byteOffset": 19112960
},
{
"name": "transformer.h.14.mlp.c_proj.bias",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 26682368
}
],
"md5sum": "093996306cfcf00b51266383eee7a5d0"
},
{
"dataPath": "params_shard_65.bin",
"format": "raw-shard",
"nbytes": 25323008,
"records": [
{
"name": "transformer.h.14.mlp.c_proj.q_scale",
"shape": [
6144,
615
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7557120,
"byteOffset": 0
},
{
"name": "transformer.h.15.attn.c_attn.bias",
"shape": [
6400
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12800,
"byteOffset": 7557120
},
{
"name": "transformer.h.15.attn.c_attn.q_weight",
"shape": [
6400,
616
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 15769600,
"byteOffset": 7569920
},
{
"name": "transformer.h.15.attn.c_attn.q_scale",
"shape": [
6400,
154
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1971200,
"byteOffset": 23339520
},
{
"name": "transformer.h.15.attn.c_proj.bias",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 25310720
}
],
"md5sum": "162af605314e5c5b9dcd30a346da1635"
},
{
"dataPath": "params_shard_66.bin",
"format": "raw-shard",
"nbytes": 60555264,
"records": [
{
"name": "transformer.h.15.mlp.c_fc.q_weight",
"shape": [
24576,
616
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 60555264,
"byteOffset": 0
}
],
"md5sum": "eea02481e677bdbfd966e7e992f35ed4"
},
{
"dataPath": "params_shard_67.bin",
"format": "raw-shard",
"nbytes": 60456960,
"records": [
{
"name": "transformer.h.15.mlp.c_proj.q_weight",
"shape": [
6144,
2460
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 60456960,
"byteOffset": 0
}
],
"md5sum": "30f62a3ff8fcc4f2d4f2528b3b2db2b8"
},
{
"dataPath": "params_shard_68.bin",
"format": "raw-shard",
"nbytes": 32281088,
"records": [
{
"name": "transformer.h.15.attn.c_proj.q_weight",
"shape": [
6144,
616
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 15138816,
"byteOffset": 0
},
{
"name": "transformer.h.15.attn.c_proj.q_scale",
"shape": [
6144,
154
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1892352,
"byteOffset": 15138816
},
{
"name": "transformer.h.15.ln_1.bias",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 17031168
},
{
"name": "transformer.h.15.ln_1.weight",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 17043456
},
{
"name": "transformer.h.15.ln_2.bias",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 17055744
},
{
"name": "transformer.h.15.ln_2.weight",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 17068032
},
{
"name": "transformer.h.15.mlp.c_fc.bias",
"shape": [
24576
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 49152,
"byteOffset": 17080320
},
{
"name": "transformer.h.15.mlp.c_fc.q_scale",
"shape": [
24576,
154
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7569408,
"byteOffset": 17129472
},
{
"name": "transformer.h.15.mlp.c_proj.bias",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 24698880
},
{
"name": "transformer.h.15.mlp.c_proj.q_scale",
"shape": [
6144,
615
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7557120,
"byteOffset": 24711168
},
{
"name": "transformer.h.16.attn.c_attn.bias",
"shape": [
6400
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12800,
"byteOffset": 32268288
}
],
"md5sum": "70470eb01f85fbce0f00710c6c4dd94c"
},
{
"dataPath": "params_shard_69.bin",
"format": "raw-shard",
"nbytes": 32891904,
"records": [
{
"name": "transformer.h.16.attn.c_attn.q_weight",
"shape": [
6400,
616
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 15769600,
"byteOffset": 0
},
{
"name": "transformer.h.16.attn.c_attn.q_scale",
"shape": [
6400,
154
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1971200,
"byteOffset": 15769600
},
{
"name": "transformer.h.16.attn.c_proj.bias",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 17740800
},
{
"name": "transformer.h.16.attn.c_proj.q_weight",
"shape": [
6144,
616
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 15138816,
"byteOffset": 17753088
}
],
"md5sum": "8c3e84b736ac45896f1ed842ee59e7e3"
},
{
"dataPath": "params_shard_70.bin",
"format": "raw-shard",
"nbytes": 60555264,
"records": [
{
"name": "transformer.h.16.mlp.c_fc.q_weight",
"shape": [
24576,
616
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 60555264,
"byteOffset": 0
}
],
"md5sum": "09cb3b305ead6d11150d1794da85f176"
},
{
"dataPath": "params_shard_71.bin",
"format": "raw-shard",
"nbytes": 60456960,
"records": [
{
"name": "transformer.h.16.mlp.c_proj.q_weight",
"shape": [
6144,
2460
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 60456960,
"byteOffset": 0
}
],
"md5sum": "2a2344b40f06cc913c9158cbcb060997"
},
{
"dataPath": "params_shard_72.bin",
"format": "raw-shard",
"nbytes": 32911872,
"records": [
{
"name": "transformer.h.16.attn.c_proj.q_scale",
"shape": [
6144,
154
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1892352,
"byteOffset": 0
},
{
"name": "transformer.h.16.ln_1.bias",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 1892352
},
{
"name": "transformer.h.16.ln_1.weight",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 1904640
},
{
"name": "transformer.h.16.ln_2.bias",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 1916928
},
{
"name": "transformer.h.16.ln_2.weight",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 1929216
},
{
"name": "transformer.h.16.mlp.c_fc.bias",
"shape": [
24576
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 49152,
"byteOffset": 1941504
},
{
"name": "transformer.h.16.mlp.c_fc.q_scale",
"shape": [
24576,
154
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7569408,
"byteOffset": 1990656
},
{
"name": "transformer.h.16.mlp.c_proj.bias",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 9560064
},
{
"name": "transformer.h.16.mlp.c_proj.q_scale",
"shape": [
6144,
615
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7557120,
"byteOffset": 9572352
},
{
"name": "transformer.h.17.attn.c_attn.bias",
"shape": [
6400
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12800,
"byteOffset": 17129472
},
{
"name": "transformer.h.17.attn.c_attn.q_weight",
"shape": [
6400,
616
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 15769600,
"byteOffset": 17142272
}
],
"md5sum": "53a8b06db906ddfdc6f3f152681f9dd9"
},
{
"dataPath": "params_shard_73.bin",
"format": "raw-shard",
"nbytes": 60555264,
"records": [
{
"name": "transformer.h.17.mlp.c_fc.q_weight",
"shape": [
24576,
616
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 60555264,
"byteOffset": 0
}
],
"md5sum": "7b97a6b50b128209e136d9a5aeb47edc"
},
{
"dataPath": "params_shard_74.bin",
"format": "raw-shard",
"nbytes": 60456960,
"records": [
{
"name": "transformer.h.17.mlp.c_proj.q_weight",
"shape": [
6144,
2460
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 60456960,
"byteOffset": 0
}
],
"md5sum": "90604df483dbea68eae1ed1d3aa8385c"
},
{
"dataPath": "params_shard_75.bin",
"format": "raw-shard",
"nbytes": 26694656,
"records": [
{
"name": "transformer.h.17.attn.c_attn.q_scale",
"shape": [
6400,
154
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1971200,
"byteOffset": 0
},
{
"name": "transformer.h.17.attn.c_proj.bias",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 1971200
},
{
"name": "transformer.h.17.attn.c_proj.q_weight",
"shape": [
6144,
616
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 15138816,
"byteOffset": 1983488
},
{
"name": "transformer.h.17.attn.c_proj.q_scale",
"shape": [
6144,
154
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1892352,
"byteOffset": 17122304
},
{
"name": "transformer.h.17.ln_1.bias",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 19014656
},
{
"name": "transformer.h.17.ln_1.weight",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 19026944
},
{
"name": "transformer.h.17.ln_2.bias",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 19039232
},
{
"name": "transformer.h.17.ln_2.weight",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 19051520
},
{
"name": "transformer.h.17.mlp.c_fc.bias",
"shape": [
24576
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 49152,
"byteOffset": 19063808
},
{
"name": "transformer.h.17.mlp.c_fc.q_scale",
"shape": [
24576,
154
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7569408,
"byteOffset": 19112960
},
{
"name": "transformer.h.17.mlp.c_proj.bias",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 26682368
}
],
"md5sum": "9db31eb5fd0ffda0af86b542587405f1"
},
{
"dataPath": "params_shard_76.bin",
"format": "raw-shard",
"nbytes": 25323008,
"records": [
{
"name": "transformer.h.17.mlp.c_proj.q_scale",
"shape": [
6144,
615
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7557120,
"byteOffset": 0
},
{
"name": "transformer.h.18.attn.c_attn.bias",
"shape": [
6400
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12800,
"byteOffset": 7557120
},
{
"name": "transformer.h.18.attn.c_attn.q_weight",
"shape": [
6400,
616
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 15769600,
"byteOffset": 7569920
},
{
"name": "transformer.h.18.attn.c_attn.q_scale",
"shape": [
6400,
154
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1971200,
"byteOffset": 23339520
},
{
"name": "transformer.h.18.attn.c_proj.bias",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 25310720
}
],
"md5sum": "1f26544ad632da0520f8602276e2a6f4"
},
{
"dataPath": "params_shard_77.bin",
"format": "raw-shard",
"nbytes": 60555264,
"records": [
{
"name": "transformer.h.18.mlp.c_fc.q_weight",
"shape": [
24576,
616
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 60555264,
"byteOffset": 0
}
],
"md5sum": "d1127ad6d4160342e99ef76e4636a2c3"
},
{
"dataPath": "params_shard_78.bin",
"format": "raw-shard",
"nbytes": 60456960,
"records": [
{
"name": "transformer.h.18.mlp.c_proj.q_weight",
"shape": [
6144,
2460
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 60456960,
"byteOffset": 0
}
],
"md5sum": "f5cbc249ea190d5692cfbabde17f9e2e"
},
{
"dataPath": "params_shard_79.bin",
"format": "raw-shard",
"nbytes": 32281088,
"records": [
{
"name": "transformer.h.18.attn.c_proj.q_weight",
"shape": [
6144,
616
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 15138816,
"byteOffset": 0
},
{
"name": "transformer.h.18.attn.c_proj.q_scale",
"shape": [
6144,
154
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1892352,
"byteOffset": 15138816
},
{
"name": "transformer.h.18.ln_1.bias",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 17031168
},
{
"name": "transformer.h.18.ln_1.weight",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 17043456
},
{
"name": "transformer.h.18.ln_2.bias",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 17055744
},
{
"name": "transformer.h.18.ln_2.weight",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 17068032
},
{
"name": "transformer.h.18.mlp.c_fc.bias",
"shape": [
24576
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 49152,
"byteOffset": 17080320
},
{
"name": "transformer.h.18.mlp.c_fc.q_scale",
"shape": [
24576,
154
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7569408,
"byteOffset": 17129472
},
{
"name": "transformer.h.18.mlp.c_proj.bias",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 24698880
},
{
"name": "transformer.h.18.mlp.c_proj.q_scale",
"shape": [
6144,
615
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7557120,
"byteOffset": 24711168
},
{
"name": "transformer.h.19.attn.c_attn.bias",
"shape": [
6400
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12800,
"byteOffset": 32268288
}
],
"md5sum": "734951c7c827772850f043964a530e9b"
},
{
"dataPath": "params_shard_80.bin",
"format": "raw-shard",
"nbytes": 32891904,
"records": [
{
"name": "transformer.h.19.attn.c_attn.q_weight",
"shape": [
6400,
616
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 15769600,
"byteOffset": 0
},
{
"name": "transformer.h.19.attn.c_attn.q_scale",
"shape": [
6400,
154
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1971200,
"byteOffset": 15769600
},
{
"name": "transformer.h.19.attn.c_proj.bias",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 17740800
},
{
"name": "transformer.h.19.attn.c_proj.q_weight",
"shape": [
6144,
616
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 15138816,
"byteOffset": 17753088
}
],
"md5sum": "717c60bf5c3c84d904f186c33aeddc09"
},
{
"dataPath": "params_shard_81.bin",
"format": "raw-shard",
"nbytes": 60555264,
"records": [
{
"name": "transformer.h.19.mlp.c_fc.q_weight",
"shape": [
24576,
616
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 60555264,
"byteOffset": 0
}
],
"md5sum": "e54ee7c7fdfed475df3dc3805790bac1"
},
{
"dataPath": "params_shard_82.bin",
"format": "raw-shard",
"nbytes": 60456960,
"records": [
{
"name": "transformer.h.19.mlp.c_proj.q_weight",
"shape": [
6144,
2460
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 60456960,
"byteOffset": 0
}
],
"md5sum": "7cca97167da75bba9fc5020f3651cfc8"
},
{
"dataPath": "params_shard_83.bin",
"format": "raw-shard",
"nbytes": 32911872,
"records": [
{
"name": "transformer.h.19.attn.c_proj.q_scale",
"shape": [
6144,
154
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1892352,
"byteOffset": 0
},
{
"name": "transformer.h.19.ln_1.bias",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 1892352
},
{
"name": "transformer.h.19.ln_1.weight",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 1904640
},
{
"name": "transformer.h.19.ln_2.bias",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 1916928
},
{
"name": "transformer.h.19.ln_2.weight",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 1929216
},
{
"name": "transformer.h.19.mlp.c_fc.bias",
"shape": [
24576
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 49152,
"byteOffset": 1941504
},
{
"name": "transformer.h.19.mlp.c_fc.q_scale",
"shape": [
24576,
154
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7569408,
"byteOffset": 1990656
},
{
"name": "transformer.h.19.mlp.c_proj.bias",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 9560064
},
{
"name": "transformer.h.19.mlp.c_proj.q_scale",
"shape": [
6144,
615
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7557120,
"byteOffset": 9572352
},
{
"name": "transformer.h.20.attn.c_attn.bias",
"shape": [
6400
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12800,
"byteOffset": 17129472
},
{
"name": "transformer.h.20.attn.c_attn.q_weight",
"shape": [
6400,
616
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 15769600,
"byteOffset": 17142272
}
],
"md5sum": "4c45b1aa408289c8920d7948558d0c6b"
},
{
"dataPath": "params_shard_84.bin",
"format": "raw-shard",
"nbytes": 60555264,
"records": [
{
"name": "transformer.h.20.mlp.c_fc.q_weight",
"shape": [
24576,
616
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 60555264,
"byteOffset": 0
}
],
"md5sum": "1be0a9543acc8abfe2f9b39b47ed7056"
},
{
"dataPath": "params_shard_85.bin",
"format": "raw-shard",
"nbytes": 60456960,
"records": [
{
"name": "transformer.h.20.mlp.c_proj.q_weight",
"shape": [
6144,
2460
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 60456960,
"byteOffset": 0
}
],
"md5sum": "1b0abbcb246a8f1dbf9c5b639c2ac6f2"
},
{
"dataPath": "params_shard_86.bin",
"format": "raw-shard",
"nbytes": 26694656,
"records": [
{
"name": "transformer.h.20.attn.c_attn.q_scale",
"shape": [
6400,
154
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1971200,
"byteOffset": 0
},
{
"name": "transformer.h.20.attn.c_proj.bias",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 1971200
},
{
"name": "transformer.h.20.attn.c_proj.q_weight",
"shape": [
6144,
616
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 15138816,
"byteOffset": 1983488
},
{
"name": "transformer.h.20.attn.c_proj.q_scale",
"shape": [
6144,
154
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1892352,
"byteOffset": 17122304
},
{
"name": "transformer.h.20.ln_1.bias",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 19014656
},
{
"name": "transformer.h.20.ln_1.weight",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 19026944
},
{
"name": "transformer.h.20.ln_2.bias",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 19039232
},
{
"name": "transformer.h.20.ln_2.weight",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 19051520
},
{
"name": "transformer.h.20.mlp.c_fc.bias",
"shape": [
24576
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 49152,
"byteOffset": 19063808
},
{
"name": "transformer.h.20.mlp.c_fc.q_scale",
"shape": [
24576,
154
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7569408,
"byteOffset": 19112960
},
{
"name": "transformer.h.20.mlp.c_proj.bias",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 26682368
}
],
"md5sum": "0b12e8a4f04278245d357c398916b8ff"
},
{
"dataPath": "params_shard_87.bin",
"format": "raw-shard",
"nbytes": 25323008,
"records": [
{
"name": "transformer.h.20.mlp.c_proj.q_scale",
"shape": [
6144,
615
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7557120,
"byteOffset": 0
},
{
"name": "transformer.h.21.attn.c_attn.bias",
"shape": [
6400
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12800,
"byteOffset": 7557120
},
{
"name": "transformer.h.21.attn.c_attn.q_weight",
"shape": [
6400,
616
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 15769600,
"byteOffset": 7569920
},
{
"name": "transformer.h.21.attn.c_attn.q_scale",
"shape": [
6400,
154
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1971200,
"byteOffset": 23339520
},
{
"name": "transformer.h.21.attn.c_proj.bias",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 25310720
}
],
"md5sum": "f0202b527d86b76cf5a992e8eda2768a"
},
{
"dataPath": "params_shard_88.bin",
"format": "raw-shard",
"nbytes": 60555264,
"records": [
{
"name": "transformer.h.21.mlp.c_fc.q_weight",
"shape": [
24576,
616
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 60555264,
"byteOffset": 0
}
],
"md5sum": "975991e7582dbe6662fc606f3b3b4e9e"
},
{
"dataPath": "params_shard_89.bin",
"format": "raw-shard",
"nbytes": 60456960,
"records": [
{
"name": "transformer.h.21.mlp.c_proj.q_weight",
"shape": [
6144,
2460
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 60456960,
"byteOffset": 0
}
],
"md5sum": "e0e584476ebf86808361ab3e374c49ac"
},
{
"dataPath": "params_shard_90.bin",
"format": "raw-shard",
"nbytes": 32281088,
"records": [
{
"name": "transformer.h.21.attn.c_proj.q_weight",
"shape": [
6144,
616
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 15138816,
"byteOffset": 0
},
{
"name": "transformer.h.21.attn.c_proj.q_scale",
"shape": [
6144,
154
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1892352,
"byteOffset": 15138816
},
{
"name": "transformer.h.21.ln_1.bias",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 17031168
},
{
"name": "transformer.h.21.ln_1.weight",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 17043456
},
{
"name": "transformer.h.21.ln_2.bias",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 17055744
},
{
"name": "transformer.h.21.ln_2.weight",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 17068032
},
{
"name": "transformer.h.21.mlp.c_fc.bias",
"shape": [
24576
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 49152,
"byteOffset": 17080320
},
{
"name": "transformer.h.21.mlp.c_fc.q_scale",
"shape": [
24576,
154
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7569408,
"byteOffset": 17129472
},
{
"name": "transformer.h.21.mlp.c_proj.bias",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 24698880
},
{
"name": "transformer.h.21.mlp.c_proj.q_scale",
"shape": [
6144,
615
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7557120,
"byteOffset": 24711168
},
{
"name": "transformer.h.22.attn.c_attn.bias",
"shape": [
6400
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12800,
"byteOffset": 32268288
}
],
"md5sum": "44a45f9988b94be391540bb4b96bd5b2"
},
{
"dataPath": "params_shard_91.bin",
"format": "raw-shard",
"nbytes": 32891904,
"records": [
{
"name": "transformer.h.22.attn.c_attn.q_weight",
"shape": [
6400,
616
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 15769600,
"byteOffset": 0
},
{
"name": "transformer.h.22.attn.c_attn.q_scale",
"shape": [
6400,
154
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1971200,
"byteOffset": 15769600
},
{
"name": "transformer.h.22.attn.c_proj.bias",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 17740800
},
{
"name": "transformer.h.22.attn.c_proj.q_weight",
"shape": [
6144,
616
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 15138816,
"byteOffset": 17753088
}
],
"md5sum": "d92d7d7ea6d7fc313aebabf96b04d500"
},
{
"dataPath": "params_shard_92.bin",
"format": "raw-shard",
"nbytes": 60555264,
"records": [
{
"name": "transformer.h.22.mlp.c_fc.q_weight",
"shape": [
24576,
616
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 60555264,
"byteOffset": 0
}
],
"md5sum": "ef7c507568b87c81150fbdb29eaa73e1"
},
{
"dataPath": "params_shard_93.bin",
"format": "raw-shard",
"nbytes": 60456960,
"records": [
{
"name": "transformer.h.22.mlp.c_proj.q_weight",
"shape": [
6144,
2460
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 60456960,
"byteOffset": 0
}
],
"md5sum": "03a53d75fb54a7fd9e20ab93c394104a"
},
{
"dataPath": "params_shard_94.bin",
"format": "raw-shard",
"nbytes": 32911872,
"records": [
{
"name": "transformer.h.22.attn.c_proj.q_scale",
"shape": [
6144,
154
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1892352,
"byteOffset": 0
},
{
"name": "transformer.h.22.ln_1.bias",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 1892352
},
{
"name": "transformer.h.22.ln_1.weight",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 1904640
},
{
"name": "transformer.h.22.ln_2.bias",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 1916928
},
{
"name": "transformer.h.22.ln_2.weight",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 1929216
},
{
"name": "transformer.h.22.mlp.c_fc.bias",
"shape": [
24576
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 49152,
"byteOffset": 1941504
},
{
"name": "transformer.h.22.mlp.c_fc.q_scale",
"shape": [
24576,
154
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7569408,
"byteOffset": 1990656
},
{
"name": "transformer.h.22.mlp.c_proj.bias",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 9560064
},
{
"name": "transformer.h.22.mlp.c_proj.q_scale",
"shape": [
6144,
615
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7557120,
"byteOffset": 9572352
},
{
"name": "transformer.h.23.attn.c_attn.bias",
"shape": [
6400
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12800,
"byteOffset": 17129472
},
{
"name": "transformer.h.23.attn.c_attn.q_weight",
"shape": [
6400,
616
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 15769600,
"byteOffset": 17142272
}
],
"md5sum": "2f0e70a42c64fc1fc3526c5eece0120f"
},
{
"dataPath": "params_shard_95.bin",
"format": "raw-shard",
"nbytes": 60555264,
"records": [
{
"name": "transformer.h.23.mlp.c_fc.q_weight",
"shape": [
24576,
616
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 60555264,
"byteOffset": 0
}
],
"md5sum": "5c39dcd4e7ab09273d39bf9ae8a599c3"
},
{
"dataPath": "params_shard_96.bin",
"format": "raw-shard",
"nbytes": 60456960,
"records": [
{
"name": "transformer.h.23.mlp.c_proj.q_weight",
"shape": [
6144,
2460
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 60456960,
"byteOffset": 0
}
],
"md5sum": "6e177dd7df6594c2923eccc281a248f8"
},
{
"dataPath": "params_shard_97.bin",
"format": "raw-shard",
"nbytes": 26694656,
"records": [
{
"name": "transformer.h.23.attn.c_attn.q_scale",
"shape": [
6400,
154
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1971200,
"byteOffset": 0
},
{
"name": "transformer.h.23.attn.c_proj.bias",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 1971200
},
{
"name": "transformer.h.23.attn.c_proj.q_weight",
"shape": [
6144,
616
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 15138816,
"byteOffset": 1983488
},
{
"name": "transformer.h.23.attn.c_proj.q_scale",
"shape": [
6144,
154
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1892352,
"byteOffset": 17122304
},
{
"name": "transformer.h.23.ln_1.bias",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 19014656
},
{
"name": "transformer.h.23.ln_1.weight",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 19026944
},
{
"name": "transformer.h.23.ln_2.bias",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 19039232
},
{
"name": "transformer.h.23.ln_2.weight",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 19051520
},
{
"name": "transformer.h.23.mlp.c_fc.bias",
"shape": [
24576
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 49152,
"byteOffset": 19063808
},
{
"name": "transformer.h.23.mlp.c_fc.q_scale",
"shape": [
24576,
154
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7569408,
"byteOffset": 19112960
},
{
"name": "transformer.h.23.mlp.c_proj.bias",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 26682368
}
],
"md5sum": "35c1245d7d20bc1e5423e6d3a0599930"
},
{
"dataPath": "params_shard_98.bin",
"format": "raw-shard",
"nbytes": 25323008,
"records": [
{
"name": "transformer.h.23.mlp.c_proj.q_scale",
"shape": [
6144,
615
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7557120,
"byteOffset": 0
},
{
"name": "transformer.h.24.attn.c_attn.bias",
"shape": [
6400
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12800,
"byteOffset": 7557120
},
{
"name": "transformer.h.24.attn.c_attn.q_weight",
"shape": [
6400,
616
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 15769600,
"byteOffset": 7569920
},
{
"name": "transformer.h.24.attn.c_attn.q_scale",
"shape": [
6400,
154
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1971200,
"byteOffset": 23339520
},
{
"name": "transformer.h.24.attn.c_proj.bias",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 25310720
}
],
"md5sum": "e678030b44101053b70e14137c1491a0"
},
{
"dataPath": "params_shard_99.bin",
"format": "raw-shard",
"nbytes": 60555264,
"records": [
{
"name": "transformer.h.24.mlp.c_fc.q_weight",
"shape": [
24576,
616
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 60555264,
"byteOffset": 0
}
],
"md5sum": "ed599cc953621da88498f0cc77ee4d77"
},
{
"dataPath": "params_shard_100.bin",
"format": "raw-shard",
"nbytes": 60456960,
"records": [
{
"name": "transformer.h.24.mlp.c_proj.q_weight",
"shape": [
6144,
2460
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 60456960,
"byteOffset": 0
}
],
"md5sum": "11f598e16cacd4c96101495b68b10636"
},
{
"dataPath": "params_shard_101.bin",
"format": "raw-shard",
"nbytes": 32281088,
"records": [
{
"name": "transformer.h.24.attn.c_proj.q_weight",
"shape": [
6144,
616
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 15138816,
"byteOffset": 0
},
{
"name": "transformer.h.24.attn.c_proj.q_scale",
"shape": [
6144,
154
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1892352,
"byteOffset": 15138816
},
{
"name": "transformer.h.24.ln_1.bias",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 17031168
},
{
"name": "transformer.h.24.ln_1.weight",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 17043456
},
{
"name": "transformer.h.24.ln_2.bias",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 17055744
},
{
"name": "transformer.h.24.ln_2.weight",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 17068032
},
{
"name": "transformer.h.24.mlp.c_fc.bias",
"shape": [
24576
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 49152,
"byteOffset": 17080320
},
{
"name": "transformer.h.24.mlp.c_fc.q_scale",
"shape": [
24576,
154
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7569408,
"byteOffset": 17129472
},
{
"name": "transformer.h.24.mlp.c_proj.bias",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 24698880
},
{
"name": "transformer.h.24.mlp.c_proj.q_scale",
"shape": [
6144,
615
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7557120,
"byteOffset": 24711168
},
{
"name": "transformer.h.25.attn.c_attn.bias",
"shape": [
6400
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12800,
"byteOffset": 32268288
}
],
"md5sum": "b3743f74651e99f65f26d1d0f8e65473"
},
{
"dataPath": "params_shard_102.bin",
"format": "raw-shard",
"nbytes": 32916480,
"records": [
{
"name": "transformer.h.25.attn.c_attn.q_weight",
"shape": [
6400,
616
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 15769600,
"byteOffset": 0
},
{
"name": "transformer.h.25.attn.c_attn.q_scale",
"shape": [
6400,
154
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1971200,
"byteOffset": 15769600
},
{
"name": "transformer.h.25.ln_1.bias",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 17740800
},
{
"name": "transformer.h.25.ln_1.weight",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 17753088
},
{
"name": "transformer.h.25.attn.c_proj.bias",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 17765376
},
{
"name": "transformer.h.25.attn.c_proj.q_weight",
"shape": [
6144,
616
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 15138816,
"byteOffset": 17777664
}
],
"md5sum": "2b450ff69b63b72b1d0938db61a78095"
},
{
"dataPath": "params_shard_103.bin",
"format": "raw-shard",
"nbytes": 60555264,
"records": [
{
"name": "transformer.h.25.mlp.c_fc.q_weight",
"shape": [
24576,
616
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 60555264,
"byteOffset": 0
}
],
"md5sum": "d02d5ce97dfe03b681dacb6f7cf6a800"
},
{
"dataPath": "params_shard_104.bin",
"format": "raw-shard",
"nbytes": 60456960,
"records": [
{
"name": "transformer.h.25.mlp.c_proj.q_weight",
"shape": [
6144,
2460
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 60456960,
"byteOffset": 0
}
],
"md5sum": "07d757a9e3118fcc9391c89a7c55bc8d"
},
{
"dataPath": "params_shard_105.bin",
"format": "raw-shard",
"nbytes": 32887296,
"records": [
{
"name": "transformer.h.25.attn.c_proj.q_scale",
"shape": [
6144,
154
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1892352,
"byteOffset": 0
},
{
"name": "transformer.h.25.ln_2.bias",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 1892352
},
{
"name": "transformer.h.25.ln_2.weight",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 1904640
},
{
"name": "transformer.h.25.mlp.c_fc.bias",
"shape": [
24576
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 49152,
"byteOffset": 1916928
},
{
"name": "transformer.h.25.mlp.c_fc.q_scale",
"shape": [
24576,
154
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7569408,
"byteOffset": 1966080
},
{
"name": "transformer.h.25.mlp.c_proj.bias",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 9535488
},
{
"name": "transformer.h.25.mlp.c_proj.q_scale",
"shape": [
6144,
615
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7557120,
"byteOffset": 9547776
},
{
"name": "transformer.h.26.attn.c_attn.bias",
"shape": [
6400
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12800,
"byteOffset": 17104896
},
{
"name": "transformer.h.26.attn.c_attn.q_weight",
"shape": [
6400,
616
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 15769600,
"byteOffset": 17117696
}
],
"md5sum": "9c35299bd5988b438dff6e822b798f09"
},
{
"dataPath": "params_shard_106.bin",
"format": "raw-shard",
"nbytes": 60555264,
"records": [
{
"name": "transformer.h.26.mlp.c_fc.q_weight",
"shape": [
24576,
616
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 60555264,
"byteOffset": 0
}
],
"md5sum": "7bf9bf6e31d6247bdc48b92a2d4c184f"
},
{
"dataPath": "params_shard_107.bin",
"format": "raw-shard",
"nbytes": 60456960,
"records": [
{
"name": "transformer.h.26.mlp.c_proj.q_weight",
"shape": [
6144,
2460
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 60456960,
"byteOffset": 0
}
],
"md5sum": "4a9bf8d8b2365fe59a97f7232a36cc41"
},
{
"dataPath": "params_shard_108.bin",
"format": "raw-shard",
"nbytes": 26694656,
"records": [
{
"name": "transformer.h.26.attn.c_attn.q_scale",
"shape": [
6400,
154
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1971200,
"byteOffset": 0
},
{
"name": "transformer.h.26.attn.c_proj.bias",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 1971200
},
{
"name": "transformer.h.26.attn.c_proj.q_weight",
"shape": [
6144,
616
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 15138816,
"byteOffset": 1983488
},
{
"name": "transformer.h.26.attn.c_proj.q_scale",
"shape": [
6144,
154
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1892352,
"byteOffset": 17122304
},
{
"name": "transformer.h.26.ln_1.bias",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 19014656
},
{
"name": "transformer.h.26.ln_1.weight",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 19026944
},
{
"name": "transformer.h.26.ln_2.bias",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 19039232
},
{
"name": "transformer.h.26.ln_2.weight",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 19051520
},
{
"name": "transformer.h.26.mlp.c_fc.bias",
"shape": [
24576
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 49152,
"byteOffset": 19063808
},
{
"name": "transformer.h.26.mlp.c_fc.q_scale",
"shape": [
24576,
154
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7569408,
"byteOffset": 19112960
},
{
"name": "transformer.h.26.mlp.c_proj.bias",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 26682368
}
],
"md5sum": "3de3170f3f7d2473839d0667c3ed0fe9"
},
{
"dataPath": "params_shard_109.bin",
"format": "raw-shard",
"nbytes": 25323008,
"records": [
{
"name": "transformer.h.26.mlp.c_proj.q_scale",
"shape": [
6144,
615
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7557120,
"byteOffset": 0
},
{
"name": "transformer.h.27.attn.c_attn.bias",
"shape": [
6400
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12800,
"byteOffset": 7557120
},
{
"name": "transformer.h.27.attn.c_attn.q_weight",
"shape": [
6400,
616
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 15769600,
"byteOffset": 7569920
},
{
"name": "transformer.h.27.attn.c_attn.q_scale",
"shape": [
6400,
154
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1971200,
"byteOffset": 23339520
},
{
"name": "transformer.h.27.attn.c_proj.bias",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 25310720
}
],
"md5sum": "4354c55f0f4ee485c620402c11356e2c"
},
{
"dataPath": "params_shard_110.bin",
"format": "raw-shard",
"nbytes": 60555264,
"records": [
{
"name": "transformer.h.27.mlp.c_fc.q_weight",
"shape": [
24576,
616
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 60555264,
"byteOffset": 0
}
],
"md5sum": "529adcb23bcb0e714418dcdd0ce98aba"
},
{
"dataPath": "params_shard_111.bin",
"format": "raw-shard",
"nbytes": 60456960,
"records": [
{
"name": "transformer.h.27.mlp.c_proj.q_weight",
"shape": [
6144,
2460
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 60456960,
"byteOffset": 0
}
],
"md5sum": "5694556b93f8e41f2e6ec124a95c9d0f"
},
{
"dataPath": "params_shard_112.bin",
"format": "raw-shard",
"nbytes": 32281088,
"records": [
{
"name": "transformer.h.27.attn.c_proj.q_weight",
"shape": [
6144,
616
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 15138816,
"byteOffset": 0
},
{
"name": "transformer.h.27.attn.c_proj.q_scale",
"shape": [
6144,
154
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1892352,
"byteOffset": 15138816
},
{
"name": "transformer.h.27.ln_1.bias",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 17031168
},
{
"name": "transformer.h.27.ln_1.weight",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 17043456
},
{
"name": "transformer.h.27.ln_2.bias",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 17055744
},
{
"name": "transformer.h.27.ln_2.weight",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 17068032
},
{
"name": "transformer.h.27.mlp.c_fc.bias",
"shape": [
24576
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 49152,
"byteOffset": 17080320
},
{
"name": "transformer.h.27.mlp.c_fc.q_scale",
"shape": [
24576,
154
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7569408,
"byteOffset": 17129472
},
{
"name": "transformer.h.27.mlp.c_proj.bias",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 24698880
},
{
"name": "transformer.h.27.mlp.c_proj.q_scale",
"shape": [
6144,
615
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7557120,
"byteOffset": 24711168
},
{
"name": "transformer.h.28.attn.c_attn.bias",
"shape": [
6400
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12800,
"byteOffset": 32268288
}
],
"md5sum": "db708cecc9c0e1db114def2fa2f3e63b"
},
{
"dataPath": "params_shard_113.bin",
"format": "raw-shard",
"nbytes": 32891904,
"records": [
{
"name": "transformer.h.28.attn.c_attn.q_weight",
"shape": [
6400,
616
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 15769600,
"byteOffset": 0
},
{
"name": "transformer.h.28.attn.c_attn.q_scale",
"shape": [
6400,
154
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1971200,
"byteOffset": 15769600
},
{
"name": "transformer.h.28.attn.c_proj.bias",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 17740800
},
{
"name": "transformer.h.28.attn.c_proj.q_weight",
"shape": [
6144,
616
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 15138816,
"byteOffset": 17753088
}
],
"md5sum": "4fed18b3c01992c312825cbd58331da4"
},
{
"dataPath": "params_shard_114.bin",
"format": "raw-shard",
"nbytes": 60555264,
"records": [
{
"name": "transformer.h.28.mlp.c_fc.q_weight",
"shape": [
24576,
616
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 60555264,
"byteOffset": 0
}
],
"md5sum": "c7585fbf9699a6dc292be09aa16ec914"
},
{
"dataPath": "params_shard_115.bin",
"format": "raw-shard",
"nbytes": 60456960,
"records": [
{
"name": "transformer.h.28.mlp.c_proj.q_weight",
"shape": [
6144,
2460
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 60456960,
"byteOffset": 0
}
],
"md5sum": "8107e9c496cecbc16f1ce02ef8b168a8"
},
{
"dataPath": "params_shard_116.bin",
"format": "raw-shard",
"nbytes": 32911872,
"records": [
{
"name": "transformer.h.28.attn.c_proj.q_scale",
"shape": [
6144,
154
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1892352,
"byteOffset": 0
},
{
"name": "transformer.h.28.ln_1.bias",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 1892352
},
{
"name": "transformer.h.28.ln_1.weight",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 1904640
},
{
"name": "transformer.h.28.ln_2.bias",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 1916928
},
{
"name": "transformer.h.28.ln_2.weight",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 1929216
},
{
"name": "transformer.h.28.mlp.c_fc.bias",
"shape": [
24576
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 49152,
"byteOffset": 1941504
},
{
"name": "transformer.h.28.mlp.c_fc.q_scale",
"shape": [
24576,
154
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7569408,
"byteOffset": 1990656
},
{
"name": "transformer.h.28.mlp.c_proj.bias",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 9560064
},
{
"name": "transformer.h.28.mlp.c_proj.q_scale",
"shape": [
6144,
615
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7557120,
"byteOffset": 9572352
},
{
"name": "transformer.h.29.attn.c_attn.bias",
"shape": [
6400
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12800,
"byteOffset": 17129472
},
{
"name": "transformer.h.29.attn.c_attn.q_weight",
"shape": [
6400,
616
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 15769600,
"byteOffset": 17142272
}
],
"md5sum": "89d03de07f154a5ae8fb9a26a7293ba8"
},
{
"dataPath": "params_shard_117.bin",
"format": "raw-shard",
"nbytes": 60555264,
"records": [
{
"name": "transformer.h.29.mlp.c_fc.q_weight",
"shape": [
24576,
616
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 60555264,
"byteOffset": 0
}
],
"md5sum": "f9a6ad1f4848b0755fc9490be87926ef"
},
{
"dataPath": "params_shard_118.bin",
"format": "raw-shard",
"nbytes": 60456960,
"records": [
{
"name": "transformer.h.29.mlp.c_proj.q_weight",
"shape": [
6144,
2460
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 60456960,
"byteOffset": 0
}
],
"md5sum": "eb6f5abbddf0980c3abc133183e5043c"
},
{
"dataPath": "params_shard_119.bin",
"format": "raw-shard",
"nbytes": 26694656,
"records": [
{
"name": "transformer.h.29.attn.c_attn.q_scale",
"shape": [
6400,
154
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1971200,
"byteOffset": 0
},
{
"name": "transformer.h.29.attn.c_proj.bias",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 1971200
},
{
"name": "transformer.h.29.attn.c_proj.q_weight",
"shape": [
6144,
616
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 15138816,
"byteOffset": 1983488
},
{
"name": "transformer.h.29.attn.c_proj.q_scale",
"shape": [
6144,
154
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1892352,
"byteOffset": 17122304
},
{
"name": "transformer.h.29.ln_1.bias",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 19014656
},
{
"name": "transformer.h.29.ln_1.weight",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 19026944
},
{
"name": "transformer.h.29.ln_2.bias",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 19039232
},
{
"name": "transformer.h.29.ln_2.weight",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 19051520
},
{
"name": "transformer.h.29.mlp.c_fc.bias",
"shape": [
24576
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 49152,
"byteOffset": 19063808
},
{
"name": "transformer.h.29.mlp.c_fc.q_scale",
"shape": [
24576,
154
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7569408,
"byteOffset": 19112960
},
{
"name": "transformer.h.29.mlp.c_proj.bias",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 26682368
}
],
"md5sum": "d3e1a73ce080b3656da7ac25562e21c1"
},
{
"dataPath": "params_shard_120.bin",
"format": "raw-shard",
"nbytes": 25323008,
"records": [
{
"name": "transformer.h.29.mlp.c_proj.q_scale",
"shape": [
6144,
615
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7557120,
"byteOffset": 0
},
{
"name": "transformer.h.30.attn.c_attn.bias",
"shape": [
6400
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12800,
"byteOffset": 7557120
},
{
"name": "transformer.h.30.attn.c_attn.q_weight",
"shape": [
6400,
616
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 15769600,
"byteOffset": 7569920
},
{
"name": "transformer.h.30.attn.c_attn.q_scale",
"shape": [
6400,
154
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1971200,
"byteOffset": 23339520
},
{
"name": "transformer.h.30.attn.c_proj.bias",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 25310720
}
],
"md5sum": "556c4c42389d22c87b85c42c18d81321"
},
{
"dataPath": "params_shard_121.bin",
"format": "raw-shard",
"nbytes": 60555264,
"records": [
{
"name": "transformer.h.30.mlp.c_fc.q_weight",
"shape": [
24576,
616
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 60555264,
"byteOffset": 0
}
],
"md5sum": "8c0c1728483a425b8e9d88281ee0b170"
},
{
"dataPath": "params_shard_122.bin",
"format": "raw-shard",
"nbytes": 60456960,
"records": [
{
"name": "transformer.h.30.mlp.c_proj.q_weight",
"shape": [
6144,
2460
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 60456960,
"byteOffset": 0
}
],
"md5sum": "a32ee825340cfc1dea3e8cd0441d507d"
},
{
"dataPath": "params_shard_123.bin",
"format": "raw-shard",
"nbytes": 32281088,
"records": [
{
"name": "transformer.h.30.attn.c_proj.q_weight",
"shape": [
6144,
616
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 15138816,
"byteOffset": 0
},
{
"name": "transformer.h.30.attn.c_proj.q_scale",
"shape": [
6144,
154
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1892352,
"byteOffset": 15138816
},
{
"name": "transformer.h.30.ln_1.bias",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 17031168
},
{
"name": "transformer.h.30.ln_1.weight",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 17043456
},
{
"name": "transformer.h.30.ln_2.bias",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 17055744
},
{
"name": "transformer.h.30.ln_2.weight",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 17068032
},
{
"name": "transformer.h.30.mlp.c_fc.bias",
"shape": [
24576
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 49152,
"byteOffset": 17080320
},
{
"name": "transformer.h.30.mlp.c_fc.q_scale",
"shape": [
24576,
154
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7569408,
"byteOffset": 17129472
},
{
"name": "transformer.h.30.mlp.c_proj.bias",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 24698880
},
{
"name": "transformer.h.30.mlp.c_proj.q_scale",
"shape": [
6144,
615
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7557120,
"byteOffset": 24711168
},
{
"name": "transformer.h.31.attn.c_attn.bias",
"shape": [
6400
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12800,
"byteOffset": 32268288
}
],
"md5sum": "cff47423323d27f1406d82b6ed185242"
},
{
"dataPath": "params_shard_124.bin",
"format": "raw-shard",
"nbytes": 32891904,
"records": [
{
"name": "transformer.h.31.attn.c_attn.q_weight",
"shape": [
6400,
616
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 15769600,
"byteOffset": 0
},
{
"name": "transformer.h.31.attn.c_attn.q_scale",
"shape": [
6400,
154
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1971200,
"byteOffset": 15769600
},
{
"name": "transformer.h.31.attn.c_proj.bias",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 17740800
},
{
"name": "transformer.h.31.attn.c_proj.q_weight",
"shape": [
6144,
616
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 15138816,
"byteOffset": 17753088
}
],
"md5sum": "fcd503ff0522985f9370854f15288e54"
},
{
"dataPath": "params_shard_125.bin",
"format": "raw-shard",
"nbytes": 60555264,
"records": [
{
"name": "transformer.h.31.mlp.c_fc.q_weight",
"shape": [
24576,
616
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 60555264,
"byteOffset": 0
}
],
"md5sum": "a4cbba40e9848307769ee1c30edeeaa5"
},
{
"dataPath": "params_shard_126.bin",
"format": "raw-shard",
"nbytes": 60456960,
"records": [
{
"name": "transformer.h.31.mlp.c_proj.q_weight",
"shape": [
6144,
2460
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 60456960,
"byteOffset": 0
}
],
"md5sum": "02c24208dff904d0859a37619efd3c0a"
},
{
"dataPath": "params_shard_127.bin",
"format": "raw-shard",
"nbytes": 32911872,
"records": [
{
"name": "transformer.h.31.attn.c_proj.q_scale",
"shape": [
6144,
154
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1892352,
"byteOffset": 0
},
{
"name": "transformer.h.31.ln_1.bias",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 1892352
},
{
"name": "transformer.h.31.ln_1.weight",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 1904640
},
{
"name": "transformer.h.31.ln_2.bias",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 1916928
},
{
"name": "transformer.h.31.ln_2.weight",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 1929216
},
{
"name": "transformer.h.31.mlp.c_fc.bias",
"shape": [
24576
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 49152,
"byteOffset": 1941504
},
{
"name": "transformer.h.31.mlp.c_fc.q_scale",
"shape": [
24576,
154
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7569408,
"byteOffset": 1990656
},
{
"name": "transformer.h.31.mlp.c_proj.bias",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 9560064
},
{
"name": "transformer.h.31.mlp.c_proj.q_scale",
"shape": [
6144,
615
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7557120,
"byteOffset": 9572352
},
{
"name": "transformer.h.32.attn.c_attn.bias",
"shape": [
6400
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12800,
"byteOffset": 17129472
},
{
"name": "transformer.h.32.attn.c_attn.q_weight",
"shape": [
6400,
616
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 15769600,
"byteOffset": 17142272
}
],
"md5sum": "bb6a0cc8ea6ad5b86ae9d21952e07dba"
},
{
"dataPath": "params_shard_128.bin",
"format": "raw-shard",
"nbytes": 60555264,
"records": [
{
"name": "transformer.h.32.mlp.c_fc.q_weight",
"shape": [
24576,
616
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 60555264,
"byteOffset": 0
}
],
"md5sum": "07733e78088d2d064da351022baf56c4"
},
{
"dataPath": "params_shard_129.bin",
"format": "raw-shard",
"nbytes": 60456960,
"records": [
{
"name": "transformer.h.32.mlp.c_proj.q_weight",
"shape": [
6144,
2460
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 60456960,
"byteOffset": 0
}
],
"md5sum": "8a7a63b48b3bd9c85bccee1837f0740c"
},
{
"dataPath": "params_shard_130.bin",
"format": "raw-shard",
"nbytes": 26694656,
"records": [
{
"name": "transformer.h.32.attn.c_attn.q_scale",
"shape": [
6400,
154
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1971200,
"byteOffset": 0
},
{
"name": "transformer.h.32.attn.c_proj.bias",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 1971200
},
{
"name": "transformer.h.32.attn.c_proj.q_weight",
"shape": [
6144,
616
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 15138816,
"byteOffset": 1983488
},
{
"name": "transformer.h.32.attn.c_proj.q_scale",
"shape": [
6144,
154
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1892352,
"byteOffset": 17122304
},
{
"name": "transformer.h.32.ln_1.bias",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 19014656
},
{
"name": "transformer.h.32.ln_1.weight",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 19026944
},
{
"name": "transformer.h.32.ln_2.bias",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 19039232
},
{
"name": "transformer.h.32.ln_2.weight",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 19051520
},
{
"name": "transformer.h.32.mlp.c_fc.bias",
"shape": [
24576
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 49152,
"byteOffset": 19063808
},
{
"name": "transformer.h.32.mlp.c_fc.q_scale",
"shape": [
24576,
154
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7569408,
"byteOffset": 19112960
},
{
"name": "transformer.h.32.mlp.c_proj.bias",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 26682368
}
],
"md5sum": "9956213f27b7076626b89c585d9f205f"
},
{
"dataPath": "params_shard_131.bin",
"format": "raw-shard",
"nbytes": 25323008,
"records": [
{
"name": "transformer.h.32.mlp.c_proj.q_scale",
"shape": [
6144,
615
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7557120,
"byteOffset": 0
},
{
"name": "transformer.h.33.attn.c_attn.bias",
"shape": [
6400
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12800,
"byteOffset": 7557120
},
{
"name": "transformer.h.33.attn.c_attn.q_weight",
"shape": [
6400,
616
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 15769600,
"byteOffset": 7569920
},
{
"name": "transformer.h.33.attn.c_attn.q_scale",
"shape": [
6400,
154
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1971200,
"byteOffset": 23339520
},
{
"name": "transformer.h.33.attn.c_proj.bias",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 25310720
}
],
"md5sum": "f5d7b00bafa56de1197f425e88e0c7ca"
},
{
"dataPath": "params_shard_132.bin",
"format": "raw-shard",
"nbytes": 60555264,
"records": [
{
"name": "transformer.h.33.mlp.c_fc.q_weight",
"shape": [
24576,
616
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 60555264,
"byteOffset": 0
}
],
"md5sum": "297484f2f5207e3dcc428a00abc3174e"
},
{
"dataPath": "params_shard_133.bin",
"format": "raw-shard",
"nbytes": 60456960,
"records": [
{
"name": "transformer.h.33.mlp.c_proj.q_weight",
"shape": [
6144,
2460
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 60456960,
"byteOffset": 0
}
],
"md5sum": "9aaa170c2f5d1b8e75ce711f3ae419d7"
},
{
"dataPath": "params_shard_134.bin",
"format": "raw-shard",
"nbytes": 32281088,
"records": [
{
"name": "transformer.h.33.attn.c_proj.q_weight",
"shape": [
6144,
616
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 15138816,
"byteOffset": 0
},
{
"name": "transformer.h.33.attn.c_proj.q_scale",
"shape": [
6144,
154
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1892352,
"byteOffset": 15138816
},
{
"name": "transformer.h.33.ln_1.bias",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 17031168
},
{
"name": "transformer.h.33.ln_1.weight",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 17043456
},
{
"name": "transformer.h.33.ln_2.bias",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 17055744
},
{
"name": "transformer.h.33.ln_2.weight",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 17068032
},
{
"name": "transformer.h.33.mlp.c_fc.bias",
"shape": [
24576
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 49152,
"byteOffset": 17080320
},
{
"name": "transformer.h.33.mlp.c_fc.q_scale",
"shape": [
24576,
154
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7569408,
"byteOffset": 17129472
},
{
"name": "transformer.h.33.mlp.c_proj.bias",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 24698880
},
{
"name": "transformer.h.33.mlp.c_proj.q_scale",
"shape": [
6144,
615
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7557120,
"byteOffset": 24711168
},
{
"name": "transformer.h.34.attn.c_attn.bias",
"shape": [
6400
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12800,
"byteOffset": 32268288
}
],
"md5sum": "4cc3a470680bd13091bb94d683944678"
},
{
"dataPath": "params_shard_135.bin",
"format": "raw-shard",
"nbytes": 32891904,
"records": [
{
"name": "transformer.h.34.attn.c_attn.q_weight",
"shape": [
6400,
616
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 15769600,
"byteOffset": 0
},
{
"name": "transformer.h.34.attn.c_attn.q_scale",
"shape": [
6400,
154
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1971200,
"byteOffset": 15769600
},
{
"name": "transformer.h.34.attn.c_proj.bias",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 17740800
},
{
"name": "transformer.h.34.attn.c_proj.q_weight",
"shape": [
6144,
616
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 15138816,
"byteOffset": 17753088
}
],
"md5sum": "74d083d4aa6ad7e320bac0e250402aca"
},
{
"dataPath": "params_shard_136.bin",
"format": "raw-shard",
"nbytes": 60555264,
"records": [
{
"name": "transformer.h.34.mlp.c_fc.q_weight",
"shape": [
24576,
616
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 60555264,
"byteOffset": 0
}
],
"md5sum": "9bf5517c14a5a938891635a13f9856a8"
},
{
"dataPath": "params_shard_137.bin",
"format": "raw-shard",
"nbytes": 60456960,
"records": [
{
"name": "transformer.h.34.mlp.c_proj.q_weight",
"shape": [
6144,
2460
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 60456960,
"byteOffset": 0
}
],
"md5sum": "2fe793949f4f21401e80d75466431eb8"
},
{
"dataPath": "params_shard_138.bin",
"format": "raw-shard",
"nbytes": 32911872,
"records": [
{
"name": "transformer.h.34.attn.c_proj.q_scale",
"shape": [
6144,
154
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1892352,
"byteOffset": 0
},
{
"name": "transformer.h.34.ln_1.bias",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 1892352
},
{
"name": "transformer.h.34.ln_1.weight",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 1904640
},
{
"name": "transformer.h.34.ln_2.bias",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 1916928
},
{
"name": "transformer.h.34.ln_2.weight",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 1929216
},
{
"name": "transformer.h.34.mlp.c_fc.bias",
"shape": [
24576
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 49152,
"byteOffset": 1941504
},
{
"name": "transformer.h.34.mlp.c_fc.q_scale",
"shape": [
24576,
154
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7569408,
"byteOffset": 1990656
},
{
"name": "transformer.h.34.mlp.c_proj.bias",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 9560064
},
{
"name": "transformer.h.34.mlp.c_proj.q_scale",
"shape": [
6144,
615
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7557120,
"byteOffset": 9572352
},
{
"name": "transformer.h.35.attn.c_attn.bias",
"shape": [
6400
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12800,
"byteOffset": 17129472
},
{
"name": "transformer.h.35.attn.c_attn.q_weight",
"shape": [
6400,
616
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 15769600,
"byteOffset": 17142272
}
],
"md5sum": "36a08d65e4bdcb1d4cf1fc5da60ab21d"
},
{
"dataPath": "params_shard_139.bin",
"format": "raw-shard",
"nbytes": 60555264,
"records": [
{
"name": "transformer.h.35.mlp.c_fc.q_weight",
"shape": [
24576,
616
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 60555264,
"byteOffset": 0
}
],
"md5sum": "d8a7c85d042d82ed8f75d89cdd1064fc"
},
{
"dataPath": "params_shard_140.bin",
"format": "raw-shard",
"nbytes": 60456960,
"records": [
{
"name": "transformer.h.35.mlp.c_proj.q_weight",
"shape": [
6144,
2460
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 60456960,
"byteOffset": 0
}
],
"md5sum": "47fd3f1dcdca8aa6b22bef4ed01e4e66"
},
{
"dataPath": "params_shard_141.bin",
"format": "raw-shard",
"nbytes": 26694656,
"records": [
{
"name": "transformer.h.35.attn.c_attn.q_scale",
"shape": [
6400,
154
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1971200,
"byteOffset": 0
},
{
"name": "transformer.h.35.attn.c_proj.bias",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 1971200
},
{
"name": "transformer.h.35.attn.c_proj.q_weight",
"shape": [
6144,
616
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 15138816,
"byteOffset": 1983488
},
{
"name": "transformer.h.35.attn.c_proj.q_scale",
"shape": [
6144,
154
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1892352,
"byteOffset": 17122304
},
{
"name": "transformer.h.35.ln_1.bias",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 19014656
},
{
"name": "transformer.h.35.ln_1.weight",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 19026944
},
{
"name": "transformer.h.35.ln_2.bias",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 19039232
},
{
"name": "transformer.h.35.ln_2.weight",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 19051520
},
{
"name": "transformer.h.35.mlp.c_fc.bias",
"shape": [
24576
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 49152,
"byteOffset": 19063808
},
{
"name": "transformer.h.35.mlp.c_fc.q_scale",
"shape": [
24576,
154
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7569408,
"byteOffset": 19112960
},
{
"name": "transformer.h.35.mlp.c_proj.bias",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 26682368
}
],
"md5sum": "15a23ab95790dc7d615830cf51c59e9b"
},
{
"dataPath": "params_shard_142.bin",
"format": "raw-shard",
"nbytes": 25323008,
"records": [
{
"name": "transformer.h.35.mlp.c_proj.q_scale",
"shape": [
6144,
615
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7557120,
"byteOffset": 0
},
{
"name": "transformer.h.36.attn.c_attn.bias",
"shape": [
6400
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12800,
"byteOffset": 7557120
},
{
"name": "transformer.h.36.attn.c_attn.q_weight",
"shape": [
6400,
616
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 15769600,
"byteOffset": 7569920
},
{
"name": "transformer.h.36.attn.c_attn.q_scale",
"shape": [
6400,
154
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1971200,
"byteOffset": 23339520
},
{
"name": "transformer.h.36.attn.c_proj.bias",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 25310720
}
],
"md5sum": "23044beb339019409079afd0e39fadcd"
},
{
"dataPath": "params_shard_143.bin",
"format": "raw-shard",
"nbytes": 60555264,
"records": [
{
"name": "transformer.h.36.mlp.c_fc.q_weight",
"shape": [
24576,
616
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 60555264,
"byteOffset": 0
}
],
"md5sum": "c373ddfa17d167debaa43d3db2bc1acb"
},
{
"dataPath": "params_shard_144.bin",
"format": "raw-shard",
"nbytes": 60456960,
"records": [
{
"name": "transformer.h.36.mlp.c_proj.q_weight",
"shape": [
6144,
2460
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 60456960,
"byteOffset": 0
}
],
"md5sum": "0ef0aa2797f4f921a322a421570b8fe1"
},
{
"dataPath": "params_shard_145.bin",
"format": "raw-shard",
"nbytes": 32281088,
"records": [
{
"name": "transformer.h.36.attn.c_proj.q_weight",
"shape": [
6144,
616
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 15138816,
"byteOffset": 0
},
{
"name": "transformer.h.36.attn.c_proj.q_scale",
"shape": [
6144,
154
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1892352,
"byteOffset": 15138816
},
{
"name": "transformer.h.36.ln_1.bias",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 17031168
},
{
"name": "transformer.h.36.ln_1.weight",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 17043456
},
{
"name": "transformer.h.36.ln_2.bias",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 17055744
},
{
"name": "transformer.h.36.ln_2.weight",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 17068032
},
{
"name": "transformer.h.36.mlp.c_fc.bias",
"shape": [
24576
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 49152,
"byteOffset": 17080320
},
{
"name": "transformer.h.36.mlp.c_fc.q_scale",
"shape": [
24576,
154
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7569408,
"byteOffset": 17129472
},
{
"name": "transformer.h.36.mlp.c_proj.bias",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 24698880
},
{
"name": "transformer.h.36.mlp.c_proj.q_scale",
"shape": [
6144,
615
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7557120,
"byteOffset": 24711168
},
{
"name": "transformer.h.37.attn.c_attn.bias",
"shape": [
6400
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12800,
"byteOffset": 32268288
}
],
"md5sum": "e5fcfb342b8bee0222630d5b8056b418"
},
{
"dataPath": "params_shard_146.bin",
"format": "raw-shard",
"nbytes": 32891904,
"records": [
{
"name": "transformer.h.37.attn.c_attn.q_weight",
"shape": [
6400,
616
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 15769600,
"byteOffset": 0
},
{
"name": "transformer.h.37.attn.c_attn.q_scale",
"shape": [
6400,
154
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1971200,
"byteOffset": 15769600
},
{
"name": "transformer.h.37.attn.c_proj.bias",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 17740800
},
{
"name": "transformer.h.37.attn.c_proj.q_weight",
"shape": [
6144,
616
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 15138816,
"byteOffset": 17753088
}
],
"md5sum": "91a93ee815e15e6686c627189fdd9802"
},
{
"dataPath": "params_shard_147.bin",
"format": "raw-shard",
"nbytes": 60555264,
"records": [
{
"name": "transformer.h.37.mlp.c_fc.q_weight",
"shape": [
24576,
616
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 60555264,
"byteOffset": 0
}
],
"md5sum": "235091dc32bd2c5940a56bf38a93b74b"
},
{
"dataPath": "params_shard_148.bin",
"format": "raw-shard",
"nbytes": 60456960,
"records": [
{
"name": "transformer.h.37.mlp.c_proj.q_weight",
"shape": [
6144,
2460
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 60456960,
"byteOffset": 0
}
],
"md5sum": "5a24ff37616bd91f062464c1d58c5457"
},
{
"dataPath": "params_shard_149.bin",
"format": "raw-shard",
"nbytes": 32911872,
"records": [
{
"name": "transformer.h.37.attn.c_proj.q_scale",
"shape": [
6144,
154
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1892352,
"byteOffset": 0
},
{
"name": "transformer.h.37.ln_1.bias",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 1892352
},
{
"name": "transformer.h.37.ln_1.weight",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 1904640
},
{
"name": "transformer.h.37.ln_2.bias",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 1916928
},
{
"name": "transformer.h.37.ln_2.weight",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 1929216
},
{
"name": "transformer.h.37.mlp.c_fc.bias",
"shape": [
24576
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 49152,
"byteOffset": 1941504
},
{
"name": "transformer.h.37.mlp.c_fc.q_scale",
"shape": [
24576,
154
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7569408,
"byteOffset": 1990656
},
{
"name": "transformer.h.37.mlp.c_proj.bias",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 9560064
},
{
"name": "transformer.h.37.mlp.c_proj.q_scale",
"shape": [
6144,
615
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7557120,
"byteOffset": 9572352
},
{
"name": "transformer.h.38.attn.c_attn.bias",
"shape": [
6400
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12800,
"byteOffset": 17129472
},
{
"name": "transformer.h.38.attn.c_attn.q_weight",
"shape": [
6400,
616
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 15769600,
"byteOffset": 17142272
}
],
"md5sum": "f1153b9d8f2bd93ddb396f8bba73702c"
},
{
"dataPath": "params_shard_150.bin",
"format": "raw-shard",
"nbytes": 1995776,
"records": [
{
"name": "transformer.h.38.attn.c_attn.q_scale",
"shape": [
6400,
154
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1971200,
"byteOffset": 0
},
{
"name": "transformer.h.38.ln_1.bias",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 1971200
},
{
"name": "transformer.h.38.ln_1.weight",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 1983488
}
],
"md5sum": "74b011f32b1fdd177ce7594018282b16"
}
]
}