Skip to content

Commit 4542a33

Browse files
committed
Merge branch 'k_quant' of https://github.com/jiafatom/neural-compressor into k_quant
2 parents c3318cf + 6015feb commit 4542a33

File tree

1 file changed

+2
-4
lines changed

1 file changed

+2
-4
lines changed

neural_compressor/adaptor/ox_utils/weight_only.py

+2-4
Original file line numberDiff line numberDiff line change
@@ -260,7 +260,7 @@ def quant_tensor_k_quant_cpu(data, num_bits=4, group_size=32):
         scale: scale
         zero_point: zero point
     """
-    data = np.reshape(data, (-1, group_size)).astype(np.float32)  # (nb, group_size)
+    data = np.reshape(data, (-1, group_size)).astype(np.float32)  # nb = data.shape[0], (nb, group_size)
     maxq = 2**num_bits - 1
     minq = 0
     sum_x2 = np.sum(data**2, axis=1, keepdims=True)  # (nb, 1)
@@ -535,9 +535,7 @@ def rtn_quantize(
         # MatMulFpQ4 support 4 bits and 32 group_size with ort 1.16.0 and 1.16.1 versions, supported by CPU EP
         # MatMulNBits supports 4 bits and 2^n group_size with ort > 1.16.1, supported by CPU EP AND CUDA EP
         if algorithm == "k_quant":
-            q_weight, scale, zp = quant_tensor_k_quant_cuda(
-                weight.T, num_bits, group_size
-            )
+            q_weight, scale, zp = quant_tensor_k_quant_cuda(weight.T, num_bits, group_size)
         else:
             q_weight, scale, zp = quant_tensor(
                 weight.T, num_bits, group_size, scheme, "uint", ratios.get(node.input[1], 1)

0 commit comments

Comments
 (0)