You signed in with another tab or window. Reload to refresh your session.You signed out in another tab or window. Reload to refresh your session.You switched accounts on another tab or window. Reload to refresh your session.Dismiss alert
Traceback (most recent call last):
File "C:\Users\MoonDongWon\anaconda3\envs\rl_trader\lib\runpy.py", line 196, in _run_module_as_main
return _run_code(code, main_globals, None,
File "C:\Users\MoonDongWon\anaconda3\envs\rl_trader\lib\runpy.py", line 86, in _run_code
exec(code, run_globals)
File "C:\Users\MoonDongWon\anaconda3\envs\rl_trader\Scripts\optimum-cli.exe\__main__.py", line 7, in <module>
File "C:\Users\MoonDongWon\anaconda3\envs\rl_trader\lib\site-packages\optimum\commands\optimum_cli.py", line 208, in main
service.run()
File "C:\Users\MoonDongWon\anaconda3\envs\rl_trader\lib\site-packages\optimum\onnxruntime\subpackage\commands\quantize.py", line 105, in run
q.quantize(
File "C:\Users\MoonDongWon\anaconda3\envs\rl_trader\lib\site-packages\optimum\onnxruntime\quantization.py", line 403, in quantize
quantizer.quantize_model()
File "C:\Users\MoonDongWon\anaconda3\envs\rl_trader\lib\site-packages\onnxruntime\quantization\onnx_quantizer.py", line 211, in quantize_model
op_quantizer.quantize()
File "C:\Users\MoonDongWon\anaconda3\envs\rl_trader\lib\site-packages\onnxruntime\quantization\operators\matmul.py", line 78, in quantize
otype = self.quantizer.get_tensor_type(node.output[0], mandatory=True)
File "C:\Users\MoonDongWon\anaconda3\envs\rl_trader\lib\site-packages\onnxruntime\quantization\onnx_quantizer.py", line 271, in get_tensor_type
return self._get_default_tensor_type(tensor_name)
File "C:\Users\MoonDongWon\anaconda3\envs\rl_trader\lib\site-packages\onnxruntime\quantization\onnx_quantizer.py", line 250, in _get_default_tensor_type
raise RuntimeError(
RuntimeError: Unable to find data type for weight_name='/model/layers.0/self_attn/k_proj/MatMul_output_0'. shape_inference failed to return a type probably this node is from a different domain or using an input produced by such an operator. This may happen if you quantize a model already quantized. You may use extra_options `DefaultTensorType` to indicate the default weight type, usually `onnx.TensorProto.FLOAT`.
The text was updated successfully, but these errors were encountered:
optimum-cli onnxruntime quantize --avx512 --onnx_model llama-3.2-3b-instruct-qa-onnx -o quantized_llama-3.2-3b-instruct-qa-onnx
The text was updated successfully, but these errors were encountered: