Skip to content

Commit

Permalink
Make sure the output is float when y_zp is not present
Browse files Browse the repository at this point in the history
  • Loading branch information
yuslepukhin committed Sep 19, 2024
1 parent 9601793 commit 9ca55b4
Showing 1 changed file with 6 additions and 4 deletions.
10 changes: 6 additions & 4 deletions onnxruntime/contrib_ops/cpu/quantization/quant_gemm.cc
Original file line number Diff line number Diff line change
Expand Up @@ -50,14 +50,16 @@ static void HandleZeroKCase(const Tensor& a_scale, const Tensor& b_scale, Tensor
const auto N = narrow<Eigen::Index>(output_dims[1]);

if (y_zp == nullptr) {
// Because y_zp is not provided, the output is float32
// Either fill with bias_data if present or 0
uint8_t* output = reinterpret_cast<uint8_t*>(y.MutableDataRaw());
ORT_ENFORCE(y.SizeInBytes() == SafeInt<size_t>(M) * N * sizeof(float), "Output must be sized for float");
float* output = reinterpret_cast<float*>(y.MutableDataRaw());
if (bias != nullptr) {
auto clip_fn = [](uint32_t v) -> uint8_t { return static_cast<uint8_t>(v & 0xFF); };
auto to_float = [](uint32_t v) -> float { return static_cast<float>(v); };
GemmBroadcastBiasAndApplyFn(M, N, bias->Data<int32_t>(),
bias->Shape(), output, clip_fn);
bias->Shape(), output, to_float);
} else {
EigenMatrixMapRowMajor<uint8_t> output_mat(output, M, N);
EigenMatrixMapRowMajor<float> output_mat(output, M, N);
output_mat.setZero();
}
} else {
Expand Down

0 comments on commit 9ca55b4

Please sign in to comment.