Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Support block granularity for QuantizeLinear and DequantizeLinear #3412

Merged
merged 11 commits into from
Sep 28, 2024
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
44 changes: 44 additions & 0 deletions src/onnx/include/migraphx/onnx/quantize_dequantize_linear.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
/*
* The MIT License (MIT)
*
* Copyright (c) 2015-2024 Advanced Micro Devices, Inc. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
#ifndef MIGRAPHX_GUARD_AMDMIGRAPHX_ONNX_QUANTIZE_DEQUANTIZE_LINEAR_HPP
#define MIGRAPHX_GUARD_AMDMIGRAPHX_ONNX_QUANTIZE_DEQUANTIZE_LINEAR_HPP

#include <migraphx/onnx/op_parser.hpp>
#include <migraphx/instruction.hpp>

namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace onnx {

// Shared helper for the QuantizeLinear and DequantizeLinear ONNX parsers.
// Broadcasts the scale input (args[1]) and the optional zero-point input
// (args[2]) in place so that they match the shape of the data input x
// (args[0]). Supports per-tensor (scalar scale), per-axis (1-D scale along
// `axis`), and blocked granularity (scale of the same rank as x, expanded in
// blocks of `block_size` along `axis`).
// `op_name` is used only to prefix error messages; throws on invalid
// scale/zero-point shapes or an out-of-range block_size.
void transform_quantize_dequantize_linear_inputs(const onnx_parser::node_info& info,
const std::string& op_name,
int block_size,
int axis,
std::vector<instruction_ref>& args);

} // namespace onnx
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx

#endif
61 changes: 27 additions & 34 deletions src/onnx/parse_dequantizelinear.cpp
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
/*
* The MIT License (MIT)
*
* Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved.
* Copyright (c) 2015-2024 Advanced Micro Devices, Inc. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
Expand All @@ -26,6 +26,7 @@
#include <migraphx/ranges.hpp>
#include <migraphx/make_op.hpp>
#include <migraphx/tune_axis.hpp>
#include <migraphx/onnx/quantize_dequantize_linear.hpp>

namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
Expand All @@ -38,49 +39,41 @@
/// Parses the ONNX DequantizeLinear operator.
/// Inputs: x (args[0]), y_scale (args[1]), optional y_zero_point (args[2]).
/// Validates the input count, the x/y_zero_point type match, and the
/// y_scale/y_zero_point shape match, then broadcasts scale and zero-point to
/// the shape of x (per-tensor, per-axis, or blocked granularity) before
/// emitting the migraphx "dequantizelinear" instruction.
/// Note: args is taken by non-const reference because the broadcast helper
/// rewrites the scale/zero-point entries in place.
instruction_ref parse(const op_desc& opd,
                      const onnx_parser& /*parser*/,
                      const onnx_parser::node_info& info,
                      std::vector<instruction_ref>& args) const
{
    if(args.size() < 2 or args.size() > 3)
    {
        MIGRAPHX_THROW("DequantizeLinear: must have either 2 or 3 inputs, " +
                       std::to_string(args.size()) + " inputs provided");
    }

    if(args.size() == 3)
    {
        // x and y_zero_point must share the quantized element type
        if(args[0]->get_shape().type() != args[2]->get_shape().type())
            MIGRAPHX_THROW("DequantizeLinear: x and y_zero_point must be of same type");

        if(args[1]->get_shape().lens() != args[2]->get_shape().lens())
        {
            MIGRAPHX_THROW("DequantizeLinear: y_scale and y_zero_point shapes must be equal. "
                           "Provided y_scale "
                           "shape: " +
                           to_string_range(args[1]->get_shape().lens()) +
                           ", provided y_zero_point shape: " +
                           to_string_range(args[2]->get_shape().lens()));
        }
    }

    int axis = 1; // ONNX default
    if(contains(info.attributes, "axis"))
        axis = info.attributes.at("axis").i();

    int block_size = 0; // ONNX default; 0 means no block granularity requested
    if(contains(info.attributes, "block_size"))
        block_size = info.attributes.at("block_size").i();

    // Broadcast scale/zero-point in place to match the shape of x
    transform_quantize_dequantize_linear_inputs(info, opd.op_name, block_size, axis, args);

    return info.add_instruction(make_op("dequantizelinear"), args);
}
};

Expand Down
84 changes: 56 additions & 28 deletions src/onnx/parse_quantizelinear.cpp
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
/*
* The MIT License (MIT)
*
* Copyright (c) 2015-2023 Advanced Micro Devices, Inc. All rights reserved.
* Copyright (c) 2015-2024 Advanced Micro Devices, Inc. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
Expand All @@ -27,6 +27,7 @@
#include <migraphx/make_op.hpp>
#include <migraphx/tune_axis.hpp>
#include <migraphx/common.hpp>
#include <migraphx/onnx/quantize_dequantize_linear.hpp>

namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
Expand All @@ -37,47 +38,74 @@
std::vector<op_desc> operators() const { return {{"QuantizeLinear"}}; }

/// Parses the ONNX QuantizeLinear operator.
/// Inputs: x (args[0]), y_scale (args[1]), optional y_zero_point (args[2]).
/// Validates input count, type constraints (opset >= 19 requires x and
/// y_scale to have the same type), shape equality of y_scale/y_zero_point,
/// and consistency of the optional output_dtype attribute with y_zero_point.
/// Scale/zero-point are then broadcast in place to the shape of x before the
/// migraphx "quantizelinear" instruction is emitted.
/// Note: args is taken by non-const reference because the broadcast helper
/// rewrites the scale/zero-point entries in place.
instruction_ref parse(const op_desc& opd,
                      const onnx_parser& parser,
                      const onnx_parser::node_info& info,
                      std::vector<instruction_ref>& args) const
{
    if(args.size() < 2 or args.size() > 3)
    {
        MIGRAPHX_THROW("QuantizeLinear: must have either 2 or 3 inputs, " +
                       std::to_string(args.size()) + " inputs provided");
    }

    // Starting with version 19 ONNX introduced the constraint that x and y_scale types must be
    // the same
    if(parser.opset_version >= 19 and
       args[0]->get_shape().type() != args[1]->get_shape().type())
    {
        MIGRAPHX_THROW("QuantizeLinear: x and y_scale must be of same type");
    }

    if(args.size() == 3 and args[1]->get_shape().lens() != args[2]->get_shape().lens())
    {
        MIGRAPHX_THROW(
            "QuantizeLinear: y_scale and y_zero_point shapes must be equal. Provided y_scale "
            "shape: " +
            to_string_range(args[1]->get_shape().lens()) +
            ", provided y_zero_point shape: " + to_string_range(args[2]->get_shape().lens()));
    }

    int axis = 1; // ONNX default
    if(contains(info.attributes, "axis"))
        axis = info.attributes.at("axis").i();

    int block_size = 0; // ONNX default; 0 means no block granularity requested
    if(contains(info.attributes, "block_size"))
        block_size = info.attributes.at("block_size").i();

    // Optional explicit output type; when absent, the zero-point type (or the
    // operator default) determines the output type.
    std::optional<migraphx::shape::type_t> output_type;
    if(contains(info.attributes, "output_dtype"))
    {
        output_type = get_type(info.attributes.at("output_dtype").i());
    }

    if(output_type.has_value() and args.size() == 3 and
       *output_type != args[2]->get_shape().type())
    {
        MIGRAPHX_THROW(
            "QuantizeLinear: output_type and y_zero_point type must match. output_type: " +
            to_string(*output_type) +
            ", y_zero_point type: " + to_string(args[2]->get_shape().type()));
    }

    // Broadcast scale/zero-point in place to match the shape of x
    transform_quantize_dequantize_linear_inputs(info, opd.op_name, block_size, axis, args);

    if(parser.opset_version < 19)
    {
        // Prior to opset 19 x and y_scale may have different float types, so
        // convert both to their common type; the optional zero-point (int8 or
        // uint8) is deliberately left untouched, hence args.begin() + 2.
        auto common_type = common_shape({args[0]->get_shape(), args[1]->get_shape()}).type();
        std::transform(args.begin(), args.begin() + 2, args.begin(), [&](auto ins) {
            if(ins->get_shape().type() != common_type)
                ins = info.add_instruction(make_op("convert", {{"target_type", common_type}}),
                                           ins);
            return ins;
        });
    }

    if(output_type.has_value())
        return info.add_instruction(make_op("quantizelinear", {{"out_type", *output_type}}),
                                    args);
    else
        return info.add_instruction(make_op("quantizelinear"), args);
}
};

Expand Down
139 changes: 139 additions & 0 deletions src/onnx/quantize_dequantize_linear.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,139 @@
/*
* The MIT License (MIT)
*
* Copyright (c) 2015-2024 Advanced Micro Devices, Inc. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/

#include <migraphx/onnx/quantize_dequantize_linear.hpp>
#include <migraphx/ranges.hpp>
#include <migraphx/make_op.hpp>
#include <migraphx/tune_axis.hpp>
#include <migraphx/common.hpp>

namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace onnx {

/// Broadcasts the scale (args[1]) and optional zero-point (args[2]) inputs of
/// QuantizeLinear/DequantizeLinear in place so they match the shape of the
/// data input x (args[0]).
///
/// Three granularities are supported:
///  - Per-tensor: scalar scale -> multibroadcast to the shape of x.
///  - Per-axis:   1-D scale whose length equals x's extent on `axis` ->
///                broadcast along `axis`.
///  - Blocked:    scale of the same rank as x; each scale element covers a
///                block of `block_size` consecutive elements along `axis`.
///
/// `op_name` prefixes all error messages. Throws when the scale/zero-point
/// shapes are inconsistent with x, or when `block_size` lies outside the
/// range permitted by the ONNX spec for blocked granularity.
void transform_quantize_dequantize_linear_inputs(const onnx_parser::node_info& info,
                                                 const std::string& op_name,
                                                 int block_size,
                                                 int axis,
                                                 std::vector<instruction_ref>& args)
{
    const auto x      = args.at(0);
    const auto x_lens = x->get_shape().lens();
    const auto x_rank = x_lens.size();

    instruction_ref y_scale = args.at(1);
    const auto y_scale_lens = y_scale->get_shape().lens();
    const auto y_scale_rank = y_scale_lens.size();

    // Per-tensor (per-layer) granularity
    if(y_scale->get_shape().elements() == 1)
    {
        std::transform(args.begin() + 1, args.end(), args.begin() + 1, [&](auto ins) {
            return info.add_instruction(make_op("multibroadcast", {{"out_lens", x_lens}}), ins);
        });
    }
    // Per-axis granularity
    else if(y_scale_rank == 1)
    {
        axis = tune_axis(x_rank, axis, op_name);
        if(x_lens[axis] != y_scale_lens[0])
        {
            MIGRAPHX_THROW(op_name + ": For per axis granularity the length of y_scale (actual: " +
                           to_string(y_scale_lens[0]) + ") must be equal to size of x on axis " +
                           to_string(axis) + "(actual :" + to_string(x_lens[axis]) + ")");
        }

        std::transform(args.begin() + 1, args.end(), args.begin() + 1, [&](auto ins) {
            return info.add_instruction(
                make_op("broadcast", {{"axis", axis}, {"out_lens", x_lens}}), ins);
        });
    }
    // Blocked granularity
    else
    {
        axis = tune_axis(x_rank, axis, op_name);
        if(block_size == 0)
        {
            MIGRAPHX_THROW(op_name + ": Invalid blocksize(0)");
        }

        if(x_rank != y_scale_rank)
        {
            MIGRAPHX_THROW(op_name + ": x(rank: " + to_string(x_rank) +
                           ") and y_scale(rank: " + to_string(y_scale_rank) +
                           ") must be of same rank for block granularity");
        }

        // Apart from the blocked axis, x and y_scale must agree dimension-wise
        for(auto i = 0u; i < x_lens.size(); ++i)
        {
            if(x_lens[i] != y_scale_lens[i] and i != axis)
            {
                MIGRAPHX_THROW(op_name + ": x(shape: " + to_string_range(x_lens) +
                               ") and y_scale(shape: " + to_string_range(y_scale_lens) +
                               ") shapes may only differ along provided axis(" + to_string(axis) +
                               ")");
            }
        }

        // Given x shape (D0, ..., Di, ..., Dn), y_scale shape (S0, ... Si, ...Sn) and
        // axis=i, the accepted range is [ceil(Di/Si), ceil(Di/(Si-1))-1]
        // (float division is intentional so std::ceil rounds up)
        float di           = x_lens[axis];
        float si           = y_scale_lens[axis];
        int block_size_min = std::ceil(di / si);
        int block_size_max = std::ceil(di / (si - 1)) - 1;
        if(block_size < block_size_min or block_size > block_size_max)
            MIGRAPHX_THROW(op_name + ": Block size(actual: " + to_string(block_size) +
                           ") must be within range [" + to_string(block_size_min) + ", " +
                           to_string(block_size_max) + "]");

        std::transform(args.begin() + 1, args.end(), args.begin() + 1, [&](auto ins) {
            // block_size == 1 means the scale already matches x element-wise
            if(block_size == 1)
                return ins;

            // Expand each scale element into a block of block_size entries
            // along axis: unsqueeze a new dim, broadcast it to block_size,
            // then collapse it back into axis with a reshape.
            ins = info.add_instruction(make_op("unsqueeze", {{"axes", {axis + 1}}}), ins);

            auto bc_lens      = ins->get_shape().lens();
            bc_lens[axis + 1] = block_size;
            ins = info.add_instruction(make_op("multibroadcast", {{"out_lens", bc_lens}}), ins);

            auto reshape_lens  = x_lens;
            reshape_lens[axis] = ins->get_shape().lens()[axis] * block_size;
            ins = info.add_instruction(make_op("reshape", {{"dims", reshape_lens}}), ins);

            // Detect runt block: when the last block is shorter than
            // block_size, slice off the excess so the result matches x.
            if(x_lens[axis] < reshape_lens[axis])
            {
                ins = info.add_instruction(
                    make_op("slice", {{"axes", {axis}}, {"starts", {0}}, {"ends", {x_lens[axis]}}}),
                    ins);
            }

            return ins;
        });
    }
}

} // namespace onnx
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
Binary file not shown.
Loading
Loading