/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied. See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

/*!
 * \file src/relay/qnn/utils.h
 * \brief Utility methods needed by quantized ops that can be shared.
 */

#ifndef TVM_RELAY_QNN_UTILS_H_
#define TVM_RELAY_QNN_UTILS_H_

#include <tvm/relay/expr.h>
#include <tvm/relay/qnn/attrs.h>
#include <tvm/tir/expr.h>
#include <tvm/tir/op.h>

#include <limits>
#include <string>
#include <utility>
#include <vector>

#include "./op/requantize_config.h"

namespace tvm {
namespace relay {
namespace qnn {

static inline Array<IndexExpr> get_shape(const Type& type) {
  auto input_tt = type.as<TensorTypeNode>();
  ICHECK(input_tt != nullptr) << "Type information missing."
                              << " Please run infer_type pass.";
  return input_tt->shape;
}

static inline int32_t GetQmin(const DataType& dtype) {
  ICHECK_LE(dtype.bits(), 32) << "QNN ops support int32 or lower precision";
  if (dtype.is_int() || dtype.is_uint()) {
    auto min_value_expr = tvm::min_value(dtype);
    auto* min_value = tir::as_const_int(min_value_expr);
    ICHECK(min_value != nullptr);
    return static_cast<int32_t>(min_value[0]);
  } else {
    LOG(FATAL) << "Type not supported " << dtype;
    return -1;  // Unreachable; silences the missing-return warning.
  }
}

static inline int32_t GetQmax(const DataType& dtype) {
  ICHECK_LE(dtype.bits(), 32) << "QNN ops support int32 or lower precision";
  if (dtype.is_int() || dtype.is_uint()) {
    auto max_value_expr = tvm::max_value(dtype);
    auto* max_value = tir::as_const_int(max_value_expr);
    ICHECK(max_value != nullptr);
    return static_cast<int32_t>(max_value[0]);
  } else {
    LOG(FATAL) << "Type not supported " << dtype;
    return -1;  // Unreachable; silences the missing-return warning.
  }
}
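
/*
 * Example: for DataType::Int(8) the quantized range is [-128, 127], and for
 * DataType::UInt(8) it is [0, 255]:
 *
 *   int32_t qmin = GetQmin(DataType::Int(8));  // -128
 *   int32_t qmax = GetQmax(DataType::Int(8));  //  127
 *
 * These bounds are typically used to clip a requantized value back into the
 * representable range of the output dtype.
 */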

/*
 * \brief Convert an FP32 value into a fixed point representation.
 * \param double_multiplier The input FP32 number.
 * \return The pair of multiplier and shift for the fixed point representation.
 * \note Converts a floating point number so that it can be represented by
 * integers. The representation is
 * float_number = (significand) * 2^(exponent)
 *
 * The significand is a number between 0.5 and 1. This is represented by
 * an integer number. For example, if it is int32, then the decimal point
 * exists between bit 31 and 30 from the LSB (or between the first and second
 * bit from the left).
 *
 * Some examples are
 * 0.25 = (0.5) * 2^(-1)
 * 0.125 = (0.5) * 2^(-2)
 *
 * Credit to the TFLite reference implementation.
 */
std::pair<int32_t, int32_t> GetFixedPointMultiplierShift(double double_multiplier);
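
/*
 * A minimal sketch of this decomposition in the TFLite style, using std::frexp
 * from <cmath> (illustrative only; the actual definition lives in the
 * corresponding .cc file):
 *
 *   int exponent;
 *   double significand = std::frexp(double_multiplier, &exponent);  // [0.5, 1)
 *   auto fixed = static_cast<int64_t>(std::round(significand * (1ll << 31)));
 *   if (fixed == (1ll << 31)) {  // rounding pushed the significand up to 1.0
 *     fixed /= 2;
 *     ++exponent;
 *   }
 *   return {static_cast<int32_t>(fixed), exponent};
 *
 * For example, 0.25 has significand 0.5 and exponent -1, giving the fixed
 * point pair (1 << 30, -1).
 */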

Expr RequantizeLower(const Expr& input_tensor, const Expr& input_scale,
                     const Expr& input_zero_point, const Expr& output_scale,
                     const Expr& output_zero_point, const RequantizeAttrs* param,
                     const Array<IndexExpr>& input_shape, const DataType& out_dtype);

std::string SelectRequntizeParameter(const std::string& arg_value, const std::string& cfg_value,
                                     const bool is_cfg_default, const std::string& name);

static inline Expr Requantize(const Expr& data, const Array<IndexExpr>& input_shape,
                              const Expr& input_scale, const Expr& input_zero_point,
                              const Expr& output_scale, const Expr& output_zero_point,
                              const DataType& out_dtype, const int& axis = -1,
                              const std::string& rounding = "None",
                              const std::string& compute_dtype = "None") {
  auto attrs = make_object<RequantizeAttrs>();
  attrs->axis = axis;
  attrs->out_dtype = out_dtype;
  const RequantizeConfig& cfg = RequantizeConfig::Current();
  attrs->rounding =
      SelectRequntizeParameter(rounding, cfg->get_rounding(), cfg->is_default, "rounding");
  attrs->compute_dtype = SelectRequntizeParameter(compute_dtype, cfg->get_compute_dtype(),
                                                  cfg->is_default, "compute_dtype");
  return RequantizeLower(data, input_scale, input_zero_point, output_scale, output_zero_point,
                         attrs.operator->(), input_shape, attrs->out_dtype);
}
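
/*
 * Requantize computes, elementwise,
 *
 *   Q_out = clip(round((input_scale / output_scale) * (Q_in - input_zero_point))
 *                + output_zero_point, Qmin, Qmax)
 *
 * A hedged usage sketch, assuming the MakeConstantScalar helper from
 * pattern_utils.h and made-up scale/zero point values:
 *
 *   auto out = Requantize(data, input_shape,
 *                         MakeConstantScalar(DataType::Float(32), 0.5f),   // input_scale
 *                         MakeConstantScalar(DataType::Int(32), 0),        // input_zero_point
 *                         MakeConstantScalar(DataType::Float(32), 0.25f),  // output_scale
 *                         MakeConstantScalar(DataType::Int(32), 3),        // output_zero_point
 *                         DataType::Int(8));
 */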

Expr MakeRequantize(Expr data, Expr input_scale, Expr input_zero_point, Expr output_scale,
                    Expr output_zero_point, int axis, String rounding, String compute_dtype,
                    DataType out_dtype);

Expr DequantizeLower(const Expr& input_tensor, const Expr& input_scale,
                     const Expr& input_zero_point, const Array<tvm::relay::Type>& types,
                     const DequantizeAttrs* attrs);

static inline Expr Dequantize(const Expr& data, const Expr& input_scale,
                              const Expr& input_zero_point, const Array<tvm::relay::Type>& types,
                              const int& axis = -1) {
  auto attrs = make_object<DequantizeAttrs>();
  attrs->axis = axis;

  return DequantizeLower(data, input_scale, input_zero_point, types, attrs.operator->());
}

Expr MakeDequantize(Expr data, Expr input_scale, Expr input_zero_point, int axis);

Expr QuantizeLower(const Expr& input_tensor, const Expr& output_scale,
                   const Expr& output_zero_point, const Array<tvm::relay::Type>& types,
                   const QuantizeAttrs* attrs);

static inline Expr Quantize(const Expr& data, const Expr& output_scale,
                            const Expr& output_zero_point, const DataType& out_dtype,
                            const Array<tvm::relay::Type>& types, const int& axis = -1) {
  auto attrs = make_object<QuantizeAttrs>();
  attrs->axis = axis;
  attrs->out_dtype = out_dtype;

  return QuantizeLower(data, output_scale, output_zero_point, types, attrs.operator->());
}

Expr MakeQuantize(Expr data, Expr output_scale, Expr output_zero_point, int axis,
                  DataType out_dtype);
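
/*
 * For reference, the quantize/dequantize lowerings implement the usual affine
 * mapping between real values x and quantized values Q:
 *
 *   quantize:   Q = clip(round(x / output_scale) + output_zero_point, Qmin, Qmax)
 *   dequantize: x = input_scale * (Q - input_zero_point)
 *
 * so dequantizing a quantized value recovers x up to rounding and clipping
 * error.
 */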

static inline int64_t get_const_int(const tvm::PrimExpr& x) {
  auto* value_ptr = tir::as_const_int(x);
  ICHECK(value_ptr) << "Expr is not a constant int";
  return value_ptr[0];
}

/*
 * \brief Fixed point multiplication of an integer tensor with a floating point
 * scalar. This implementation rounds to the nearest value when it is midway
 * between two representable values.
 * \param tensor The quantized input tensor of dtype int64.
 * \param multiplier The scalar multiplier.
 * \param input_shape Shape of the input tensor.
 * \return The sequence of Relay ops for fixed point multiplication with
 * TONEAREST rounding.
 *
 * \note The original computation is scale_fp32 * quantized_tensor. To convert
 * it into an integer computation, the multiplication with the fp32 scalar can
 * be replaced by a multiplication with an int value followed by a right shift
 * of the result. This approximates the floating point computation with a
 * fixed point computation.
 *
 * The fixed point multiplication consists of the following steps:
 * 1) Multiply the fixed point multiplier with the quantized tensor.
 * 2) Round the result.
 * 3) Right shift the result.
 */
Expr FixedPointMultiplyToNearest(Expr tensor, double multiplier,
                                 const Array<IndexExpr>& input_shape);
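
/*
 * Worked example: for multiplier = 0.5, GetFixedPointMultiplierShift yields
 * the pair (1 << 30, 0), and the lowered computation is roughly
 *
 *   out = (tensor * (1 << 30) + rounding_bias) >> 31
 *
 * which halves the tensor while rounding to the nearest value. The
 * rounding_bias term is what distinguishes TONEAREST from plain truncation.
 */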

/*
 * \brief Fixed point multiplication of an integer tensor with a floating point
 * scalar, where the input tensor is per-axis/per-channel quantized.
 * \param tensor The quantized input tensor of dtype int64.
 * \param multiplier The scalar multipliers, one per channel.
 * \param input_shape Shape of the input tensor.
 * \param channel_axis The axis along which the input tensor is quantized.
 * The default value of -1 corresponds to the last axis.
 * \param rounding "UPWARD" or "TONEAREST". The rounding direction when the
 * value is midway between two representable values.
 * \return The sequence of Relay ops for fixed point multiplication.
 *
 * \note The original computation is scale_fp32 * quantized_tensor. To convert
 * it into an integer computation, the multiplication with the fp32 vector can
 * be replaced by a multiplication with an int vector followed by a right shift
 * of the result. This approximates the floating point computation with a
 * fixed point computation.
 *
 * The fixed point multiplication consists of the following steps:
 * 1) Multiply the fixed point multiplier with the quantized tensor.
 * 2) Round the result.
 * 3) Right shift the result.
 */
Expr FixedPointMultiplyPerChannel(Expr tensor, std::vector<double> multiplier,
                                  const Array<IndexExpr>& input_shape, int channel_axis,
                                  const std::string& rounding);

/*
 * Wrapper for 'FixedPointMultiplyPerChannel' with the rounding parameter set
 * to "TONEAREST".
 */
Expr FixedPointMultiplyPerChannelToNearest(Expr tensor, std::vector<double> multiplier,
                                           const Array<IndexExpr>& input_shape, int channel_axis);
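
/*
 * A hedged usage sketch: scaling a conv2d accumulator whose weights were
 * quantized per output channel, with one multiplier per channel along axis 3
 * of an NHWC tensor (the multiplier values here are made up for illustration):
 *
 *   std::vector<double> multipliers = {0.5, 0.25, 0.125};
 *   auto scaled = FixedPointMultiplyPerChannelToNearest(tensor, multipliers,
 *                                                       input_shape, 3);  // channel_axis = 3
 */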

/*
 * \brief Creates a FixedPointMultiply operation where the input tensor is
 * per-axis/per-channel quantized.
 * \param tensor The quantized input tensor.
 * \param multipliers List of scalar multipliers, one per channel.
 * \param axis The axis along which the input tensor is quantized.
 * \return The Relay op.
 */
Expr FixedPointMultiplyPerChannel(Expr tensor, const std::vector<double>& multipliers, int axis);

/*
 * \brief Checks whether an expr type is a scalar of a given data type.
 * \param expr_type The type of the expr to be checked.
 * \param dtype The expected dtype.
 * \return True if the type is a scalar of the given dtype.
 */
static inline bool IsScalarType(const Type& expr_type, const DataType& dtype) {
  const auto* tensor_type = expr_type.as<TensorTypeNode>();
  ICHECK(tensor_type) << "Only tensor type can be checked for scalar values. But got "
                      << AsText(expr_type, false);
  ICHECK_EQ(tensor_type->shape.size(), 0);
  ICHECK(tensor_type->dtype == dtype) << "Expected " << dtype << " but got " << tensor_type->dtype;
  return true;
}
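
/*
 * A hedged usage sketch, as it might appear inside a QNN type relation (the
 * types[] indices are illustrative):
 *
 *   // The scale must be a float32 scalar and the zero point an int32 scalar.
 *   ICHECK(IsScalarType(types[1], DataType::Float(32)));
 *   ICHECK(IsScalarType(types[2], DataType::Int(32)));
 */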

/*
 * \brief Checks whether an expr type is a scalar.
 * \param expr_type The type of the expr to be checked.
 * \return True if the type is a scalar.
 */
static inline bool IsScalarType(const Type& expr_type) {
  const auto* tensor_type = expr_type.as<TensorTypeNode>();
  ICHECK(tensor_type) << "Only tensor type can be checked for scalar values. But got "
                      << AsText(expr_type, false);
  return tensor_type->shape.size() == 0;
}

/*
 * \brief Checks and assigns types to scales and zero points.
 * \param expr_type The type of the expr to be checked.
 * \param dtype The expected dtype.
 * \param shape The shape at the C dim of the original tensor.
 * \param reporter The type reporter of the original InferType call.
 */
static inline void AssignType(const Type& expr_type, const DataType& dtype, const IndexExpr& shape,
                              const TypeReporter& reporter) {
  // Scales/zero points can be either a const scalar or a vector with C axis num elems.
  const auto* tensor_type = expr_type.as<TensorTypeNode>();
  ICHECK(tensor_type) << "Can assign type to Tensor type only. But got "
                      << AsText(expr_type, false);
  const auto tensor_dtype = tensor_type->dtype;
  ICHECK(tensor_dtype == dtype) << "Expected type is " << dtype << " but received " << tensor_dtype;
  if (tensor_type->shape.size() != 0) {
    reporter->Assign(expr_type, TensorType({shape}, tensor_type->dtype));
  }
}
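
/*
 * A hedged usage sketch inside a type relation: allow a scale argument to be
 * either a scalar or a vector with one element per channel (the types[] index
 * and the channels expression are illustrative):
 *
 *   AssignType(types[1], DataType::Float(32), param->channels, reporter);
 */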
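
/*
 * \brief Reads the values of a constant float32 tensor into a std::vector.
 * \param expr The constant expr; its data is assumed to be float32.
 * \return The flattened values.
 * \note The raw data pointer is reinterpreted as float, so callers must ensure
 * the constant's dtype really is float32.
 */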
static inline std::vector<float> GetFloatVectorFromConstant(const Expr& expr) {
  const auto* n = expr.as<ConstantNode>();
  std::vector<float> vals;
  ICHECK(n) << "Expr must be a constant expr - " << AsText(expr, false);
  int64_t num_elems = 1;
  auto shape = n->data.Shape();
  for (size_t i = 0; i < shape.size(); i++) {
    num_elems *= shape[i];
  }
  for (int64_t i = 0; i < num_elems; i++) {
    vals.push_back(static_cast<float*>(n->data->data)[i]);
  }
  return vals;
}

Expr MakeQnnConv2D(Expr data, Expr weight, Expr input_zero_point, Expr kernel_zero_point,
                   Expr input_scale, Expr kernel_scale, Array<IndexExpr> strides,
                   Array<IndexExpr> padding, Array<IndexExpr> dilation, int groups,
                   IndexExpr channels, Array<IndexExpr> kernel_size, String data_layout,
                   String kernel_layout, String out_layout, DataType out_dtype);

}  // namespace qnn
}  // namespace relay
}  // namespace tvm
#endif  // TVM_RELAY_QNN_UTILS_H_