utils.cc source code [tvm/src/relay/qnn/utils.cc]

1	/*
2	* Licensed to the Apache Software Foundation (ASF) under one
3	* or more contributor license agreements. See the NOTICE file
4	* distributed with this work for additional information
5	* regarding copyright ownership. The ASF licenses this file
6	* to you under the Apache License, Version 2.0 (the
7	* "License"); you may not use this file except in compliance
8	* with the License. You may obtain a copy of the License at
9	*
10	* http://www.apache.org/licenses/LICENSE-2.0
11	*
12	* Unless required by applicable law or agreed to in writing,
13	* software distributed under the License is distributed on an
14	* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15	* KIND, either express or implied. See the License for the
16	* specific language governing permissions and limitations
17	* under the License.
18	*/
19
20	/!*
21	* \file src/relay/qnn/utils.cc
22	* \brief Utility functions for QNN.
23	*/
24
25	#include "utils.h"
26
27	#include "../transforms/pattern_utils.h"
28
29	namespace tvm {
30	namespace relay {
31	namespace qnn {
32
33	std::pair<int32_t, int32_t> GetFixedPointMultiplierShift(double double_multiplier) {
34	int32_t significand, exponent;
35	if (double_multiplier == `0.`) {
36	significand = `0`;
37	exponent = `0`;
38	return std::make_pair(significand, exponent);
39	}
40
41	// Get the significand and exponent.
42	double significand_d = std::frexp(double_multiplier, &exponent);
43
44	// Convert the double significand to int significand, i.e., convert into a
45	// integer where the decimal point is between bit 31 and 30. This is done by
46	// multiplying the double value with 2^31 and then casting to int.
47	significand_d = std::round(significand_d * (`1ll` << `31`));
48	auto significand_int64 = static_cast<int64_t>(significand_d);
49	ICHECK_LE(significand_int64, (`1ll` << `31`));
50	if (significand_int64 == (`1ll` << `31`)) {
51	significand_int64 /= `2`;
52	++exponent;
53	}
54	ICHECK_LE(significand_int64, std::numeric_limits<int32_t>::max());
55	significand = static_cast<int32_t>(significand_int64);
56	return std::make_pair(significand, exponent);
57	}
58
59	Expr FixedPointMultiplyToNearest(Expr tensor, double multiplier,
60	const Array<IndexExpr>& input_shape) {
61	// Choose high precision datatype to be int64. This is for avoiding overflow
62	// in multiplication of two int32 values.
63	DataType hp_dtype = DataType::Int(`64`);
64	tensor = Cast(tensor, hp_dtype);
65
66	// 1) Calculating the integer multiplier and integer shift
67	auto [fixed_point_multiplier, shift] = GetFixedPointMultiplierShift(multiplier);
68	int left_shift = shift > `0` ? shift : `0`;
69	int right_shift = shift > `0` ? `0` : -shift;
70
71	// 2) Multiply the integer multiplier
72	if (left_shift != `0`) {
73	tensor = LeftShift(tensor, MakeConstantScalar(hp_dtype, left_shift));
74	}
75
76	// 3) Perform the multiplication in higher precision.
77	// The scalar is a fixed point value of int32 where the decimal point is
78	// between bits 31 and 30. After multiplying with input_tensor, the result
79	// is in int64 where the decimal point is sitting between bits 31 and 30
80	// (from the right, rightmost bit is bit 0). The computation is performed in
81	// higher precision to avoid overflow in multiplying two int32 values.
82	Expr scalar = MakeConstantScalar(hp_dtype, fixed_point_multiplier);
83	tensor = Multiply(tensor, scalar);
84
85	// 4) Find the rounding scalar. This depends on where the final decimal
86	// point sits. As we will be right shifting the multiplied_t, we need to
87	// first calculate the total_right_shift.
88	int total_right_shift = right_shift + `31`;
89	int64_t pos_rounding_value = (`1ll` << (total_right_shift - `1`));
90
91	Expr round_scalar;
92
93	auto pos_rounder = MakeConstantScalar(hp_dtype, pos_rounding_value);
94	auto neg_rounder = MakeConstantScalar(hp_dtype, pos_rounding_value - `1`);
95	auto pos_rounder_t = Full(pos_rounder, input_shape, hp_dtype);
96	auto neg_rounder_t = Full(neg_rounder, input_shape, hp_dtype);
97
98	auto zero_t = Zeros(input_shape, hp_dtype);
99	round_scalar = Where(GreaterEqual(tensor, zero_t), pos_rounder_t, neg_rounder_t);
100
101	// Add the rounding scalar.
102	tensor = Add(tensor, round_scalar);
103
104	// 5) Simply right shift the result to get the final output.
105	tensor = RightShift(tensor, MakeConstantScalar(hp_dtype, total_right_shift));
106
107	// 6) The fixed point multiplication keeps the value in int32 range. Casting back to int32.
108	return Cast(tensor, DataType::Int(`32`));
109	}
110
111	Expr FixedPointMultiplyPerChannel(Expr tensor, const std::vector<double>& multipliers, int axis) {
112	DataType dtype = DataType::Int(`32`);
113	int64_t n_channels = static_cast<int64_t>(multipliers.size());
114
115	std::vector<int32_t> fixed_pt_multipliers, lshifts, rshifts;
116	bool is_lshift_required = false, is_rshift_required = false;
117	for (auto multiplier : multipliers) {
118	auto [fixed_pt_multiplier, shift] = GetFixedPointMultiplierShift(multiplier);
119	int lshift = shift > `0` ? shift : `0`;
120	int rshift = shift > `0` ? `0` : -shift;
121	fixed_pt_multipliers.push_back(fixed_pt_multiplier);
122	lshifts.push_back(lshift);
123	rshifts.push_back(rshift);
124	is_lshift_required = is_lshift_required \| (lshift != `0`);
125	is_rshift_required = is_rshift_required \| (rshift != `0`);
126	}
127
128	auto left_shift_expr = MakeConstantTensor(dtype, {n_channels}, lshifts);
129	auto right_shift_expr = MakeConstantTensor(dtype, {n_channels}, rshifts);
130	auto fixed_pt_multiplier_expr = MakeConstantTensor(dtype, {n_channels}, fixed_pt_multipliers);
131
132	return FixedPointMultiplyPerAxis(tensor, fixed_pt_multiplier_expr, left_shift_expr,
133	right_shift_expr, is_lshift_required, is_rshift_required,
134	{axis});
135	}
136
137	Expr FixedPointMultiplyPerChannel(Expr tensor, std::vector<double> multipliers,
138	const Array<IndexExpr>& input_shape, int channel_axis,
139	const std::string& rounding) {
140	// Get the n dim. This will be used to expand the multiplier to match the axis.
141	size_t n_dim = input_shape.size();
142
143	// Get the num of channels/axis along which the tensor was quantized.
144	int64_t n_channels = (int64_t)multipliers.size();
145
146	// Choose high precision datatype to be int64. This is for avoiding overflow
147	// in multiplication of two int32 values.
148	DataType hp_dtype = DataType::Int(`64`);
149	tensor = Cast(tensor, hp_dtype);
150
151	// 1) Calculating the integer multiplier and integer shift. These are calculated per axis/per
152	// channel.
153	std::vector<int32_t> fixed_pt_multipliers, lshifts, rshifts;
154	bool is_lshift_required = false;
155	for (auto multiplier : multipliers) {
156	auto [fixed_pt_multiplier, shift] = GetFixedPointMultiplierShift(multiplier);
157	int lshift = shift > `0` ? shift : `0`;
158	int rshift = shift > `0` ? `0` : -shift;
159	fixed_pt_multipliers.push_back(fixed_pt_multiplier);
160	lshifts.push_back(lshift);
161	rshifts.push_back(rshift);
162	is_lshift_required = is_lshift_required \| (lshift != `0`);
163	}
164
165	// 2) Multiply the integer multiplier. Convert lefts shifts into expr and multiply.
166	if (is_lshift_required) {
167	auto lshift_expr = MakeConstantTensor(hp_dtype, {n_channels}, lshifts);
168	auto exp_lshift_expr = ExpandBiasToMatchAxis(lshift_expr, n_dim, {channel_axis});
169	tensor = LeftShift(tensor, exp_lshift_expr);
170	}
171
172	// 3) Perform the multiplication in higher precision.
173	// The scalar is a fixed point value of int32 where the decimal point is
174	// between bits 31 and 30. After multiplying with input_tensor, the result
175	// is in int64 where the decimal point is sitting between bits 31 and 30
176	// (from the right, rightmost bit is bit 0). The computation is performed in
177	// higher precision to avoid overflow in multiplying two int32 values.
178	auto fixed_pt_multiplier_expr = MakeConstantTensor(hp_dtype, {n_channels}, fixed_pt_multipliers);
179	auto exp_fixed_pt_multiplier_expr =
180	ExpandBiasToMatchAxis(fixed_pt_multiplier_expr, n_dim, {channel_axis});
181	tensor = Multiply(tensor, exp_fixed_pt_multiplier_expr);
182
183	// 4) Find the rounding scalar. This depends on where the final decimal point sits. As we will be
184	// right shifting the multiplied_t, we need to first calculate the total_rshift. Further, we can
185	// calculate the pos and neg rounding offset.
186	std::vector<int64_t> pos_rounding_values, neg_rounding_values, total_rshifts;
187	for (auto rshift : rshifts) {
188	int total_rshift = rshift + `31`;
189	total_rshifts.push_back(total_rshift);
190	pos_rounding_values.push_back((`1ll` << (total_rshift - `1`)));
191	neg_rounding_values.push_back((`1ll` << (total_rshift - `1`)) - `1`);
192	}
193	// Make a Relay expr from positive and negative rounding offset values.
194	auto pos_rounding_value_expr = MakeConstantTensor(hp_dtype, {n_channels}, pos_rounding_values);
195	auto exp_pos_rounding_value_expr =
196	ExpandBiasToMatchAxis(pos_rounding_value_expr, n_dim, {channel_axis});
197	auto neg_rounding_value_expr = MakeConstantTensor(hp_dtype, {n_channels}, neg_rounding_values);
198	auto exp_neg_rounding_value_expr =
199	ExpandBiasToMatchAxis(neg_rounding_value_expr, n_dim, {channel_axis});
200
201	Expr round_scalar;
202	if (rounding == "UPWARD") {
203	round_scalar = exp_pos_rounding_value_expr;
204	} else if (rounding == "TONEAREST") {
205	// To satisfy where op shape requirements, the rounding values are broadcasted.
206	auto pos_rounder = BroadCastTo(exp_pos_rounding_value_expr, input_shape);
207	auto neg_rounder = BroadCastTo(exp_neg_rounding_value_expr, input_shape);
208
209	auto zero_t = Zeros(input_shape, hp_dtype);
210	round_scalar = Where(GreaterEqual(tensor, zero_t), pos_rounder, neg_rounder);
211	} else {
212	LOG(FATAL) << "Rounding mode " << rounding << " not supported.";
213	}
214	// Add the rounding scalar.
215	tensor = Add(tensor, round_scalar);
216
217	// 5) Simply right shift the result to get the final output.
218	auto total_rshift_expr = MakeConstantTensor(hp_dtype, {n_channels}, total_rshifts);
219	auto exp_total_rshift_expr = ExpandBiasToMatchAxis(total_rshift_expr, n_dim, {channel_axis});
220	tensor = RightShift(tensor, exp_total_rshift_expr);
221
222	// 6) The fixed point multiplication keeps the value in int32 range. Casting back to int32.
223	return Cast(tensor, DataType::Int(`32`));
224	}
225
226	Expr FixedPointMultiplyPerChannelToNearest(Expr tensor, std::vector<double> multipliers,
227	const Array<IndexExpr>& input_shape, int channel_axis) {
228	return FixedPointMultiplyPerChannel(tensor, multipliers, input_shape, channel_axis, "TONEAREST");
229	}
230
231	std::string SelectRequntizeParameter(const std::string& arg_value, const std::string& cfg_value,
232	const bool is_cfg_default, const std::string& name) {
233	if (arg_value == "None") {
234	return cfg_value;
235	} else {
236	if (!is_cfg_default && arg_value != cfg_value) {
237	DLOG(INFO) << "The value of parameter \"" << name
238	<< "\" from the non-default requantize config will not be used. The value "
239	"provided from "
240	"requantize function argument will be used instead. The value used is \""
241	<< arg_value << "\".";
242	}
243	return arg_value;
244	}
245	}
246
247	} // namespace qnn
248	} // namespace relay
249	} // namespace tvm
250

Browse the source code of tvm/src/relay/qnn/utils.cc