calibrate.cc source code [tvm/src/relay/quantize/calibrate.cc]

1	/*
2	* Licensed to the Apache Software Foundation (ASF) under one
3	* or more contributor license agreements. See the NOTICE file
4	* distributed with this work for additional information
5	* regarding copyright ownership. The ASF licenses this file
6	* to you under the Apache License, Version 2.0 (the
7	* "License"); you may not use this file except in compliance
8	* with the License. You may obtain a copy of the License at
9	*
10	* http://www.apache.org/licenses/LICENSE-2.0
11	*
12	* Unless required by applicable law or agreed to in writing,
13	* software distributed under the License is distributed on an
14	* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15	* KIND, either express or implied. See the License for the
16	* specific language governing permissions and limitations
17	* under the License.
18	*/
19
20	/!*
21	*
22	* \file calibrate.cc
23	*
24	* \brief Create profile graph and calibrate on dataset
25	*/
26	#include <tvm/relay/analysis.h>
27	#include <tvm/relay/expr_functor.h>
28	#include <tvm/relay/op.h>
29
30	#include <numeric>
31
32	#include "./quantize.h"
33
34	namespace tvm {
35	namespace relay {
36	namespace quantize {
37
38	// KL divergence minimization code is adapted from MXNet.
39	// The original one is in incubator-mxnet/src/operator/quantization/calibrate.cc
40	static std::vector<float> SmoothDistribution(const std::vector<float>& p,
41	const float eps = `0.0001`) {
42	std::vector<size_t> is_zeros(p.size());
43	std::vector<size_t> is_nonzeros(p.size());
44	{
45	auto it = p.begin();
46	std::generate(is_zeros.begin(), is_zeros.end(),
47	[&it]() { return static_cast<size_t>(*(it ++) == `0.f`); });
48	}
49	{
50	auto it = p.begin();
51	std::generate(is_nonzeros.begin(), is_nonzeros.end(),
52	[&it]() { return static_cast<size_t>(*(it ++) != `0.f`); });
53	}
54	size_t n_zeros = std::accumulate(is_zeros.begin(), is_zeros.end(), `0`);
55	size_t n_nonzeros = p.size() - n_zeros;
56	if (!n_nonzeros) {
57	// The discrete probability distribution is malformed. All entries are 0.
58	return std::vector<float>();
59	}
60	float eps1 = eps * static_cast<float>(n_zeros) / static_cast<float>(n_nonzeros);
61	if (eps1 >= `1.0`) return std::vector<float>();
62	auto ret = p;
63	for (size_t i = `0`; i < p.size(); i++) {
64	ret [i] += eps * is_zeros [i] - eps1 * is_nonzeros [i];
65	}
66	return ret;
67	}
68
69	static float ComputeEntropy(float* p, float* q, size_t size) {
70	float p_sum = std::accumulate(p, p + size, `0.f`);
71	float q_sum = std::accumulate(q, q + size, `0.f`);
72	float ret = `0`;
73	for (size_t i = `0`; i < size; i++) {
74	ICHECK(p[i] > `0` && q[i] > `0`);
75	p[i] /= p_sum;
76	q[i] /= q_sum;
77	if (p[i] && q[i]) ret += p[i] * std::log(p[i] / q[i]);
78	}
79	return ret;
80	}
81
82	float MinimizeKL(const std::vector<int>& hist, const std::vector<float>& hist_edges, int num_bins,
83	int num_quantized_bins) {
84	const int zero_bin_idx = num_bins / `2`;
85	const int num_half_quantized_bins = num_quantized_bins / `2`;
86	std::vector<float> thresholds(num_bins / `2` + `1` - num_quantized_bins / `2`, `0.f`);
87	std::vector<float> divergence(thresholds.size(), `0.f`);
88	std::vector<float> quantized_bins(num_quantized_bins, `0`);
89	for (int i = num_quantized_bins / `2`; i < zero_bin_idx + `1`; ++i) {
90	const int p_bin_idx_start = zero_bin_idx - i;
91	const int p_bin_idx_stop = zero_bin_idx + i + `1`;
92	thresholds [i - num_half_quantized_bins] = hist_edges [p_bin_idx_stop];
93
94	std::vector<int> sliced_nd_hist(p_bin_idx_stop - p_bin_idx_start);
95	std::vector<float> p(sliced_nd_hist.size());
96	p [`0`] = `0`;
97	p.back() = `0`;
98	for (int j = `0`; j < num_bins; j++) {
99	if (j <= p_bin_idx_start) {
100	p [`0`] += hist [j];
101	} else if (j >= p_bin_idx_stop) {
102	p.back() += hist [j];
103	} else {
104	sliced_nd_hist [j - p_bin_idx_start] = hist [j];
105	p [j - p_bin_idx_start] = hist [j];
106	}
107	}
108	// calculate how many bins should be merged to generate quantized distribution q
109	const auto num_merged_bins = sliced_nd_hist.size() / num_quantized_bins;
110	for (int j = `0`; j < num_quantized_bins; j++) {
111	const int start = j * num_merged_bins;
112	const int stop = (j + `1`) * num_merged_bins;
113	quantized_bins [j] =
114	std::accumulate(sliced_nd_hist.begin() + start, sliced_nd_hist.begin() + stop, `0`);
115	}
116	quantized_bins.back() += std::accumulate(
117	sliced_nd_hist.begin() + static_cast<int>(num_quantized_bins * num_merged_bins),
118	sliced_nd_hist.end(), `0`);
119	// expand quantized_bins into p.size bins
120	std::vector<float> q(sliced_nd_hist.size(), `0`);
121	for (int j = `0`; j < num_quantized_bins; j++) {
122	const int start = j * num_merged_bins;
123	const int stop = (j == num_quantized_bins - `1`) ? q.size() : ((j + `1`) * num_merged_bins);
124	int norm = std::count_if(sliced_nd_hist.begin() + start, sliced_nd_hist.begin() + stop,
125	[](size_t i) { return i != `0`; });
126	if (norm) {
127	for (int k = start; k < stop; k++) {
128	if (p [k]) q [k] = quantized_bins [j] / norm;
129	}
130	}
131	}
132	p = SmoothDistribution(p);
133	q = SmoothDistribution(q);
134
135	if (!q.size()) {
136	divergence [i - num_half_quantized_bins] = std::numeric_limits<float>::infinity();
137	} else {
138	divergence [i - num_half_quantized_bins] = ComputeEntropy(p.data(), q.data(), p.size());
139	}
140	}
141	auto min_divergence_idx =
142	std::distance(divergence.begin(), std::min_element(divergence.begin(), divergence.end()));
143	return thresholds [min_divergence_idx];
144	}
145
146	class StatsCollector : private ExprMutator {
147	public:
148	StatsCollector() : simulated_quantize_op_(Op::Get("relay.op.annotation.simulated_quantize")) {}
149
150	Expr Collect(const Expr& expr) {
151	auto new_e = this->Mutate(expr);
152	const FunctionNode* func = new_e.as<FunctionNode>();
153	ICHECK(func) << "Input shoule be Function";
154	Expr new_body = Tuple (std::move(profile_data_));
155	Function ret_func = WithFields(GetRef<Function>(func), FreeVars(new_body), new_body);
156
157	// We are changing the function's ret_type to an empty type. Unfortunately, Optional<Type>() is
158	// indistinguishable from NullValue<Type>(), so we can't express "update to nullptr" in
159	// WithFields.
160	ret_func.CopyOnWrite()->ret_type = NullValue<Type>();
161	return std::move(ret_func);
162	}
163
164	private:
165	Array<Expr> profile_data_;
166	const Op& simulated_quantize_op_;
167
168	Expr VisitExpr_(const CallNode* call) {
169	Expr new_e = ExprMutator::VisitExpr_(call);
170	const CallNode* new_call = new_e.as<CallNode>();
171	ICHECK(new_call);
172	if (new_call->op == simulated_quantize_op_) {
173	auto attrs = new_call->attrs.as<SimulatedQuantizeAttrs>();
174	// rewrite the annotation
175	auto new_attrs = make_object<SimulatedQuantizeAttrs>();
176	const Expr& quantize_input = new_call->args [`0`]; // expression being quantized
177	auto placeholder = MakeConstantScalar(DataType::Float(`32`), `0.`); // unused argument
178	Array<Expr> new_args{quantize_input, placeholder, placeholder, placeholder};
179	new_attrs ->kind = QAnnotateKind::kQIdentity;
180	new_attrs ->sign = attrs->sign;
181	new_attrs ->rounding = attrs->rounding;
182	Expr identity_quantize = Call (new_call->op, new_args, Attrs {new_attrs}, {});
183
184	// add non-const expressions to profile data
185	if (attrs->kind != QAnnotateKind::kQWeight) {
186	ICHECK(!quantize_input.as<ConstantNode>());
187	profile_data_.push_back(identity_quantize);
188	}
189	return identity_quantize;
190	} else {
191	return new_e;
192	}
193	}
194	};
195
196	/*
197	* \brief Given an annotated graph, create a profile graph to collect profile data from the
198	* calibration dataset.
199	*
200	* This pass collects simulated_quantize op into a tuple. Simulated_quantize ops are rewritten to
201	* identity mode. The tuple is the output of the profile graph. Both input and output of this pass
202	* are relay::Function.
203	*
204	* \param expr The simulation graph after annotation.
205	* \return The profile graph.
206	*/
207	Expr CreateStatsCollector(const Expr& expr) { return StatsCollector ().Collect(expr); }
208
209	TVM_REGISTER_GLOBAL("relay._quantize.CreateStatsCollector").set_body_typed(CreateStatsCollector);
210
211	TVM_REGISTER_GLOBAL("relay._quantize.FindScaleByKLMinimization")
212	.set_body([](TVMArgs args, TVMRetValue* ret) {
213	int* hist_ptr = static_cast<int>(static_cast<void**>(args [`0`]));
214	float* hist_edges_ptr = static_cast<float>(static_cast<void**>(args [`1`]));
215	int num_bins = args [`2`];
216	int num_quantized_bins = args [`3`];
217	std::vector<int> hist(hist_ptr, hist_ptr + num_bins);
218	std::vector<float> hist_edges(hist_edges_ptr, hist_edges_ptr + num_bins + `1`);
219	ret[`0`] = MinimizeKL(hist, hist_edges, num_bins, num_quantized_bins);
220	});
221
222	} // namespace quantize
223	} // namespace relay
224	} // namespace tvm
225

Browse the source code of tvm/src/relay/quantize/calibrate.cc