1/*
2 * Licensed to the Apache Software Foundation (ASF) under one
3 * or more contributor license agreements. See the NOTICE file
4 * distributed with this work for additional information
5 * regarding copyright ownership. The ASF licenses this file
6 * to you under the Apache License, Version 2.0 (the
7 * "License"); you may not use this file except in compliance
8 * with the License. You may obtain a copy of the License at
9 *
10 * http://www.apache.org/licenses/LICENSE-2.0
11 *
12 * Unless required by applicable law or agreed to in writing,
13 * software distributed under the License is distributed on an
14 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15 * KIND, either express or implied. See the License for the
16 * specific language governing permissions and limitations
17 * under the License.
18 */
19
20/*!
21 *
22 * \file calibrate.cc
23 *
24 * \brief Create profile graph and calibrate on dataset
25 */
26#include <tvm/relay/analysis.h>
27#include <tvm/relay/expr_functor.h>
28#include <tvm/relay/op.h>
29
30#include <numeric>
31
32#include "./quantize.h"
33
34namespace tvm {
35namespace relay {
36namespace quantize {
37
38// KL divergence minimization code is adapted from MXNet.
39// The original one is in incubator-mxnet/src/operator/quantization/calibrate.cc
40static std::vector<float> SmoothDistribution(const std::vector<float>& p,
41 const float eps = 0.0001) {
42 std::vector<size_t> is_zeros(p.size());
43 std::vector<size_t> is_nonzeros(p.size());
44 {
45 auto it = p.begin();
46 std::generate(is_zeros.begin(), is_zeros.end(),
47 [&it]() { return static_cast<size_t>(*(it++) == 0.f); });
48 }
49 {
50 auto it = p.begin();
51 std::generate(is_nonzeros.begin(), is_nonzeros.end(),
52 [&it]() { return static_cast<size_t>(*(it++) != 0.f); });
53 }
54 size_t n_zeros = std::accumulate(is_zeros.begin(), is_zeros.end(), 0);
55 size_t n_nonzeros = p.size() - n_zeros;
56 if (!n_nonzeros) {
57 // The discrete probability distribution is malformed. All entries are 0.
58 return std::vector<float>();
59 }
60 float eps1 = eps * static_cast<float>(n_zeros) / static_cast<float>(n_nonzeros);
61 if (eps1 >= 1.0) return std::vector<float>();
62 auto ret = p;
63 for (size_t i = 0; i < p.size(); i++) {
64 ret[i] += eps * is_zeros[i] - eps1 * is_nonzeros[i];
65 }
66 return ret;
67}
68
69static float ComputeEntropy(float* p, float* q, size_t size) {
70 float p_sum = std::accumulate(p, p + size, 0.f);
71 float q_sum = std::accumulate(q, q + size, 0.f);
72 float ret = 0;
73 for (size_t i = 0; i < size; i++) {
74 ICHECK(p[i] > 0 && q[i] > 0);
75 p[i] /= p_sum;
76 q[i] /= q_sum;
77 if (p[i] && q[i]) ret += p[i] * std::log(p[i] / q[i]);
78 }
79 return ret;
80}
81
82float MinimizeKL(const std::vector<int>& hist, const std::vector<float>& hist_edges, int num_bins,
83 int num_quantized_bins) {
84 const int zero_bin_idx = num_bins / 2;
85 const int num_half_quantized_bins = num_quantized_bins / 2;
86 std::vector<float> thresholds(num_bins / 2 + 1 - num_quantized_bins / 2, 0.f);
87 std::vector<float> divergence(thresholds.size(), 0.f);
88 std::vector<float> quantized_bins(num_quantized_bins, 0);
89 for (int i = num_quantized_bins / 2; i < zero_bin_idx + 1; ++i) {
90 const int p_bin_idx_start = zero_bin_idx - i;
91 const int p_bin_idx_stop = zero_bin_idx + i + 1;
92 thresholds[i - num_half_quantized_bins] = hist_edges[p_bin_idx_stop];
93
94 std::vector<int> sliced_nd_hist(p_bin_idx_stop - p_bin_idx_start);
95 std::vector<float> p(sliced_nd_hist.size());
96 p[0] = 0;
97 p.back() = 0;
98 for (int j = 0; j < num_bins; j++) {
99 if (j <= p_bin_idx_start) {
100 p[0] += hist[j];
101 } else if (j >= p_bin_idx_stop) {
102 p.back() += hist[j];
103 } else {
104 sliced_nd_hist[j - p_bin_idx_start] = hist[j];
105 p[j - p_bin_idx_start] = hist[j];
106 }
107 }
108 // calculate how many bins should be merged to generate quantized distribution q
109 const auto num_merged_bins = sliced_nd_hist.size() / num_quantized_bins;
110 for (int j = 0; j < num_quantized_bins; j++) {
111 const int start = j * num_merged_bins;
112 const int stop = (j + 1) * num_merged_bins;
113 quantized_bins[j] =
114 std::accumulate(sliced_nd_hist.begin() + start, sliced_nd_hist.begin() + stop, 0);
115 }
116 quantized_bins.back() += std::accumulate(
117 sliced_nd_hist.begin() + static_cast<int>(num_quantized_bins * num_merged_bins),
118 sliced_nd_hist.end(), 0);
119 // expand quantized_bins into p.size bins
120 std::vector<float> q(sliced_nd_hist.size(), 0);
121 for (int j = 0; j < num_quantized_bins; j++) {
122 const int start = j * num_merged_bins;
123 const int stop = (j == num_quantized_bins - 1) ? q.size() : ((j + 1) * num_merged_bins);
124 int norm = std::count_if(sliced_nd_hist.begin() + start, sliced_nd_hist.begin() + stop,
125 [](size_t i) { return i != 0; });
126 if (norm) {
127 for (int k = start; k < stop; k++) {
128 if (p[k]) q[k] = quantized_bins[j] / norm;
129 }
130 }
131 }
132 p = SmoothDistribution(p);
133 q = SmoothDistribution(q);
134
135 if (!q.size()) {
136 divergence[i - num_half_quantized_bins] = std::numeric_limits<float>::infinity();
137 } else {
138 divergence[i - num_half_quantized_bins] = ComputeEntropy(p.data(), q.data(), p.size());
139 }
140 }
141 auto min_divergence_idx =
142 std::distance(divergence.begin(), std::min_element(divergence.begin(), divergence.end()));
143 return thresholds[min_divergence_idx];
144}
145
146class StatsCollector : private ExprMutator {
147 public:
148 StatsCollector() : simulated_quantize_op_(Op::Get("relay.op.annotation.simulated_quantize")) {}
149
150 Expr Collect(const Expr& expr) {
151 auto new_e = this->Mutate(expr);
152 const FunctionNode* func = new_e.as<FunctionNode>();
153 ICHECK(func) << "Input shoule be Function";
154 Expr new_body = Tuple(std::move(profile_data_));
155 Function ret_func = WithFields(GetRef<Function>(func), FreeVars(new_body), new_body);
156
157 // We are changing the function's ret_type to an empty type. Unfortunately, Optional<Type>() is
158 // indistinguishable from NullValue<Type>(), so we can't express "update to nullptr" in
159 // WithFields.
160 ret_func.CopyOnWrite()->ret_type = NullValue<Type>();
161 return std::move(ret_func);
162 }
163
164 private:
165 Array<Expr> profile_data_;
166 const Op& simulated_quantize_op_;
167
168 Expr VisitExpr_(const CallNode* call) {
169 Expr new_e = ExprMutator::VisitExpr_(call);
170 const CallNode* new_call = new_e.as<CallNode>();
171 ICHECK(new_call);
172 if (new_call->op == simulated_quantize_op_) {
173 auto attrs = new_call->attrs.as<SimulatedQuantizeAttrs>();
174 // rewrite the annotation
175 auto new_attrs = make_object<SimulatedQuantizeAttrs>();
176 const Expr& quantize_input = new_call->args[0]; // expression being quantized
177 auto placeholder = MakeConstantScalar(DataType::Float(32), 0.); // unused argument
178 Array<Expr> new_args{quantize_input, placeholder, placeholder, placeholder};
179 new_attrs->kind = QAnnotateKind::kQIdentity;
180 new_attrs->sign = attrs->sign;
181 new_attrs->rounding = attrs->rounding;
182 Expr identity_quantize = Call(new_call->op, new_args, Attrs{new_attrs}, {});
183
184 // add non-const expressions to profile data
185 if (attrs->kind != QAnnotateKind::kQWeight) {
186 ICHECK(!quantize_input.as<ConstantNode>());
187 profile_data_.push_back(identity_quantize);
188 }
189 return identity_quantize;
190 } else {
191 return new_e;
192 }
193 }
194};
195
196/*
197 * \brief Given an annotated graph, create a profile graph to collect profile data from the
198 * calibration dataset.
199 *
200 * This pass collects simulated_quantize op into a tuple. Simulated_quantize ops are rewritten to
201 * identity mode. The tuple is the output of the profile graph. Both input and output of this pass
202 * are relay::Function.
203 *
204 * \param expr The simulation graph after annotation.
205 * \return The profile graph.
206 */
207Expr CreateStatsCollector(const Expr& expr) { return StatsCollector().Collect(expr); }
208
209TVM_REGISTER_GLOBAL("relay._quantize.CreateStatsCollector").set_body_typed(CreateStatsCollector);
210
211TVM_REGISTER_GLOBAL("relay._quantize.FindScaleByKLMinimization")
212 .set_body([](TVMArgs args, TVMRetValue* ret) {
213 int* hist_ptr = static_cast<int*>(static_cast<void*>(args[0]));
214 float* hist_edges_ptr = static_cast<float*>(static_cast<void*>(args[1]));
215 int num_bins = args[2];
216 int num_quantized_bins = args[3];
217 std::vector<int> hist(hist_ptr, hist_ptr + num_bins);
218 std::vector<float> hist_edges(hist_edges_ptr, hist_edges_ptr + num_bins + 1);
219 ret[0] = MinimizeKL(hist, hist_edges, num_bins, num_quantized_bins);
220 });
221
222} // namespace quantize
223} // namespace relay
224} // namespace tvm
225