1#pragma once
2
3#include <c10/core/QScheme.h>
4#include <c10/core/MemoryFormat.h>
5#include <c10/macros/Macros.h>
6#include <c10/util/Exception.h>
7#include <c10/util/intrusive_ptr.h>
8#include <c10/core/ScalarType.h>
9#include <c10/core/TensorOptions.h>
10
11#include <ATen/Tensor.h>
12#include <ATen/TensorUtils.h>
13
14#include <ATen/core/QuantizerBase.h>
15
16#include <cmath>
17#include <memory>
18#include <utility>
19
20namespace at {
21
22/**
23 * UnknownQuantizer is a placeholder quantizer for functions that implement
24 * quantization in a two step process. First a tensor is allocated but with
25 * unknown quantizer, and then the quantization kernel decides what the final
26 * quantizer will be.
27 */
struct TORCH_API UnknownQuantizer : public Quantizer {
  explicit UnknownQuantizer(ScalarType scalar_type)
    : Quantizer(scalar_type) {}

  // All overrides are defined out-of-line: a tensor holding an
  // UnknownQuantizer has not been assigned its real quantizer yet, so these
  // implementations are expected to act as placeholders (see the class
  // comment above).
  Tensor quantize(const Tensor& tensor) override;
  Tensor dequantize(const Tensor& qtensor) override;
  Tensor& dequantize_out(Tensor& rtensor, const Tensor& qtensor) override;
  QScheme qscheme() const override;
  bool equalTo(QuantizerPtr other) const override;
};
38
/**
 * UniformQuantizer is the parent class for all uniform quantizers.
 * These quantization schemes map float values uniformly to
 * the quantized values. For example, the affine quantizer is
 * the most commonly used scheme in this category.
 */
struct TORCH_API UniformQuantizer : public Quantizer {
  // Adds no state of its own; only forwards the quantized scalar type to
  // Quantizer. Exists purely as a classification point in the hierarchy.
  explicit UniformQuantizer(ScalarType scalar_type) : Quantizer(scalar_type) {}
};
48
/**
 * NonUniformQuantizer is the parent class for all non-uniform quantizers.
 * These quantization schemes may map float values non-uniformly to the
 * quantized values. K-means quantization is a representative example in this
 * category.
 */
struct TORCH_API NonUniformQuantizer : public Quantizer {
  // Adds no state of its own; only forwards the quantized scalar type to
  // Quantizer. Exists purely as a classification point in the hierarchy.
  explicit NonUniformQuantizer(ScalarType scalar_type) : Quantizer(scalar_type) {}
};
57
58// There is also StochasticQuantizer which is uniform but not affine
59
60/**
61 * AffineQuantizer uses affine transformation to do quantization.
62 *
63 * For quantize:
64 * Y = clamp(round(X / scale + zero_point), min, max)
65 * For dequantize:
66 * X = (Y - zero_point) * scale
67 */
struct TORCH_API AffineQuantizer : public UniformQuantizer {
  // Stateless base for affine schemes; the concrete scale/zero_point
  // parameters live in the per-tensor and per-channel subclasses below.
  explicit AffineQuantizer(ScalarType scalar_type) : UniformQuantizer(scalar_type) {}
};
71
72// Note that we will not have Symmetric Quantizer in backend to reduce
73// complications in quantized kernel implementation.
74
75/**
76 * PerTensorAffineQuantizer stores a scale and a zero_point, which is used for
77 * all the values in the Tensor.
78 */
79struct TORCH_API PerTensorAffineQuantizer : public AffineQuantizer {
80 explicit PerTensorAffineQuantizer(ScalarType scalar_type, double scale, int64_t zero_point)
81 : AffineQuantizer(scalar_type),
82 scale_(scale),
83 zero_point_(zero_point) {}
84
85 Tensor quantize(const Tensor& tensor) override;
86 Tensor dequantize(const Tensor& qtensor) override;
87 Tensor& dequantize_out(Tensor& rtensor, const Tensor& qtensor) override;
88
89 QScheme qscheme() const override {
90 return kPerTensorAffine;
91 }
92
93 double scale() const {
94 return scale_;
95 }
96
97 int64_t zero_point() const {
98 return zero_point_;
99 }
100
101 bool equalTo(QuantizerPtr other) const override {
102 if (!other.get() || other->qscheme() != kPerTensorAffine) {
103 return false;
104 }
105 auto* other_per_tensor_affine =
106 static_cast<PerTensorAffineQuantizer*>(other.get());
107 return scalar_type() == other_per_tensor_affine->scalar_type() &&
108 scale() == other_per_tensor_affine->scale() &&
109 zero_point() == other_per_tensor_affine->zero_point();
110 }
111
112 private:
113 const double scale_;
114 // We use int64_t for consistency with Python
115 const int64_t zero_point_;
116};
117
/**
 * PerChannelAffineQuantizer is the same as PerTensorAffineQuantizer
 * except that we have an independent scale and zero_point parameter
 * for each channel.
 *
 * Also note that per-channel quantization is mostly applied to the output
 * channels of weights, since per-input-channel weight quantization and
 * per-channel quantization of activations can't be efficiently supported
 * on most processors: they would require each multiplication result within
 * a single dot-product to have a different scale.
 */
129struct TORCH_API PerChannelAffineQuantizer : public AffineQuantizer {
130 explicit PerChannelAffineQuantizer(
131 ScalarType scalar_type,
132 Tensor scales,
133 Tensor zero_points,
134 int64_t axis)
135 : AffineQuantizer(scalar_type),
136 scales_(std::move(scales)),
137 zero_points_(std::move(zero_points)),
138 axis_(axis) {}
139
140 QScheme qscheme() const override {
141 return kPerChannelAffine;
142 }
143
144 Tensor scales() const {
145 return scales_;
146 }
147
148 Tensor zero_points() const {
149 return zero_points_;
150 }
151
152 int64_t axis() const {
153 return axis_;
154 }
155
156 Tensor quantize(const Tensor& tensor) override;
157 Tensor dequantize(const Tensor& qtensor) override;
158 Tensor& dequantize_out(Tensor& rtensor, const Tensor& qtensor) override;
159
160 bool equalTo(QuantizerPtr other) const override {
161 if (!other.get() || other->qscheme() != kPerChannelAffine) {
162 return false;
163 }
164 auto* other_per_channel_affine =
165 static_cast<PerChannelAffineQuantizer*>(other.get());
166 return scalar_type() == other_per_channel_affine->scalar_type() &&
167 scales().equal(other_per_channel_affine->scales()) &&
168 zero_points().equal(other_per_channel_affine->zero_points()) &&
169 axis() == other_per_channel_affine->axis();
170 }
171
172 protected:
173 Tensor scales_;
174 Tensor zero_points_;
175 const int64_t axis_;
176};
177
178/**
179 * PerChannelAffineFloatQParamsQuantizer is the same as PerChannelAffineQuantizer
180 * except that it expects both scale and zero point to be floating point values.
181 *
182 * This quantizer uses the kPerChannelAffineFloatQParams qscheme which is a variant of
183 * kPerChannelAffine.
184 *
185 * The quantize equation in this case looks like -
186 * Xq = (Xf - zero_point) * inv_scale, where inv_scale = 1.0/scale
187 *
188 * Note: Usage of floating point zero point is useful in cases where 0 doesn't need to
189 * be exactly represented in the quantized space. We can get additional precision by
190 * using floating point values for zero point.
191 */
192struct TORCH_API PerChannelAffineFloatQParamsQuantizer : public PerChannelAffineQuantizer {
193 explicit PerChannelAffineFloatQParamsQuantizer(
194 ScalarType scalar_type,
195 Tensor scales,
196 Tensor zero_points,
197 int64_t axis)
198 : PerChannelAffineQuantizer(scalar_type,
199 scales,
200 zero_points,
201 axis) {}
202
203 QScheme qscheme() const override {
204 return kPerChannelAffineFloatQParams;
205 }
206
207 Tensor quantize(const Tensor& tensor) override;
208 Tensor dequantize(const Tensor& qtensor) override;
209 Tensor& dequantize_out(Tensor& rtensor, const Tensor& qtensor) override;
210
211 bool equalTo(QuantizerPtr other) const override {
212 if (!other.get() || other->qscheme() != kPerChannelAffineFloatQParams) {
213 return false;
214 }
215 auto* other_per_channel_float_qparams =
216 static_cast<PerChannelAffineFloatQParamsQuantizer*>(other.get());
217 return scalar_type() == other_per_channel_float_qparams->scalar_type() &&
218 scales().equal(other_per_channel_float_qparams->scales()) &&
219 zero_points().equal(other_per_channel_float_qparams->zero_points()) &&
220 axis() == other_per_channel_float_qparams->axis();
221 }
222};
223
// This is an internal utility function for getting at the QTensorImpl.
// You should only use this for writing low-level
// setters/getters for QTensorImpl fields; otherwise, you should use
// the low-level setters/getters that were implemented using this.
// This may be called repeatedly, so make sure it's pretty cheap.
TORCH_API QTensorImpl* get_qtensorimpl(const TensorBase& self);

// double and int64_t are used because of the native function API; we only have
// these argument types right now in native functions.
TORCH_API QuantizerPtr
make_per_tensor_affine_quantizer(
    double scale, int64_t zero_point, ScalarType scalar_type);

// Factory for a per-channel affine quantizer. The exact quantizer subclass
// chosen (plain affine vs. float-qparams) is decided in the implementation —
// see the corresponding .cpp.
TORCH_API QuantizerPtr make_per_channel_affine_quantizer(
    const Tensor& scales,
    const Tensor& zero_points,
    int64_t axis,
    ScalarType scalar_type);

// Factory for the placeholder UnknownQuantizer (see class comment above).
TORCH_API QuantizerPtr make_unknown_quantizer(ScalarType scalar_type);

// Create a Quantized Tensor given arguments for normal Tensor and a quantizer
TORCH_API Tensor new_qtensor(
    IntArrayRef sizes,
    const TensorOptions& options,
    QuantizerPtr quantizer);

// In-place (trailing underscore) setter: attaches `quantizer` to `self`.
TORCH_API void set_quantizer_(const Tensor& self, ConstQuantizerPtr quantizer);

// Wrap caller-owned memory as a per-tensor-affine quantized Tensor with
// explicit strides. NOTE(review): `deleter` presumably runs when the tensor
// releases the buffer, matching the at::from_blob convention — confirm in the
// implementation.
TORCH_API Tensor from_blob_quantized_per_tensor_affine(
    void* data,
    IntArrayRef sizes,
    IntArrayRef strides,
    std::function<void(void*)> deleter,
    const float scale,
    const int64_t zeroPoint,
    const TensorOptions& options);

// Same as above, without explicit strides.
TORCH_API Tensor from_blob_quantized_per_tensor_affine(
    void* data,
    IntArrayRef sizes,
    std::function<void(void*)> deleter,
    const float scale,
    const int64_t zeroPoint,
    const TensorOptions& options);

// Wrap caller-owned memory as a per-channel-affine quantized Tensor, with
// per-channel `scales`/`zero_points` along `axis`.
TORCH_API Tensor from_blob_quantized_per_channel_affine(
    void* data,
    IntArrayRef sizes,
    std::function<void(void*)> deleter,
    const Tensor& scales,
    const Tensor& zero_points,
    const int64_t axis,
    const TensorOptions& options);
278
279} // namespace at
280