#include <gtest/gtest.h>

#include <ATen/native/quantized/PackedParams.h>
#include <test/cpp/tensorexpr/test_base.h>
#include <torch/csrc/jit/ir/ir.h>
#include <torch/csrc/jit/ir/irparser.h>
#include <torch/csrc/jit/tensorexpr/eval.h>
#include <torch/csrc/jit/tensorexpr/ir.h>
#include <torch/csrc/jit/tensorexpr/kernel.h>
#include <torch/csrc/jit/tensorexpr/loopnest.h>
#include <torch/csrc/jit/tensorexpr/tensor.h>
#include <torch/csrc/jit/testing/file_check.h>
#include <torch/torch.h>
#include <cmath>
#include <sstream>

namespace torch {
namespace jit {

using namespace torch::jit::tensorexpr;
using SimpleIRExprEval = ExprEval<SimpleIREvaluator>;
using namespace torch::indexing;

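// Test fixture: clear the "must use LLVM" flag so the kernels under test can
// fall back to the simple IR evaluator when the LLVM codegen is unavailable.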
class Quantization : public ::testing::Test {
 public:
  void SetUp() override {
    getTEMustUseLLVMOnCPU() = false;
  }
};

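// Each test below parses a TorchScript graph, computes a reference result
// with the eager quantized ops, compiles the graph with TensorExprKernel,
// and checks that both paths agree. Per-tensor affine quantization maps
//   q = clamp(round(x / scale) + zero_point, qmin, qmax)
// and dequantization recovers x' = (q - zero_point) * scale.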
TEST_F(Quantization, QuantDequantInt8) {
  const auto graph_string = R"IR(
      graph(%x.1 : Float(2, 2, strides=[2, 1], device=cpu)):
        %2 : int = prim::Constant[value=12]()
        %3 : int = prim::Constant[value=13]()
        %4 : float = prim::Constant[value=0.1]()
        %q.1 : QInt8(2, 2) = aten::quantize_per_tensor(%x.1, %4, %3, %2)
        %6 : Float(2, 2) = aten::dequantize(%q.1)
        return (%6))IR";
  auto graph = std::make_shared<Graph>();
  parseIR(graph_string, &*graph);

  auto x = at::rand({2, 2}, TensorOptions(kCPU).dtype(at::kFloat));
  auto q = at::quantize_per_tensor(x, 0.1f, 13, at::kQInt8);
  auto y_expected = at::dequantize(q);
  TensorExprKernel k(graph);
  std::vector<at::Tensor> inputs = {x};
  StmtPtr s = k.getCodeGenStmt();

  std::vector<IValue> stack = fmap<IValue>(inputs);
  k.run(stack);
  auto y = stack[0].toTensor();
  bool check = at::allclose(y_expected, y);
  if (!check) {
    std::cout << "y_expected:\n" << y_expected << std::endl;
    std::cout << "y:\n" << y << std::endl;
  }
  TORCH_CHECK_EQ(check, 1);
}

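// Same round-trip with unsigned quint8 (dtype constant 13) and a zero_point
// of 122; inputs are scaled into [0, 2) so they quantize without clamping.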
TEST_F(Quantization, QuantDequantUInt8) {
  const auto graph_string = R"IR(
      graph(%x.1 : Float(2, 2, strides=[2, 1], device=cpu)):
        %2 : int = prim::Constant[value=13]()
        %3 : int = prim::Constant[value=122]()
        %4 : float = prim::Constant[value=0.1]()
        %q.1 : QUInt8(2, 2) = aten::quantize_per_tensor(%x.1, %4, %3, %2)
        %6 : Float(2, 2) = aten::dequantize(%q.1)
        return (%6))IR";
  auto graph = std::make_shared<Graph>();
  parseIR(graph_string, &*graph);

  auto x = 2 * at::rand({2, 2}, TensorOptions(kCPU).dtype(at::kFloat));
  auto q = at::quantize_per_tensor(x, 0.1f, 122, at::kQUInt8);
  auto y_expected = at::dequantize(q);
  TensorExprKernel k(graph);
  std::vector<at::Tensor> inputs = {x};
  StmtPtr s = k.getCodeGenStmt();

  std::vector<IValue> stack = fmap<IValue>(inputs);
  k.run(stack);
  auto y = stack[0].toTensor();
  bool check = at::allclose(y_expected, y);
  if (!check) {
    std::cout << "y_expected:\n" << y_expected << std::endl;
    std::cout << "y:\n" << y << std::endl;
  }
  TORCH_CHECK_EQ(check, 1);
}

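// Round-trip through a non-contiguous input: sizes [1, 2, 2] with strides
// [4, 1, 2], i.e. a channels-last-style (NLC) memory layout.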
TEST_F(Quantization, QuantDequantUInt8_NLC) {
  const auto graph_string = R"IR(
      graph(%x.1 : Float(1, 2, 2, strides=[4, 1, 2], device=cpu)):
        %2 : int = prim::Constant[value=13]()
        %3 : int = prim::Constant[value=122]()
        %4 : float = prim::Constant[value=0.1]()
        %q.1 : QUInt8(1, 2, 2) = aten::quantize_per_tensor(%x.1, %4, %3, %2)
        %6 : Float(1, 2, 2) = aten::dequantize(%q.1)
        return (%6))IR";
  auto graph = std::make_shared<Graph>();
  parseIR(graph_string, &*graph);

  auto x = 2 * at::rand({1, 2, 2}, TensorOptions(kCPU).dtype(at::kFloat));
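  // Rewrite the tensor metadata so x carries the non-contiguous NLC strides
  // declared by the graph input.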
  x.unsafeGetTensorImpl()->set_sizes_and_strides(
      std::initializer_list<int64_t>{1, 2, 2}, {4, 1, 2});
  auto q = at::quantize_per_tensor(x, 0.1f, 122, at::kQUInt8);
  auto y_expected = at::dequantize(q);
  TensorExprKernel k(graph);
  std::vector<at::Tensor> inputs = {x};
  StmtPtr s = k.getCodeGenStmt();

  std::vector<IValue> stack = fmap<IValue>(inputs);
  k.run(stack);
  auto y = stack[0].toTensor();
  bool check = at::allclose(y_expected, y);
  if (!check) {
    std::cout << "x:\n" << x << std::endl;
    std::cout << "y_expected:\n" << y_expected << std::endl;
    std::cout << "y:\n" << y << std::endl;
  }
  TORCH_CHECK_EQ(check, 1);
}

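// Helper: look up the quantized::add schema in the dispatcher and call it
// through a typed handle.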
at::Tensor quantized_add(
    at::Tensor x1,
    at::Tensor x2,
    double scale,
    int64_t zero) {
  const auto qadd_op =
      c10::Dispatcher::singleton()
          .findSchemaOrThrow("quantized::add", "")
          .typed<at::Tensor(at::Tensor, at::Tensor, double, int64_t)>();
  return qadd_op.call(x1, x2, scale, zero);
}

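// Quantize both inputs, add in the quantized domain, then dequantize; the
// compiled kernel must match the eager quantized::add result.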
TEST_F(Quantization, QuantAddDequantInt8) {
  const auto graph_string = R"IR(
      graph(%x1 : Float(2, 2, strides=[2, 1], device=cpu), %x2 : Float(2, 2, strides=[2, 1], device=cpu)):
        %2 : int = prim::Constant[value=12]()
        %qz1 : int = prim::Constant[value=13]()
        %qs1 : float = prim::Constant[value=0.1]()
        %qz2 : int = prim::Constant[value=13]()
        %qs2 : float = prim::Constant[value=0.1]()
        %qza : int = prim::Constant[value=13]()
        %qsa : float = prim::Constant[value=0.1]()
        %q1 : QInt8(2, 2) = aten::quantize_per_tensor(%x1, %qs1, %qz1, %2)
        %q2 : QInt8(2, 2) = aten::quantize_per_tensor(%x2, %qs2, %qz2, %2)
        %qa : QInt8(2, 2) = quantized::add(%q1, %q2, %qsa, %qza)
        %6 : Float(2, 2) = aten::dequantize(%qa)
        return (%6))IR";
  auto graph = std::make_shared<Graph>();
  parseIR(graph_string, &*graph);

  auto x1 = at::rand({2, 2}, TensorOptions(kCPU).dtype(at::kFloat));
  auto x2 = at::rand({2, 2}, TensorOptions(kCPU).dtype(at::kFloat));
  auto q1 = at::quantize_per_tensor(x1, 0.1f, 13, at::kQInt8);
  auto q2 = at::quantize_per_tensor(x2, 0.1f, 13, at::kQInt8);
  auto qa = quantized_add(q1, q2, 0.1f, 13);
  auto y_expected = at::dequantize(qa);
  TensorExprKernel k(graph);
  std::vector<at::Tensor> inputs = {x1, x2};
  StmtPtr s = k.getCodeGenStmt();

  std::vector<IValue> stack = fmap<IValue>(inputs);
  k.run(stack);
  auto y = stack[0].toTensor();
  bool check = at::allclose(y_expected, y);
  if (!check) {
    std::cout << "x1:\n" << x1 << std::endl;
    std::cout << "q1:\n" << q1 << std::endl;
    std::cout << "x2:\n" << x2 << std::endl;
    std::cout << "q2:\n" << q2 << std::endl;
    std::cout << "y_expected:\n" << y_expected << std::endl;
    std::cout << "y:\n" << y << std::endl;
  }
  TORCH_CHECK_EQ(check, 1);
}

TEST_F(Quantization, QuantAddDequantUInt8) {
  const auto graph_string = R"IR(
      graph(%x1 : Float(2, 2, strides=[2, 1], device=cpu), %x2 : Float(2, 2, strides=[2, 1], device=cpu)):
        %2 : int = prim::Constant[value=13]()
        %qz1 : int = prim::Constant[value=13]()
        %qs1 : float = prim::Constant[value=0.1]()
        %qz2 : int = prim::Constant[value=13]()
        %qs2 : float = prim::Constant[value=0.1]()
        %qza : int = prim::Constant[value=13]()
        %qsa : float = prim::Constant[value=0.1]()
        %q1 : QUInt8(2, 2) = aten::quantize_per_tensor(%x1, %qs1, %qz1, %2)
        %q2 : QUInt8(2, 2) = aten::quantize_per_tensor(%x2, %qs2, %qz2, %2)
        %qa : QUInt8(2, 2) = quantized::add(%q1, %q2, %qsa, %qza)
        %6 : Float(2, 2) = aten::dequantize(%qa)
        return (%6))IR";
  auto graph = std::make_shared<Graph>();
  parseIR(graph_string, &*graph);

  auto x1 = at::rand({2, 2}, TensorOptions(kCPU).dtype(at::kFloat));
  auto x2 = at::rand({2, 2}, TensorOptions(kCPU).dtype(at::kFloat));
  auto q1 = at::quantize_per_tensor(x1, 0.1f, 13, at::kQUInt8);
  auto q2 = at::quantize_per_tensor(x2, 0.1f, 13, at::kQUInt8);
  auto qa = quantized_add(q1, q2, 0.1f, 13);
  auto y_expected = at::dequantize(qa);

  TensorExprKernel k(graph);
  std::vector<at::Tensor> inputs = {x1, x2};
  StmtPtr s = k.getCodeGenStmt();

  std::vector<IValue> stack = fmap<IValue>(inputs);
  k.run(stack);
  auto y = stack[0].toTensor();
  bool check = at::allclose(y_expected, y);
  if (!check) {
    std::cout << "x1:\n" << x1 << std::endl;
    std::cout << "q1:\n" << q1 << std::endl;
    std::cout << "x2:\n" << x2 << std::endl;
    std::cout << "q2:\n" << q2 << std::endl;
    std::cout << "y_expected:\n" << y_expected << std::endl;
    std::cout << "y:\n" << y << std::endl;
  }
  TORCH_CHECK_EQ(check, 1);
}

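// aten::sigmoid on a quantized tensor: the quantized kernel picks its own
// output scale/zero_point, which is why none appear in the graph.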
TEST_F(Quantization, QuantSigmoidDequantUInt8) {
  const auto graph_string = R"IR(
      graph(%x1 : Float(2, 2, strides=[2, 1], device=cpu)):
        %2 : int = prim::Constant[value=13]()
        %qz1 : int = prim::Constant[value=13]()
        %qs1 : float = prim::Constant[value=0.1]()
        %q1 : QUInt8(2, 2) = aten::quantize_per_tensor(%x1, %qs1, %qz1, %2)
        %qa : QUInt8(2, 2) = aten::sigmoid(%q1)
        %6 : Float(2, 2) = aten::dequantize(%qa)
        return (%6))IR";
  auto graph = std::make_shared<Graph>();
  parseIR(graph_string, &*graph);

  auto x1 = at::rand({2, 2}, TensorOptions(kCPU).dtype(at::kFloat));
  auto q1 = at::quantize_per_tensor(x1, 0.1f, 13, at::kQUInt8);
  auto qs = at::sigmoid(q1);
  auto y_expected = at::dequantize(qs);

  TensorExprKernel k(graph);
  std::vector<at::Tensor> inputs = {x1};
  StmtPtr s = k.getCodeGenStmt();

  std::vector<IValue> stack = fmap<IValue>(inputs);
  k.run(stack);
  auto y = stack[0].toTensor();
  bool check = at::allclose(y_expected, y);
  if (!check) {
    std::cout << "x1:\n" << x1 << std::endl;
    std::cout << "q1:\n" << q1 << std::endl;
    std::cout << "qs:\n" << qs << std::endl;
    std::cout << "y_expected:\n" << y_expected << std::endl;
    std::cout << "y:\n" << y << std::endl;
  }
  TORCH_CHECK_EQ(check, 1);
}

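// Helper: dispatch to quantized::mul, mirroring quantized_add above.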
at::Tensor quantized_mul(
    at::Tensor x1,
    at::Tensor x2,
    double scale,
    int64_t zero) {
  const auto op =
      c10::Dispatcher::singleton()
          .findSchemaOrThrow("quantized::mul", "")
          .typed<at::Tensor(at::Tensor, at::Tensor, double, int64_t)>();
  return op.call(x1, x2, scale, zero);
}

TEST_F(Quantization, QuantMulDequantUInt8) {
  const auto graph_string = R"IR(
      graph(%x1 : Float(2, 2, strides=[2, 1], device=cpu), %x2 : Float(2, 2, strides=[2, 1], device=cpu)):
        %2 : int = prim::Constant[value=13]()
        %qz1 : int = prim::Constant[value=13]()
        %qs1 : float = prim::Constant[value=0.1]()
        %qz2 : int = prim::Constant[value=13]()
        %qs2 : float = prim::Constant[value=0.1]()
        %qza : int = prim::Constant[value=13]()
        %qsa : float = prim::Constant[value=0.1]()
        %q1 : QUInt8(2, 2) = aten::quantize_per_tensor(%x1, %qs1, %qz1, %2)
        %q2 : QUInt8(2, 2) = aten::quantize_per_tensor(%x2, %qs2, %qz2, %2)
        %qa : QUInt8(2, 2) = quantized::mul(%q1, %q2, %qsa, %qza)
        %6 : Float(2, 2) = aten::dequantize(%qa)
        return (%6))IR";
  auto graph = std::make_shared<Graph>();
  parseIR(graph_string, &*graph);

  auto x1 = at::rand({2, 2}, TensorOptions(kCPU).dtype(at::kFloat));
  auto x2 = at::rand({2, 2}, TensorOptions(kCPU).dtype(at::kFloat));
  auto q1 = at::quantize_per_tensor(x1, 0.1f, 13, at::kQUInt8);
  auto q2 = at::quantize_per_tensor(x2, 0.1f, 13, at::kQUInt8);
  auto qa = quantized_mul(q1, q2, 0.1f, 13);
  auto y_expected = at::dequantize(qa);

  TensorExprKernel k(graph);
  std::vector<at::Tensor> inputs = {x1, x2};
  StmtPtr s = k.getCodeGenStmt();

  std::vector<IValue> stack = fmap<IValue>(inputs);
  k.run(stack);
  auto y = stack[0].toTensor();
  bool check = at::allclose(y_expected, y);
  if (!check) {
    std::cout << "x1:\n" << x1 << std::endl;
    std::cout << "q1:\n" << q1 << std::endl;
    std::cout << "x2:\n" << x2 << std::endl;
    std::cout << "q2:\n" << q2 << std::endl;
    std::cout << "y_expected:\n" << y_expected << std::endl;
    std::cout << "y:\n" << y << std::endl;
  }
  TORCH_CHECK_EQ(check, 1);
}

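// Nearest-neighbor upsampling of a quantized tensor replicates the stored
// integer values; scale and zero_point carry over unchanged.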
TEST_F(Quantization, QuantUpsampleNearest2dDequantUInt8) {
  const auto graph_string = R"IR(
      graph(%x : Float(1, 1, 4, 4, strides=[16, 16, 4, 1], device=cpu)):
        %2 : int = prim::Constant[value=13]()
        %4 : NoneType = prim::Constant()
        %3 : int[] = prim::Constant[value=[6, 6]]()
        %qz : int = prim::Constant[value=13]()
        %qs : float = prim::Constant[value=0.1]()
        %q : QUInt8(1, 1, 4, 4) = aten::quantize_per_tensor(%x, %qs, %qz, %2)
        %qu : QUInt8(1, 1, 6, 6) = aten::upsample_nearest2d(%q, %3, %4)
        %6 : Float(1, 1, 6, 6) = aten::dequantize(%qu)
        return (%6))IR";
  auto graph = std::make_shared<Graph>();
  parseIR(graph_string, &*graph);

  auto x = at::rand({1, 1, 4, 4}, TensorOptions(kCPU).dtype(at::kFloat));
  auto q = at::quantize_per_tensor(x, 0.1f, 13, at::kQUInt8);
  auto qu = at::upsample_nearest2d(q, {6, 6});
  auto y_expected = at::dequantize(qu);

  TensorExprKernel k(graph);
  std::vector<at::Tensor> inputs = {x};
  StmtPtr s = k.getCodeGenStmt();

  std::vector<IValue> stack = fmap<IValue>(inputs);
  k.run(stack);
  auto y = stack[0].toTensor();
  bool check = at::allclose(y_expected, y);
  if (!check) {
    std::cout << "x:\n" << x << std::endl;
    std::cout << "q:\n" << q << std::endl;
    std::cout << "qu:\n" << qu << std::endl;
    std::cout << "y_expected:\n" << y_expected << std::endl;
    std::cout << "y:\n" << y << std::endl;
  }
  TORCH_CHECK_EQ(check, 1);
}

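// Float baseline for the upsample_nearest2d lowering, with no quantization.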
TEST_F(Quantization, UpsampleNearest2d) {
  const auto graph_string = R"IR(
      graph(%x : Float(1, 1, 2, 2, strides=[2, 2, 2, 1], device=cpu)):
        %4 : NoneType = prim::Constant()
        %3 : int[] = prim::Constant[value=[4, 4]]()
        %u : Float(1, 1, 4, 4) = aten::upsample_nearest2d(%x, %3, %4)
        return (%u))IR";
  auto graph = std::make_shared<Graph>();
  parseIR(graph_string, &*graph);

  auto x = at::rand({1, 1, 2, 2}, TensorOptions(kCPU).dtype(at::kFloat));
  auto y_expected = at::upsample_nearest2d(x, {4, 4});

  TensorExprKernel k(graph);
  std::vector<at::Tensor> inputs = {x};
  StmtPtr s = k.getCodeGenStmt();

  std::vector<IValue> stack = fmap<IValue>(inputs);
  k.run(stack);
  auto y = stack[0].toTensor();
  bool check = at::allclose(y_expected, y);
  if (!check) {
    std::cout << "x:\n" << x << std::endl;
    std::cout << "y_expected:\n" << y_expected << std::endl;
    std::cout << "y:\n" << y << std::endl;
  }
  TORCH_CHECK_EQ(check, 1);
}

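// Helper: call quantized::cat, redispatching directly to the QuantizedCPU
// backend; the output scale and zero_point go through the schema's optionals.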
at::Tensor quantized_cat(
    c10::List<at::Tensor> const& xs,
    int64_t dim,
    double scale,
    int64_t zero) {
  const auto op = c10::Dispatcher::singleton()
                      .findSchemaOrThrow("quantized::cat", "")
                      .typed<at::Tensor(
                          c10::List<at::Tensor> const&,
                          int64_t,
                          c10::optional<double>,
                          c10::optional<int64_t>)>();
  return op.redispatch(
      DispatchKeySet({DispatchKey::QuantizedCPU}), xs, dim, scale, zero);
}

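// Concatenate three tensors quantized with different scales/zero_points;
// quantized::cat requantizes all inputs to the requested output qparams
// (here those of %qx).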
TEST_F(Quantization, QuantCatDequantUInt8) {
  const auto graph_string = R"IR(
      graph(%x : Float(1, 1, 2, 2, strides=[2, 2, 2, 1], device=cpu), %y : Float(1, 1, 2, 2, strides=[2, 2, 2, 1], device=cpu), %z : Float(1, 1, 2, 2, strides=[2, 2, 2, 1], device=cpu)):
        %qdt : int = prim::Constant[value=13]()
        %qxz : int = prim::Constant[value=13]()
        %qxs : float = prim::Constant[value=0.1]()
        %qyz : int = prim::Constant[value=16]()
        %qys : float = prim::Constant[value=0.15]()
        %qzz : int = prim::Constant[value=19]()
        %qzs : float = prim::Constant[value=0.2]()
        %qx : QUInt8(1, 1, 2, 2) = aten::quantize_per_tensor(%x, %qxs, %qxz, %qdt)
        %qy : QUInt8(1, 1, 2, 2) = aten::quantize_per_tensor(%y, %qys, %qyz, %qdt)
        %qz : QUInt8(1, 1, 2, 2) = aten::quantize_per_tensor(%z, %qzs, %qzz, %qdt)
        %catx : Tensor[] = prim::ListConstruct(%qx, %qy, %qz)
        %catd : int = prim::Constant[value=0]()
        %qcat : QUInt8(3, 1, 2, 2) = quantized::cat(%catx, %catd, %qxs, %qxz)
        %cat : Float(3, 1, 2, 2) = aten::dequantize(%qcat)
        return (%cat))IR";
  auto graph = std::make_shared<Graph>();
  parseIR(graph_string, &*graph);

  auto x = at::rand({1, 1, 2, 2}, TensorOptions(kCPU).dtype(at::kFloat));
  auto y = at::rand({1, 1, 2, 2}, TensorOptions(kCPU).dtype(at::kFloat));
  auto z = at::rand({1, 1, 2, 2}, TensorOptions(kCPU).dtype(at::kFloat));
  auto qx = at::quantize_per_tensor(x, 0.1f, 13, at::kQUInt8);
  auto qy = at::quantize_per_tensor(y, 0.15f, 16, at::kQUInt8);
  auto qz = at::quantize_per_tensor(z, 0.2f, 19, at::kQUInt8);
  auto qcat = quantized_cat({qx, qy, qz}, 0, 0.1f, 13);
  auto expected = at::dequantize(qcat);

  TensorExprKernel k(graph);
  std::vector<at::Tensor> inputs = {x, y, z};
  StmtPtr s = k.getCodeGenStmt();

  std::vector<IValue> stack = fmap<IValue>(inputs);
  k.run(stack);
  auto result = stack[0].toTensor();
  bool check = at::allclose(expected, result);
  if (!check) {
    std::cout << "x:\n" << x << std::endl;
    std::cout << "y:\n" << y << std::endl;
    std::cout << "z:\n" << z << std::endl;
    std::cout << "qx:\n" << qx << std::endl;
    std::cout << "qy:\n" << qy << std::endl;
    std::cout << "qz:\n" << qz << std::endl;
    std::cout << "qcat:\n" << qcat << std::endl;
    std::cout << "expected:\n" << expected << std::endl;
    std::cout << "result:\n" << result << std::endl;
  }
  TORCH_CHECK_EQ(check, 1);
}

} // namespace jit
} // namespace torch