1 | #pragma once |
2 | |
3 | #include <c10/core/ScalarType.h> |
4 | #include <c10/core/QScheme.h> |
5 | #include <c10/util/intrusive_ptr.h> |
6 | |
7 | namespace at { |
8 | |
9 | class Tensor; |
10 | struct QTensorImpl; |
11 | struct Quantizer; |
12 | using ConstQuantizerPtr = const c10::intrusive_ptr<Quantizer>&; |
13 | using QuantizerPtr = c10::intrusive_ptr<Quantizer>; |
14 | |
15 | /** |
16 | * Quantizer is the class for storing all the information |
17 | * that's necessary to perform quantize and dequantize |
18 | * operation. |
19 | * |
20 | * We might have different types of quantization schemes and this is |
21 | * the base class for all quantizers. |
22 | * |
23 | * QTensorImpl will hold a pointer to Quantizer so that we can support |
24 | * different quantization schemes on Tensor. |
25 | * |
26 | * For example, the most common quantization scheme, Affine Quantization, |
27 | * requires scale and zero_point as parameters, we'll store scale and zero_point |
28 | * inside the instance and we can use it to quantize a float Tensor or |
29 | * dequantize a quantized Tensor. |
30 | * |
31 | * When you add new types of leaf Quantizer class, please also |
32 | * make sure to add a corresponding QScheme enum since |
33 | * they should have one to one mapping. |
34 | * |
35 | * Note about intrusive_ptr: |
36 | * Quantized Tensor holds an intrusive_ptr to Quantizer, and multiple Tensor can |
37 | * share the same Quantizer. Quantizer should be immutable. |
38 | */ |
39 | struct TORCH_API Quantizer : public c10::intrusive_ptr_target { |
40 | const ScalarType scalar_type_; |
41 | explicit Quantizer(ScalarType scalar_type) : scalar_type_(scalar_type) {} |
42 | ~Quantizer() override; |
43 | |
44 | // Copied from torch/csrc/jit/ir/scope.h |
45 | QuantizerPtr intrusive_from_this() { |
46 | c10::raw::intrusive_ptr::incref(this); // we are creating a new pointer |
47 | // from a raw `this` pointer |
48 | // so we need to bump the refcount |
49 | // to account for this ownership |
50 | return c10::intrusive_ptr<Quantizer>::reclaim(this); |
51 | } |
52 | |
53 | /** |
54 | * Each concrete Quantizer type should have a unique QScheme type. |
55 | */ |
56 | virtual QScheme qscheme() const = 0; |
57 | |
58 | ScalarType scalar_type() const { |
59 | return scalar_type_; |
60 | } |
61 | |
62 | /** |
63 | * quantize a float Tensor into a quantized Tensor. |
64 | */ |
65 | virtual Tensor quantize(const Tensor& t) = 0; |
66 | |
67 | /** |
68 | * dequantize a quantized Tensor into a float Tensor. |
69 | */ |
70 | virtual Tensor dequantize(const Tensor& t) = 0; |
71 | |
72 | /** |
73 | * dequantize a quantized Tensor into a float Tensor, out= variant |
74 | */ |
75 | virtual Tensor& dequantize_out(Tensor& out, const Tensor& t) = 0; |
76 | |
77 | /** |
78 | * Compare against `other` for equality. |
79 | */ |
80 | virtual bool equalTo(QuantizerPtr other) const = 0; |
81 | }; |
82 | |
83 | } // namespace at |
84 | |