1 | #pragma once |
2 | |
3 | #include <c10/macros/Export.h> |
4 | #include <string> |
5 | |
6 | namespace torch { |
7 | namespace jit { |
8 | namespace mobile { |
9 | class Module; |
10 | namespace quantization { |
/*
 * Device-side PTQ (post-training quantization) API.
 * Once the model has been prepared for quantization on the server side, it
 * is sent to the device. On the device the model is further trained. At the
 * end of training, before the model is readied for inference, we need to
 * quantize the model.
 *
 * Usage of this API is as follows:
 *   PTQQuanizationHelper ptq_helper;
 *   ptq_helper.quantize_dynamic(m, "forward");
 *
 * Args:
 *   m: An instance of mobile::Module, captured by reference. This module is
 *      mutated in place to replace its <method_name> method with a quantized
 *      equivalent.
 *   method_name: Name of the method to be quantized. AOT preparation for
 *      quantization must also have been done for this method.
 *
 * Returns:
 *   The in-place mutated `m`, whose size should be smaller due to weight
 *   quantization and whose <method_name> method should use quantized ops.
 */
28 | class TORCH_API PTQQuanizationHelper { |
29 | public: |
30 | PTQQuanizationHelper() = default; |
31 | void quantize_dynamic( |
32 | torch::jit::mobile::Module& m, |
33 | const std::string& method_name); |
34 | }; |
35 | } // namespace quantization |
36 | } // namespace mobile |
37 | } // namespace jit |
38 | } // namespace torch |
39 | |