#pragma once

#include <c10/macros/Export.h>
#include <string>

namespace torch {
namespace jit {
namespace mobile {
class Module;
namespace quantization {
/*
 * Device-side PTQ API.
 * Once the model has been prepared for quantization on the server side, it is
 * sent to the device, where it may be further trained. At the end of training,
 * before the model is readied for inference, we need to quantize the model.
 *
 * Usage:
 *   PTQQuanizationHelper ptq_helper;
 *   ptq_helper.quantize_dynamic(m, "forward");
 *
 * Args:
 *   m: Captured by reference, an instance of mobile::Module. The module is
 *      mutated in place, replacing its <method_name> method with a quantized
 *      equivalent.
 *   method_name: Name of the method to be quantized. AOT preparation for
 *      quantization must also have been done for this method.
 *
 * Returns:
 *   The in-place mutated `m`, whose size should be smaller due to weight
 *   quantization and whose <method_name> method should use quantized ops.
 */
class TORCH_API PTQQuanizationHelper {
 public:
  PTQQuanizationHelper() = default;
  void quantize_dynamic(
      torch::jit::mobile::Module& m,
      const std::string& method_name);
};
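
/*
 * A minimal end-to-end usage sketch, kept in a comment since this header only
 * declares the API. It assumes the model was prepared for quantization ahead
 * of time and exported for mobile; the model path below is a placeholder, and
 * `_load_for_mobile` (declared in torch/csrc/jit/mobile/import.h) is shown
 * only to make the example self-contained:
 *
 *   #include <torch/csrc/jit/mobile/import.h>
 *   #include <torch/csrc/jit/mobile/quantization.h>
 *
 *   // Load the AOT-prepared model as a mobile::Module.
 *   torch::jit::mobile::Module m =
 *       torch::jit::_load_for_mobile("/path/to/prepared_model.ptl");
 *
 *   // ... optional on-device training of `m` happens here ...
 *
 *   // Quantize in place; afterwards "forward" dispatches to quantized ops
 *   // and the module's weights are stored in quantized form.
 *   torch::jit::mobile::quantization::PTQQuanizationHelper ptq_helper;
 *   ptq_helper.quantize_dynamic(m, "forward");
 */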
} // namespace quantization
} // namespace mobile
} // namespace jit
} // namespace torch