1 | /* |
2 | * Copyright (c) Meta Platforms, Inc. and affiliates. |
3 | * All rights reserved. |
4 | * This source code is licensed under the BSD-style license found in the |
5 | * LICENSE file in the root directory of this source tree. |
6 | */ |
7 | #define FBGEMM_EXPORTS |
8 | #include "fbgemm/FbgemmI8DepthwiseAvx2.h" |
9 | |
10 | #include <stdexcept> // for logic_error |
11 | #include <string> |
12 | |
13 | #include "./FbgemmI8Depthwise2DAvx2-inl.h" |
14 | |
15 | namespace fbgemm { |
16 | |
17 | // Old interface |
18 | template <typename BIAS_TYPE /*=std::int32_t*/> |
19 | void depthwise_2d_per_channel_quantization_same_pad( |
20 | int N, |
21 | int H, |
22 | int W, |
23 | int IC_OC, |
24 | int stride_h, |
25 | int stride_w, |
26 | int32_t A_zero_point, |
27 | const uint8_t* A, |
28 | const int32_t* B_zero_point, |
29 | const PackedDepthWiseConvMatrix& Bp, |
30 | const float* C_multiplier, |
31 | int32_t C_zero_point, |
32 | uint8_t* C, |
33 | const int32_t* col_offsets, |
34 | const BIAS_TYPE* bias, |
35 | bool fuse_relu, |
36 | const float* act_times_w_scale, |
37 | int thread_id, |
38 | int num_threads) { |
39 | depthwise_2d_same_pad<QuantizationGranularity::OUT_CHANNEL>( |
40 | N, |
41 | H, |
42 | W, |
43 | IC_OC, |
44 | IC_OC, |
45 | stride_h, |
46 | stride_w, |
47 | A_zero_point, |
48 | A, |
49 | B_zero_point, |
50 | Bp, |
51 | C_multiplier, |
52 | C_zero_point, |
53 | C, |
54 | col_offsets, |
55 | bias, |
56 | fuse_relu, |
57 | act_times_w_scale, |
58 | thread_id, |
59 | num_threads); |
60 | } |
61 | |
62 | template FBGEMM_API void |
63 | depthwise_2d_per_channel_quantization_same_pad<int32_t>( |
64 | int N, |
65 | int H, |
66 | int W, |
67 | int IC_OC, |
68 | int stride_h, |
69 | int stride_w, |
70 | int32_t A_zero_point, |
71 | const uint8_t* A, |
72 | const int32_t* B_zero_point, |
73 | const PackedDepthWiseConvMatrix& Bp, |
74 | const float* C_multiplier, |
75 | int32_t C_zero_point, |
76 | uint8_t* C, |
77 | const int32_t* col_offsets, |
78 | const int32_t* bias, |
79 | bool fuse_relu, |
80 | const float* act_times_w_scale, |
81 | int thread_id, |
82 | int num_threads); |
83 | |
84 | template FBGEMM_API void depthwise_2d_per_channel_quantization_same_pad<float>( |
85 | int N, |
86 | int H, |
87 | int W, |
88 | int IC_OC, |
89 | int stride_h, |
90 | int stride_w, |
91 | int32_t A_zero_point, |
92 | const uint8_t* A, |
93 | const int32_t* B_zero_point, |
94 | const PackedDepthWiseConvMatrix& Bp, |
95 | const float* C_multiplier, |
96 | int32_t C_zero_point, |
97 | uint8_t* C, |
98 | const int32_t* col_offsets, |
99 | const float* bias, |
100 | bool fuse_relu, |
101 | const float* act_times_w_scale, |
102 | int thread_id, |
103 | int num_threads); |
104 | |
105 | } // namespace fbgemm |
106 | |