FbgemmI8DepthwisePerChannelQuantAvx2.cc source code [pytorch/third_party/fbgemm/src/FbgemmI8DepthwisePerChannelQuantAvx2.cc]

1	/*
2	* Copyright (c) Meta Platforms, Inc. and affiliates.
3	* All rights reserved.
4	* This source code is licensed under the BSD-style license found in the
5	* LICENSE file in the root directory of this source tree.
6	*/
7	#define FBGEMM_EXPORTS
8	#include "fbgemm/FbgemmI8DepthwiseAvx2.h"
9
10	#include <stdexcept> // for logic_error
11	#include <string>
12
13	#include "./FbgemmI8Depthwise2DAvx2-inl.h"
14
15	namespace fbgemm {
16
17	// Old interface
18	template <typename BIAS_TYPE /=std::int32_t/>
19	void depthwise_2d_per_channel_quantization_same_pad(
20	int N,
21	int H,
22	int W,
23	int IC_OC,
24	int stride_h,
25	int stride_w,
26	int32_t A_zero_point,
27	const uint8_t* A,
28	const int32_t* B_zero_point,
29	const PackedDepthWiseConvMatrix& Bp,
30	const float* C_multiplier,
31	int32_t C_zero_point,
32	uint8_t* C,
33	const int32_t* col_offsets,
34	const BIAS_TYPE* bias,
35	bool fuse_relu,
36	const float* act_times_w_scale,
37	int thread_id,
38	int num_threads) {
39	depthwise_2d_same_pad<QuantizationGranularity::OUT_CHANNEL>(
40	N,
41	H,
42	W,
43	IC_OC,
44	IC_OC,
45	stride_h,
46	stride_w,
47	A_zero_point,
48	A,
49	B_zero_point,
50	Bp,
51	C_multiplier,
52	C_zero_point,
53	C,
54	col_offsets,
55	bias,
56	fuse_relu,
57	act_times_w_scale,
58	thread_id,
59	num_threads);
60	}
61
62	template FBGEMM_API void
63	depthwise_2d_per_channel_quantization_same_pad<int32_t>(
64	int N,
65	int H,
66	int W,
67	int IC_OC,
68	int stride_h,
69	int stride_w,
70	int32_t A_zero_point,
71	const uint8_t* A,
72	const int32_t* B_zero_point,
73	const PackedDepthWiseConvMatrix& Bp,
74	const float* C_multiplier,
75	int32_t C_zero_point,
76	uint8_t* C,
77	const int32_t* col_offsets,
78	const int32_t* bias,
79	bool fuse_relu,
80	const float* act_times_w_scale,
81	int thread_id,
82	int num_threads);
83
84	template FBGEMM_API void depthwise_2d_per_channel_quantization_same_pad<float>(
85	int N,
86	int H,
87	int W,
88	int IC_OC,
89	int stride_h,
90	int stride_w,
91	int32_t A_zero_point,
92	const uint8_t* A,
93	const int32_t* B_zero_point,
94	const PackedDepthWiseConvMatrix& Bp,
95	const float* C_multiplier,
96	int32_t C_zero_point,
97	uint8_t* C,
98	const int32_t* col_offsets,
99	const float* bias,
100	bool fuse_relu,
101	const float* act_times_w_scale,
102	int thread_id,
103	int num_threads);
104
105	} // namespace fbgemm
106

Browse the source code of pytorch/third_party/fbgemm/src/FbgemmI8DepthwisePerChannelQuantAvx2.cc