1/*
2 * Copyright (c) Facebook, Inc. and its affiliates.
3 * All rights reserved.
4 *
5 * This source code is licensed under the BSD-style license found in the
6 * LICENSE file in the root directory of this source tree.
7 */
8
9#include <stdbool.h>
10#include <stddef.h>
11#include <stdint.h>
12
13#ifdef _MSC_VER
14#include <windows.h>
15#else
16#include <pthread.h>
17#endif
18
19#include <cpuinfo.h>
20#include <qnnpack.h>
21#include <qnnpack/log.h>
22#include <qnnpack/params.h>
23#include <qnnpack/q8avgpool.h>
24#include <qnnpack/q8conv.h>
25#include <qnnpack/q8dwconv.h>
26#include <qnnpack/q8gavgpool.h>
27#include <qnnpack/q8gemm.h>
28#include <qnnpack/q8vadd.h>
29#include <qnnpack/u8clamp.h>
30#include <qnnpack/u8lut32norm.h>
31#include <qnnpack/u8maxpool.h>
32#include <qnnpack/u8rmax.h>
33#include <qnnpack/x8lut.h>
34#include <qnnpack/x8zip.h>
35
#ifdef _MSC_VER
/*
 * One-time initialization guard handed to InitOnceExecuteOnce().
 * It is initialized explicitly with INIT_ONCE_STATIC_INIT, the documented way
 * to set up a statically allocated INIT_ONCE, rather than relying on implicit
 * zero-initialization of objects with static storage duration.
 */
static INIT_ONCE init_guard = INIT_ONCE_STATIC_INIT;
/* Callback passed to InitOnceExecuteOnce(); defined at the end of this file. */
BOOL CALLBACK init_win(PINIT_ONCE InitOnce, PVOID Parameter, PVOID *lpContex);
#else
/* One-time initialization guard handed to pthread_once(). */
static pthread_once_t init_guard = PTHREAD_ONCE_INIT;
#endif
42
43struct qnnp_parameters qnnp_params = {
44 .initialized = false
45};
46
47static void init(void) {
48#if CPUINFO_ARCH_ARM
49 if (!cpuinfo_has_arm_neon()) {
50 qnnp_log_error("QNNPACK initialization failed: NEON is not supported");
51 return;
52 }
53 qnnp_params.q8conv = (struct q8conv_parameters) {
54 .gemm = q8gemm_ukernel_4x8__aarch32_neon,
55 .conv = q8conv_ukernel_4x8__aarch32_neon,
56 .mr = 4,
57 .nr = 8,
58 .kr = 1,
59 };
60 qnnp_params.q8conv_xzp = (struct q8conv_xzp_parameters) {
61 .gemm = q8gemm_xzp_ukernel_4x8c2__aarch32_neon,
62 .mr = 4,
63 .nr = 8,
64 .kr = 2,
65 .kc = 8,
66 .kthreshold = SIZE_MAX,
67 };
68 /* setup xzp threshold based on measurements */
69 switch (cpuinfo_get_core(0)->uarch) {
70 case cpuinfo_uarch_cortex_a72:
71 qnnp_params.q8conv_xzp.kthreshold = 64;
72 break;
73 case cpuinfo_uarch_cortex_a73:
74 qnnp_params.q8conv_xzp.kthreshold = 256;
75 break;
76 case cpuinfo_uarch_cortex_a75:
77 qnnp_params.q8conv_xzp.kthreshold = 32;
78 break;
79 case cpuinfo_uarch_cortex_a76:
80 qnnp_params.q8conv_xzp.kthreshold = 16;
81 break;
82 default:
83 break;
84 }
85 qnnp_params.q8dw9 = (struct q8dwconv_up_parameters) {
86 .updw = q8dwconv_ukernel_up8x9__aarch32_neon,
87 .cr = 8,
88 };
89 qnnp_params.q8dw25 = (struct q8dwconv_mp_parameters) {
90 .mpdw = q8dwconv_ukernel_mp8x25__neon,
91 .cr = 8,
92 };
93 qnnp_params.q8sum_rows = (struct q8sum_rows_parameters) {
94 .sum_rows = q8sumrows_ukernel_4x__neon,
95 .m = 4,
96 };
97 qnnp_params.q8vadd = q8vadd_ukernel__neon;
98 qnnp_params.q8gavgpool = (struct q8gavgpool_parameters) {
99 .ltnr = q8gavgpool_ukernel_up8xm__neon,
100 .genr_lemr = q8gavgpool_ukernel_up8x7__neon,
101 .genr_gtmr = q8gavgpool_ukernel_mp8x7p7q__neon,
102 .mr = 7,
103 .nr = 8,
104 };
105 qnnp_params.q8avgpool = (struct q8avgpool_parameters) {
106 .ltkr = q8avgpool_ukernel_up8xm__neon,
107 .gekr_lemr = q8avgpool_ukernel_up8x9__neon,
108 .gekr_gtmr = q8avgpool_ukernel_mp8x9p8q__neon,
109 .mr = 9,
110 .qr = 8,
111 .kr = 8,
112 };
113 qnnp_params.u8maxpool = (struct u8maxpool_parameters) {
114 .ltkr = u8maxpool_ukernel_sub16__neon,
115 .gekr = u8maxpool_ukernel_16x9p8q__neon,
116 .mr = 9,
117 .qr = 8,
118 .kr = 16,
119 };
120 qnnp_params.x8zip = (struct x8zip_parameters) {
121 .x2 = qnnp_x8zip_x2__neon,
122 .x3 = qnnp_x8zip_x3__neon,
123 .x4 = qnnp_x8zip_x4__neon,
124 .xm = qnnp_x8zip_xm__neon,
125 };
126 qnnp_params.u8clamp = u8clamp_ukernel__neon;
127 qnnp_params.u8rmax = u8rmax_ukernel__neon;
128 qnnp_params.u8lut32norm = u8lut32norm_ukernel__scalar;
129 qnnp_params.x8lut = x8lut_ukernel__scalar;
130#elif CPUINFO_ARCH_ARM64
131 qnnp_params.q8conv = (struct q8conv_parameters) {
132 .gemm = q8gemm_ukernel_8x8__aarch64_neon,
133 .conv = q8conv_ukernel_8x8__aarch64_neon,
134 .mr = 8,
135 .nr = 8,
136 .kr = 1,
137 };
138 qnnp_params.q8conv_xzp = (struct q8conv_xzp_parameters) {
139 .kthreshold = SIZE_MAX,
140 };
141 qnnp_params.q8dw9 = (struct q8dwconv_up_parameters) {
142 .updw = q8dwconv_ukernel_up8x9__neon,
143 .cr = 8,
144 };
145 qnnp_params.q8dw25 = (struct q8dwconv_mp_parameters) {
146 .mpdw = q8dwconv_ukernel_mp8x25__neon,
147 .cr = 8,
148 };
149 qnnp_params.q8vadd = q8vadd_ukernel__neon;
150 qnnp_params.q8gavgpool = (struct q8gavgpool_parameters) {
151 .ltnr = q8gavgpool_ukernel_up8xm__neon,
152 .genr_lemr = q8gavgpool_ukernel_up8x7__neon,
153 .genr_gtmr = q8gavgpool_ukernel_mp8x7p7q__neon,
154 .mr = 7,
155 .nr = 8,
156 };
157 qnnp_params.q8avgpool = (struct q8avgpool_parameters) {
158 .ltkr = q8avgpool_ukernel_up8xm__neon,
159 .gekr_lemr = q8avgpool_ukernel_up8x9__neon,
160 .gekr_gtmr = q8avgpool_ukernel_mp8x9p8q__neon,
161 .mr = 9,
162 .qr = 8,
163 .kr = 8,
164 };
165 qnnp_params.u8maxpool = (struct u8maxpool_parameters) {
166 .ltkr = u8maxpool_ukernel_sub16__neon,
167 .gekr = u8maxpool_ukernel_16x9p8q__neon,
168 .mr = 9,
169 .qr = 8,
170 .kr = 16,
171 };
172 qnnp_params.x8zip = (struct x8zip_parameters) {
173 .x2 = qnnp_x8zip_x2__neon,
174 .x3 = qnnp_x8zip_x3__neon,
175 .x4 = qnnp_x8zip_x4__neon,
176 .xm = qnnp_x8zip_xm__neon,
177 };
178 qnnp_params.u8clamp = u8clamp_ukernel__neon;
179 qnnp_params.u8rmax = u8rmax_ukernel__neon;
180 qnnp_params.u8lut32norm = u8lut32norm_ukernel__scalar;
181 qnnp_params.x8lut = x8lut_ukernel__scalar;
182#elif CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64
183 if (!cpuinfo_has_x86_sse2()) {
184 qnnp_log_error("QNNPACK initialization failed: SSE2 is not supported");
185 return;
186 }
187 qnnp_params.q8conv = (struct q8conv_parameters){
188 .gemm = q8gemm_ukernel_4x4c2__sse2,
189 .conv = q8conv_ukernel_4x4c2__sse2,
190 .mr = 4,
191 .nr = 4,
192 .kr = 2,
193 };
194 qnnp_params.q8conv_xzp = (struct q8conv_xzp_parameters) {
195 .kthreshold = SIZE_MAX,
196 };
197 qnnp_params.q8dw9 = (struct q8dwconv_up_parameters) {
198 .updw = q8dwconv_ukernel_up8x9__sse2,
199 .cr = 8,
200 };
201 qnnp_params.q8dw25 = (struct q8dwconv_mp_parameters) {
202 .mpdw = q8dwconv_ukernel_mp8x25__sse2,
203 .cr = 8,
204 };
205 qnnp_params.q8vadd = q8vadd_ukernel__sse2;
206 qnnp_params.q8gavgpool = (struct q8gavgpool_parameters) {
207 .ltnr = q8gavgpool_ukernel_up8xm__sse2,
208 .genr_lemr = q8gavgpool_ukernel_up8x7__sse2,
209 .genr_gtmr = q8gavgpool_ukernel_mp8x7p7q__sse2,
210 .mr = 7,
211 .nr = 8,
212 };
213 qnnp_params.q8avgpool = (struct q8avgpool_parameters) {
214 .ltkr = q8avgpool_ukernel_up8xm__sse2,
215 .gekr_lemr = q8avgpool_ukernel_up8x9__sse2,
216 .gekr_gtmr = q8avgpool_ukernel_mp8x9p8q__sse2,
217 .mr = 9,
218 .qr = 8,
219 .kr = 8,
220 };
221 qnnp_params.u8maxpool = (struct u8maxpool_parameters) {
222 .ltkr = u8maxpool_ukernel_sub16__sse2,
223 .gekr = u8maxpool_ukernel_16x9p8q__sse2,
224 .mr = 9,
225 .qr = 8,
226 .kr = 16,
227 };
228 qnnp_params.x8zip = (struct x8zip_parameters) {
229 .x2 = qnnp_x8zip_x2__sse2,
230 .x3 = qnnp_x8zip_x3__sse2,
231 .x4 = qnnp_x8zip_x4__sse2,
232 .xm = qnnp_x8zip_xm__sse2,
233 };
234 qnnp_params.u8clamp = u8clamp_ukernel__sse2;
235 qnnp_params.u8rmax = u8rmax_ukernel__sse2;
236 qnnp_params.u8lut32norm = u8lut32norm_ukernel__scalar;
237 qnnp_params.x8lut = x8lut_ukernel__scalar;
238#else
239 #error "Unsupported architecture"
240#endif
241 qnnp_params.initialized = true;
242}
243
244enum qnnp_status qnnp_initialize(void) {
245 if (!cpuinfo_initialize()) {
246 return qnnp_status_out_of_memory;
247 }
248#ifdef _MSC_VER
249 InitOnceExecuteOnce(&init_guard, init_win, NULL, NULL);
250#else
251 pthread_once(&init_guard, &init);
252#endif
253 if (qnnp_params.initialized) {
254 return qnnp_status_success;
255 } else {
256 return qnnp_status_unsupported_hardware;
257 }
258}
259
260enum qnnp_status qnnp_deinitialize(void) {
261 cpuinfo_deinitialize();
262 return qnnp_status_success;
263}
264
#ifdef _MSC_VER
/*
 * Adapter with the callback signature taken by InitOnceExecuteOnce():
 * it forwards to init() and always reports TRUE. None of the three
 * arguments is used.
 */
BOOL CALLBACK init_win(PINIT_ONCE InitOnce, PVOID Parameter, PVOID *lpContex) {
  (void)InitOnce;
  (void)Parameter;
  (void)lpContex;

  init();
  return TRUE;
}
#endif
271