1 | /* |
2 | * Copyright (c) Facebook, Inc. and its affiliates. |
3 | * All rights reserved. |
4 | * |
5 | * This source code is licensed under the BSD-style license found in the |
6 | * LICENSE file in the root directory of this source tree. |
7 | */ |
8 | |
9 | #include <assert.h> |
10 | #include <stddef.h> |
11 | #include <stdint.h> |
12 | #include <string.h> |
13 | |
14 | #include <qnnpack.h> |
15 | #include <qnnpack/operator.h> |
16 | #include <qnnpack/log.h> |
17 | #include <qnnpack/common.h> |
18 | #include <qnnpack/math.h> |
19 | #include <qnnpack/params.h> |
20 | |
21 | #ifdef _MSC_VER |
22 | #include <malloc.h> |
23 | #endif |
24 | |
/*
 * Shared, read-only state for compute_q8gemm() tiles: one grouped
 * quantized GEMM C = A * W with per-group weight/activation offsets.
 */
struct q8gemm_context {
  size_t k;        /* reduction length (input channels per group) */
  size_t k_stride; /* per-output-channel packed weight byte count is k_stride + sizeof(int32_t) */
  size_t n;        /* output channels per group */
  size_t n_stride; /* packed-weight stride between groups, in output channels */
  const uint8_t* a;        /* input activations */
  size_t a_stride;         /* stride between input rows, in bytes */
  const uint8_t* packed_w; /* interleaved packed weights + int32 biases */
  uint8_t* c;              /* output */
  size_t c_stride;         /* stride between output rows, in bytes */
  union qnnp_conv_quantization_params quantization_params;
  const q8gemm_ukernel_function ukernel; /* MRxNR tile micro-kernel */
};
38 | |
39 | static void compute_q8gemm( |
40 | const struct q8gemm_context context[RESTRICT_STATIC 1], |
41 | size_t group_index, |
42 | size_t pixel_index, |
43 | size_t mr_block_start, |
44 | size_t nr_block_start, |
45 | size_t group_range /* always 1 */, |
46 | size_t pixel_range, |
47 | size_t mr_block_size, |
48 | size_t nr_block_size) |
49 | { |
50 | const size_t k = context->k; |
51 | const size_t k_stride = context->k_stride; |
52 | const size_t n = context->n; |
53 | const size_t n_stride = context->n_stride; |
54 | const uint8_t* restrict a = context->a; |
55 | const size_t a_stride = context->a_stride; |
56 | const void* restrict packed_w = context->packed_w; |
57 | uint8_t* restrict c = context->c; |
58 | const size_t c_stride = context->c_stride; |
59 | |
60 | context->ukernel( |
61 | mr_block_size, |
62 | nr_block_size, |
63 | k, |
64 | a + (pixel_index + mr_block_start) * a_stride + group_index * k, |
65 | a_stride, |
66 | (const void*) ((uintptr_t) packed_w + (nr_block_start + group_index * n_stride) * (k_stride * sizeof(uint8_t) + sizeof(int32_t))), |
67 | c + (pixel_index + mr_block_start) * c_stride + nr_block_start + group_index * n, |
68 | c_stride, |
69 | &context->quantization_params); |
70 | } |
71 | |
/*
 * State for compute_sum_rows(): precomputes per-row input sums scaled by
 * `multiplier` (set to -kernel_zero_point by the XZP GEMM path) into a_sum,
 * laid out as [batch][group][row].
 */
struct q8sum_rows_context {
  const uint8_t* a; /* input activations */
  size_t groups;
  size_t m;         /* rows (pixels) per image */
  size_t k;         /* channels summed per row */
  size_t a_stride;  /* stride between input rows, in bytes */
  const int32_t multiplier; /* applied to each row sum by the ukernel */
  int32_t* a_sum;           /* output row-sum buffer */
  size_t a_sum_stride;      /* rows per (batch, group) slice of a_sum */
  const q8sum_rows_ukernel_function ukernel;
};
83 | |
84 | static void compute_sum_rows( |
85 | const struct q8sum_rows_context context[RESTRICT_STATIC 1], |
86 | size_t group_index, |
87 | size_t batch_index, |
88 | size_t block_start, |
89 | size_t group_range /* always 1 */, |
90 | size_t batch_range /* always 1 */, |
91 | size_t block_size) |
92 | { |
93 | const uint8_t* a = context->a; |
94 | const size_t groups = context->groups; |
95 | const size_t m = context->m; |
96 | const size_t k = context->k; |
97 | const size_t a_stride = context->a_stride; |
98 | const int32_t multiplier = context->multiplier; |
99 | int32_t* a_sum = context->a_sum; |
100 | const size_t a_sum_stride = context->a_sum_stride; |
101 | |
102 | context->ukernel( |
103 | a + batch_index * m * a_stride + group_index * k + block_start * a_stride, |
104 | min(block_size, m - block_start), |
105 | k, |
106 | a_stride, |
107 | multiplier, |
108 | a_sum + batch_index * groups * a_sum_stride + group_index * a_sum_stride + block_start); |
109 | } |
110 | |
/*
 * State for compute_q8gemm_xzp(): like q8gemm_context, plus the
 * precomputed per-row input sums (a_sum) that the XZP micro-kernel folds
 * into Q31 requantization.
 */
struct q8gemm_xzp_context {
  size_t k;        /* reduction length (input channels per group) */
  size_t k_stride; /* per-output-channel packed weight byte count is k_stride + sizeof(int32_t) */
  size_t n;        /* output channels per group */
  size_t n_stride; /* packed-weight stride between groups, in output channels */
  const uint8_t* a;
  size_t a_stride;
  const void* packed_w;
  uint8_t* c;
  size_t c_stride;
  const int32_t* a_sum;  /* row sums written by compute_sum_rows() */
  size_t groups;
  size_t batch_size;
  size_t a_sum_stride;   /* rows per (batch, group) slice of a_sum */
  union qnnp_q31_requantization_params requantization_params;
  const q8gemm_xzp_ukernel_function ukernel;
};
128 | |
/*
 * pthreadpool 4D-tiled callback: one MRxNR tile of the XZP quantized GEMM.
 * Differs from compute_q8gemm() by passing the precomputed a_sum row sums
 * to the micro-kernel. group_range is always 1.
 */
static void compute_q8gemm_xzp(
    const struct q8gemm_xzp_context context[RESTRICT_STATIC 1],
    size_t group_index,
    size_t pixel_index,
    size_t mr_block_start,
    size_t nr_block_start,
    size_t group_range /* always 1 */,
    size_t pixel_range,
    size_t mr_block_size,
    size_t nr_block_size)
{
  const size_t k = context->k;
  const size_t k_stride = context->k_stride;
  const size_t n = context->n;
  const size_t n_stride = context->n_stride;
  const uint8_t* restrict a = context->a;
  const size_t a_stride = context->a_stride;
  const void* restrict packed_w = context->packed_w;
  uint8_t* restrict c = context->c;
  const size_t c_stride = context->c_stride;
  const int32_t* a_sum = context->a_sum;
  const size_t groups = context->groups;
  const size_t a_sum_stride = context->a_sum_stride;

  context->ukernel(
      mr_block_size,
      nr_block_size,
      k,
      /* A tile: absolute row (pixel_index + mr_block_start), group channel offset. */
      a + (pixel_index + mr_block_start) * a_stride + group_index * k,
      a_stride,
      /* a_sum layout is [batch][group][row]; the dispatcher tiles pixels per
         image, so pixel_index is a multiple of a_sum_stride and
         pixel_index * groups selects the batch slice. */
      a_sum + pixel_index * groups + group_index * a_sum_stride + mr_block_start,
      /* Each packed output channel: k_stride weight bytes + int32 bias. */
      (const void*) ((uintptr_t) packed_w + (nr_block_start + group_index * n_stride) * (k_stride * sizeof(uint8_t) + sizeof(int32_t))),
      c + (pixel_index + mr_block_start) * c_stride + nr_block_start + group_index * n,
      c_stride,
      &context->requantization_params);
}
165 | |
/*
 * State for compute_q8conv(): implicit-GEMM convolution driven through an
 * indirection buffer of input-row pointers.
 */
struct q8conv_context {
  size_t bs;        /* batch size */
  size_t ks;        /* kernel size (kernel_height * kernel_width) */
  size_t kc;        /* input channels per group */
  size_t kc_stride; /* packed weight bytes per output channel, minus bias */
  size_t m;         /* output pixels per image */
  size_t m_stride;  /* m rounded up for indirection-buffer layout */
  size_t n;         /* output channels per group */
  size_t n_stride;  /* packed-weight stride between groups, in output channels */
  const uint8_t** indirect_a; /* ks pointers per output pixel */
  const void* packed_w;
  uint8_t* c;
  size_t c_stride;
  union qnnp_conv_quantization_params quantization_params;
  const q8conv_ukernel_function ukernel;
};
182 | |
/*
 * pthreadpool 4D-tiled callback: one MRxNR output tile of one
 * (group, image) of the implicit-GEMM convolution.
 * group_range and image_range are always 1.
 */
static void compute_q8conv(
    const struct q8conv_context context[RESTRICT_STATIC 1],
    size_t group_index,
    size_t image_index,
    size_t mr_block_start,
    size_t nr_block_start,
    size_t group_range /* always 1 */,
    size_t image_range /* always 1 */,
    size_t mr_block_size,
    size_t nr_block_size)
{
  const size_t bs = context->bs;
  const size_t ks = context->ks;
  const size_t kc = context->kc;
  const size_t kc_stride = context->kc_stride;
  const size_t m = context->m;
  const size_t m_stride = context->m_stride;
  const size_t n = context->n;
  const size_t n_stride = context->n_stride;
  const uint8_t** restrict indirect_a = context->indirect_a;
  const void* restrict packed_w = context->packed_w;
  uint8_t* restrict c = context->c;
  const size_t c_stride = context->c_stride;

  context->ukernel(
      mr_block_size,
      nr_block_size,
      kc,
      ks,
      /* Indirection buffer holds ks input pointers per output pixel; images
         are padded to m_stride pixels, and all images of group g precede
         those of group g+1 (hence group_index * bs). */
      indirect_a + (mr_block_start + (image_index + group_index * bs) * m_stride) * ks,
      /* Each packed output channel: kc_stride weight bytes + int32 bias. */
      (const void*) ((uintptr_t) packed_w + (nr_block_start + group_index * n_stride) * (kc_stride * sizeof(uint8_t) + sizeof(int32_t))),
      /* Output uses the unpadded per-image pixel count m. */
      c + (mr_block_start + image_index * m) * c_stride + group_index * n + nr_block_start,
      c_stride,
      &context->quantization_params);
}
218 | |
/*
 * State for the depthwise convolution callbacks. The unipass/multipass
 * union mirrors the 3x3 (single accumulation pass) vs 5x5 (multi-pass with
 * an int32 accumulator buffer) micro-kernel variants selected at dispatch.
 */
struct q8dwconv_context {
  size_t groups;       /* channels (depthwise: one group per channel) */
  size_t group_stride; /* accumulator length for the multipass kernel */
  const uint8_t** indirection_buffer; /* input-row pointers */
  size_t indirection_buffer_row_stride; /* pointers per output row */
  size_t indirection_buffer_col_stride; /* bytes between output columns */
  const void* packed_weights;
  uint8_t* output;
  size_t output_height;
  size_t output_width;
  size_t output_row_stride;     /* bytes between output rows */
  size_t output_col_increment;  /* bytes to skip after each output pixel */
  union qnnp_conv_quantization_params quantization_params;
  union {
    const q8dwconv_up_ukernel_function unipass_ukernel;
    const q8dwconv_mp_ukernel_function multipass_ukernel;
  };
};
237 | |
238 | static void compute_dwconv_unipass( |
239 | const struct q8dwconv_context context[RESTRICT_STATIC 1], |
240 | size_t image, |
241 | size_t output_y) |
242 | { |
243 | const size_t output_height = context->output_height; |
244 | |
245 | context->unipass_ukernel( |
246 | context->groups, |
247 | context->output_width, |
248 | context->indirection_buffer + (image * output_height + output_y) * context->indirection_buffer_row_stride, |
249 | context->packed_weights, |
250 | context->output + (image * output_height + output_y) * context->output_row_stride, |
251 | context->indirection_buffer_col_stride, |
252 | context->output_col_increment, |
253 | &context->quantization_params); |
254 | } |
255 | |
/*
 * pthreadpool 2D callback: one output row of a multipass (large-kernel)
 * depthwise convolution, using a transient int32 accumulator of
 * group_stride elements.
 * NOTE(review): "multiipass" [sic] — name kept as-is to match the
 * dispatch-site cast; consider renaming both together.
 */
static void compute_dwconv_multiipass(
    const struct q8dwconv_context context[RESTRICT_STATIC 1],
    size_t image,
    size_t output_y)
{
  const size_t output_height = context->output_height;
  QNNP_ALIGN(16)
#ifdef _MSC_VER
  /* MSVC has no VLAs: stack-allocate via _malloca (falls back to heap for
     large sizes). NOTE(review): _malloca may return NULL on failure —
     unchecked here. */
  int32_t* multipass_acc = _malloca(sizeof(int32_t) * context->group_stride);
#else
  int32_t multipass_acc[context->group_stride];
#endif


  context->multipass_ukernel(
      context->groups,
      context->output_width,
      context->indirection_buffer + (image * output_height + output_y) * context->indirection_buffer_row_stride,
      context->packed_weights,
      multipass_acc,
      context->output + (image * output_height + output_y) * context->output_row_stride,
      context->indirection_buffer_col_stride,
      context->output_col_increment,
      &context->quantization_params);

#ifdef _MSC_VER
  _freea(multipass_acc);
#endif
}
285 | |
/* State for compute_max_pooling(): strides are in bytes (applied via uintptr_t). */
struct max_pooling_context {
  const void** indirect_input; /* input-row pointers per output pixel */
  size_t indirect_input_batch_stride;
  size_t indirect_input_height_stride;
  void* output;
  size_t output_batch_stride;
  size_t output_height_stride;
  size_t output_width;
  size_t pooling_size; /* pooling_height * pooling_width */
  size_t channels;
  size_t input_increment;  /* indirection-pointer advance per output pixel */
  size_t output_increment; /* bytes to skip after each output pixel */
  union qnnp_u8_clamping_params params;
  u8maxpool_ukernel_function ukernel;
};
301 | |
302 | static void compute_max_pooling( |
303 | const struct max_pooling_context context[RESTRICT_STATIC 1], |
304 | size_t batch_index, |
305 | size_t output_y) |
306 | { |
307 | const void** indirect_input = |
308 | (const void**) ((uintptr_t) context->indirect_input + |
309 | batch_index * context->indirect_input_batch_stride + output_y * context->indirect_input_height_stride); |
310 | void* output = |
311 | (void*) ((uintptr_t) context->output + batch_index * context->output_batch_stride + output_y * context->output_height_stride); |
312 | |
313 | context->ukernel( |
314 | context->output_width, context->pooling_size, context->channels, |
315 | (const uint8_t**) indirect_input, output, |
316 | context->input_increment, context->output_increment, |
317 | &context->params); |
318 | } |
319 | |
/*
 * State for the average-pooling callbacks; unipass vs multipass ukernel is
 * selected at dispatch based on channel count and pooling size.
 * Byte strides are applied via uintptr_t arithmetic.
 */
struct average_pooling_context {
  const void** indirect_input; /* input-row pointers per output pixel */
  size_t indirect_input_batch_stride;
  size_t indirect_input_height_stride;
  void* output;
  size_t output_batch_stride;
  size_t output_height_stride;
  size_t output_width;
  size_t pooling_size;    /* pooling_height * pooling_width */
  size_t channels;
  size_t packed_channels; /* channels rounded up to kr; multipass buffer length */
  const void* zero;       /* zero-point row for padding taps */
  size_t input_increment;
  size_t output_increment;
  union qnnp_avgpool_quantization_params quantization_params;
  union {
    q8avgpool_up_ukernel_function unipass_ukernel;
    q8avgpool_mp_ukernel_function multipass_ukernel;
  };
};
340 | |
341 | static void compute_average_pooling_unipass( |
342 | const struct average_pooling_context context[RESTRICT_STATIC 1], |
343 | size_t batch_index, |
344 | size_t output_y) |
345 | { |
346 | const void** indirect_input = |
347 | (const void**) ((uintptr_t) context->indirect_input + |
348 | batch_index * context->indirect_input_batch_stride + output_y * context->indirect_input_height_stride); |
349 | void* output = |
350 | (void*) ((uintptr_t) context->output + batch_index * context->output_batch_stride + output_y * context->output_height_stride); |
351 | |
352 | context->unipass_ukernel( |
353 | context->output_width, context->pooling_size, context->channels, |
354 | (const uint8_t**) indirect_input, context->zero, output, |
355 | context->input_increment, context->output_increment, |
356 | &context->quantization_params); |
357 | } |
358 | |
/*
 * pthreadpool 2D callback: average-pools one output row with a multipass
 * ukernel, using a transient int32 buffer of packed_channels elements.
 */
static void compute_average_pooling_multipass(
    const struct average_pooling_context context[RESTRICT_STATIC 1],
    size_t batch_index,
    size_t output_y)
{
  /* Strides are byte counts; offsets go through uintptr_t. */
  const void** indirect_input =
    (const void**) ((uintptr_t) context->indirect_input +
      batch_index * context->indirect_input_batch_stride + output_y * context->indirect_input_height_stride);
  void* output =
    (void*) ((uintptr_t) context->output + batch_index * context->output_batch_stride + output_y * context->output_height_stride);
  QNNP_ALIGN(16)
#ifdef _MSC_VER
  /* MSVC has no VLAs: use _malloca. NOTE(review): result unchecked for NULL. */
  int32_t* multipass_buffer = _malloca(sizeof(int32_t) * context->packed_channels);
#else
  int32_t multipass_buffer[context->packed_channels];
#endif

  context->multipass_ukernel(
      context->output_width, context->pooling_size, context->channels,
      (const uint8_t**) indirect_input, context->zero, multipass_buffer, output,
      context->input_increment, context->output_increment,
      &context->quantization_params);

#ifdef _MSC_VER
  _freea(multipass_buffer);
#endif
}
386 | |
/*
 * State for global average pooling (reduce all spatial positions per
 * channel). Batch strides are in bytes.
 */
struct global_average_pooling_context {
  const void* input;
  const void* zero; /* zero-point row */
  size_t input_pixel_stride;
  size_t input_batch_stride;
  size_t input_elements; /* spatial positions reduced per batch entry */
  size_t channels;
  size_t packed_channels; /* multipass buffer length */
  void* output;
  size_t output_batch_stride;
  union qnnp_avgpool_quantization_params quantization_params;
  union {
    q8gavgpool_up_ukernel_function unipass_ukernel;
    q8gavgpool_mp_ukernel_function multipass_ukernel;
  };
};
403 | |
404 | static void compute_global_average_pooling_unipass( |
405 | const struct global_average_pooling_context context[RESTRICT_STATIC 1], |
406 | size_t batch_index) |
407 | { |
408 | const void* input = |
409 | (const void*) ((uintptr_t) context->input + batch_index * context->input_batch_stride); |
410 | void* output = |
411 | (void*) ((uintptr_t) context->output + batch_index * context->output_batch_stride); |
412 | |
413 | context->unipass_ukernel( |
414 | context->input_elements, |
415 | context->channels, |
416 | input, |
417 | context->input_pixel_stride, |
418 | context->zero, |
419 | output, |
420 | &context->quantization_params); |
421 | } |
422 | |
/*
 * pthreadpool 1D callback: global average pooling for one batch entry with
 * a multipass ukernel, using a transient int32 buffer of packed_channels
 * elements.
 */
static void compute_global_average_pooling_multipass(
    const struct global_average_pooling_context context[RESTRICT_STATIC 1],
    size_t batch_index)
{
  /* Batch strides are byte counts; offsets go through uintptr_t. */
  const void* input =
    (const void*) ((uintptr_t) context->input + batch_index * context->input_batch_stride);
  void* output =
    (void*) ((uintptr_t) context->output + batch_index * context->output_batch_stride);
  QNNP_ALIGN(16)
#ifdef _MSC_VER
  /* MSVC has no VLAs: use _malloca. NOTE(review): result unchecked for NULL. */
  int32_t* multipass_buffer = _malloca(sizeof(int32_t) * context->packed_channels);
#else
  int32_t multipass_buffer[context->packed_channels];
#endif


  context->multipass_ukernel(
      context->input_elements,
      context->channels,
      input,
      context->input_pixel_stride,
      context->zero,
      multipass_buffer,
      output,
      &context->quantization_params);

#ifdef _MSC_VER
  _freea(multipass_buffer);
#endif
}
453 | |
454 | struct q8add_strided_context { |
455 | size_t n; |
456 | const uint8_t* a; |
457 | size_t a_stride; |
458 | const uint8_t* b; |
459 | size_t b_stride; |
460 | const uint8_t* y; |
461 | size_t y_stride; |
462 | union qnnp_add_quantization_params quantization_params; |
463 | q8vadd_ukernel_function ukernel; |
464 | }; |
465 | |
466 | static void compute_q8add_strided( |
467 | const struct q8add_strided_context context[RESTRICT_STATIC 1], |
468 | size_t batch_offset, |
469 | size_t batch_range /* always 1 */) |
470 | { |
471 | assert(batch_range == 1); |
472 | |
473 | const size_t n = context->n; |
474 | const size_t a_stride = context->a_stride; |
475 | const size_t b_stride = context->b_stride; |
476 | const size_t y_stride = context->y_stride; |
477 | const void* a = (const void*) ((uintptr_t) context->a + a_stride * batch_offset); |
478 | const void* b = (const void*) ((uintptr_t) context->b + b_stride * batch_offset); |
479 | void* y = (void*) ((uintptr_t) context->y + y_stride * batch_offset); |
480 | |
481 | context->ukernel(n, a, b, y, &context->quantization_params); |
482 | } |
483 | |
/* State for compute_q8add_contiguous(): quantized add over one flat buffer. */
struct q8add_contiguous_context {
  const uint8_t* a; /* first addend */
  const uint8_t* b; /* second addend */
  uint8_t* y;       /* output */
  union qnnp_add_quantization_params quantization_params;
  q8vadd_ukernel_function ukernel;
};
491 | |
492 | static void compute_q8add_contiguous( |
493 | const struct q8add_contiguous_context context[RESTRICT_STATIC 1], |
494 | size_t offset, |
495 | size_t size) |
496 | { |
497 | const void* a = (const void*) ((uintptr_t) context->a + offset); |
498 | const void* b = (const void*) ((uintptr_t) context->b + offset); |
499 | void* y = (void*) ((uintptr_t) context->y + offset); |
500 | context->ukernel(size, a, b, y, &context->quantization_params); |
501 | } |
502 | |
/*
 * State for the channel-shuffle callbacks. The fixed variant handles a
 * group count baked into the ukernel; the variable variant passes m
 * (presumably the group count — confirm against ukernel selection).
 */
struct channel_shuffle_context {
  const void* x;   /* input */
  size_t x_stride; /* bytes between batch entries of x */
  void* y;         /* output */
  size_t y_stride; /* bytes between batch entries of y */
  size_t n;
  size_t m;
  union {
    xzipc_ukernel_function fixed_ukernel;
    xzipv_ukernel_function variable_ukernel;
  };
};
515 | |
516 | static void compute_channel_shuffle_fixed( |
517 | const struct channel_shuffle_context context[RESTRICT_STATIC 1], |
518 | size_t index) |
519 | { |
520 | const void* x = (const void*) ((uintptr_t) context->x + index * context->x_stride); |
521 | void* y = (void*) ((uintptr_t) context->y + index * context->y_stride); |
522 | |
523 | context->fixed_ukernel(context->n, x, y); |
524 | } |
525 | |
526 | static void compute_channel_shuffle_variable( |
527 | const struct channel_shuffle_context context[RESTRICT_STATIC 1], |
528 | size_t index) |
529 | { |
530 | const void* x = (const void*) ((uintptr_t) context->x + index * context->x_stride); |
531 | void* y = (void*) ((uintptr_t) context->y + index * context->y_stride); |
532 | |
533 | context->variable_ukernel(context->n, context->m, x, y); |
534 | } |
535 | |
/* State for compute_lut_strided(): per-batch-entry 8-bit table lookup. */
struct lut_strided_context {
  size_t n;        /* elements per batch entry */
  const void* x;   /* input */
  size_t x_stride; /* bytes between batch entries of x */
  const void* t;   /* 256-entry lookup table (assumed — verify ukernel contract) */
  void* y;         /* output */
  size_t y_stride; /* bytes between batch entries of y */
  x8lut_ukernel_function ukernel;
};
545 | |
546 | static void compute_lut_strided( |
547 | const struct lut_strided_context context[RESTRICT_STATIC 1], |
548 | size_t batch_index) |
549 | { |
550 | const void* x = (const void*) ((uintptr_t) context->x + context->x_stride * batch_index); |
551 | void* y = (void*) ((uintptr_t) context->y + context->y_stride * batch_index); |
552 | |
553 | context->ukernel(context->n, x, context->t, y); |
554 | } |
555 | |
/* State for compute_lut_contiguous(): table lookup over one flat buffer.
   x_stride/y_stride are unused by the contiguous callback. */
struct lut_contiguous_context {
  const void* x; /* input */
  size_t x_stride;
  const void* t; /* lookup table */
  void* y;       /* output */
  size_t y_stride;
  x8lut_ukernel_function ukernel;
};
564 | |
565 | static void compute_lut_contiguous( |
566 | const struct lut_contiguous_context context[RESTRICT_STATIC 1], |
567 | size_t offset, |
568 | size_t size) |
569 | { |
570 | const void* x = (const void*) ((uintptr_t) context->x + offset); |
571 | void* y = (void*) ((uintptr_t) context->y + offset); |
572 | |
573 | context->ukernel(size, x, context->t, y); |
574 | } |
575 | |
/* State for compute_clamp_strided(): per-batch-entry uint8 clamp. */
struct clamp_strided_context {
  size_t n;        /* elements per batch entry */
  const void* x;   /* input */
  size_t x_stride; /* bytes between batch entries of x */
  void* y;         /* output */
  size_t y_stride; /* bytes between batch entries of y */
  u8clamp_ukernel_function ukernel;
  union qnnp_u8_clamping_params params; /* output min/max */
};
585 | |
586 | static void compute_clamp_strided( |
587 | const struct clamp_strided_context context[RESTRICT_STATIC 1], |
588 | size_t batch_index) |
589 | { |
590 | const void* x = (const void*) ((uintptr_t) context->x + context->x_stride * batch_index); |
591 | void* y = (void*) ((uintptr_t) context->y + context->y_stride * batch_index); |
592 | context->ukernel(context->n, x, y, &context->params); |
593 | } |
594 | |
/* State for compute_clamp_contiguous(): clamp over one flat buffer.
   x_stride/y_stride are unused by the contiguous callback. */
struct clamp_contiguous_context {
  const void* x; /* input */
  size_t x_stride;
  void* y;       /* output */
  size_t y_stride;
  u8clamp_ukernel_function ukernel;
  union qnnp_u8_clamping_params params; /* output min/max */
};
603 | |
604 | static void compute_clamp_contiguous( |
605 | const struct clamp_contiguous_context context[RESTRICT_STATIC 1], |
606 | size_t offset, |
607 | size_t size) |
608 | { |
609 | const void* x = (const void*) ((uintptr_t) context->x + offset); |
610 | void* y = (void*) ((uintptr_t) context->y + offset); |
611 | context->ukernel(size, x, y, &context->params); |
612 | } |
613 | |
/*
 * State for compute_u8softargmax(): two-phase softargmax — a row-max
 * reduction followed by a LUT-based exp + normalization pass.
 */
struct u8softargmax_context {
  size_t n;          /* elements per batch entry */
  const uint8_t* x;  /* input */
  size_t x_stride;   /* bytes between batch entries of x */
  const uint32_t* t; /* exp lookup table, indexed with a per-row max adjustment */
  uint8_t* y;        /* output */
  size_t y_stride;   /* bytes between batch entries of y */
  u8rmax_ukernel_function rmax_ukernel;         /* row-max reduction */
  u8lut32norm_ukernel_function lut_norm_ukernel; /* lookup + normalize */
};
624 | |
/*
 * pthreadpool 1D callback: softargmax over one batch row. Finds the row
 * maximum, then applies a max-shifted lookup table with normalization for
 * numerical stability.
 */
static void compute_u8softargmax(
    const struct u8softargmax_context context[RESTRICT_STATIC 1],
    size_t batch_index)
{
  const uint8_t* x = (const uint8_t*) ((uintptr_t) context->x + context->x_stride * batch_index);
  uint8_t* y = (uint8_t*) ((uintptr_t) context->y + context->y_stride * batch_index);
  const size_t n = context->n;

  const uint8_t x_max = context->rmax_ukernel(n, x);
  /* For uint8, x_max ^ 255 == 255 - x_max: shifts the table so the row max
     indexes the table's top entry. */
  const size_t adjustment = x_max ^ 255;
  /* NOTE(review): assumes t has at least 256 + adjustment entries — verify
     against the table constructed by the softargmax setup code. */
  const uint32_t* t = (const uint32_t*) context->t + adjustment;
  context->lut_norm_ukernel(n, x, t, y);
}
638 | |
639 | enum qnnp_status qnnp_run_operator(qnnp_operator_t op, pthreadpool_t threadpool) |
640 | { |
641 | // For any ukernel type, there is no work to do if the batch size is 0. |
642 | if (op->batch_size == 0) { |
643 | return qnnp_status_success; |
644 | } |
645 | |
646 | switch (op->ukernel_type) { |
647 | case qnnp_ukernel_type_dwconv: |
648 | { |
649 | const size_t batch_size = op->batch_size; |
650 | const size_t groups = op->groups; |
651 | const size_t kernel_height = op->kernel_height; |
652 | const size_t kernel_width = op->kernel_width; |
653 | const size_t kernel_size = kernel_height * kernel_width; |
654 | const size_t width_step = op->dilation_width == 1 ? op->stride_width : op->kernel_width; |
655 | const size_t output_height = op->output_height; |
656 | const size_t output_width = op->output_width; |
657 | |
658 | switch (kernel_size) { |
659 | case 9: |
660 | { |
661 | struct q8dwconv_context context = { |
662 | .groups = groups, |
663 | .indirection_buffer = (const uint8_t**) op->indirection_buffer, |
664 | .indirection_buffer_row_stride = kernel_size + (output_width * width_step - 1) * kernel_height, |
665 | .indirection_buffer_col_stride = kernel_height * width_step * sizeof(void*), |
666 | .packed_weights = op->packed_weights, |
667 | .output = op->output, |
668 | .output_height = output_height, |
669 | .output_width = output_width, |
670 | .output_row_stride = output_width * op->output_pixel_stride, |
671 | .output_col_increment = (op->output_pixel_stride - groups) * sizeof(uint8_t), |
672 | .quantization_params = op->conv_quantization_params, |
673 | .unipass_ukernel = qnnp_params.q8dw9.updw, |
674 | }; |
675 | pthreadpool_compute_2d( |
676 | threadpool, |
677 | (pthreadpool_function_2d_t) compute_dwconv_unipass, |
678 | &context, |
679 | batch_size, output_height); |
680 | break; |
681 | } |
682 | case 25: |
683 | { |
684 | struct q8dwconv_context context = { |
685 | .groups = groups, |
686 | .group_stride = op->group_stride, |
687 | .indirection_buffer = (const uint8_t**) op->indirection_buffer, |
688 | .indirection_buffer_row_stride = kernel_size + (output_width * width_step - 1) * kernel_height, |
689 | .indirection_buffer_col_stride = kernel_height * width_step * sizeof(void*), |
690 | .packed_weights = op->packed_weights, |
691 | .output = op->output, |
692 | .output_height = output_height, |
693 | .output_width = output_width, |
694 | .output_row_stride = output_width * op->output_pixel_stride, |
695 | .output_col_increment = (op->output_pixel_stride - groups) * sizeof(uint8_t), |
696 | .quantization_params = op->conv_quantization_params, |
697 | .multipass_ukernel = qnnp_params.q8dw25.mpdw, |
698 | }; |
699 | pthreadpool_compute_2d( |
700 | threadpool, |
701 | (pthreadpool_function_2d_t) compute_dwconv_multiipass, |
702 | &context, |
703 | batch_size, output_height); |
704 | break; |
705 | } |
706 | default: |
707 | QNNP_UNREACHABLE; |
708 | } |
709 | break; |
710 | } |
711 | case qnnp_ukernel_type_xzp_gemm: |
712 | { |
713 | const size_t batch_size = op->batch_size; |
714 | const size_t groups = op->groups; |
715 | const size_t group_input_channels = op->group_input_channels; |
716 | const size_t group_output_channels = op->group_output_channels; |
717 | const uint32_t mr = qnnp_params.q8conv_xzp.mr; |
718 | const uint32_t nr = qnnp_params.q8conv_xzp.nr; |
719 | const uint32_t kr = qnnp_params.q8conv_xzp.kr; |
720 | const size_t k_stride = (group_input_channels + (kr - 1)) & -kr; |
721 | const size_t n_stride = (group_output_channels + (nr - 1)) & -nr; |
722 | |
723 | /* compute input row sum */ |
724 | const size_t input_size = op->input_height * op->input_width; |
725 | int32_t* a_sum = (int32_t*) op->a_sum; |
726 | |
727 | struct q8sum_rows_context context = { |
728 | .a = op->input, |
729 | .groups = groups, |
730 | .m = input_size, |
731 | .k = group_input_channels, |
732 | .a_stride = op->input_pixel_stride, |
733 | .multiplier = (int32_t) -op->kernel_zero_point, |
734 | .a_sum = a_sum, |
735 | .a_sum_stride = input_size, |
736 | .ukernel = qnnp_params.q8sum_rows.sum_rows, |
737 | }; |
738 | pthreadpool_compute_3d_tiled( |
739 | threadpool, |
740 | (pthreadpool_function_3d_tiled_t) compute_sum_rows, |
741 | &context, |
742 | groups, batch_size, input_size, |
743 | 1, 1, qnnp_params.q8sum_rows.m); |
744 | |
745 | struct q8gemm_xzp_context q8gemm_xzp_context = { |
746 | .k = group_input_channels, |
747 | .k_stride = k_stride, |
748 | .n = group_output_channels, |
749 | .n_stride = n_stride, |
750 | .a = op->input, |
751 | .a_stride = op->input_pixel_stride, |
752 | .packed_w = op->packed_weights, |
753 | .c = op->output, |
754 | .c_stride = op->output_pixel_stride, |
755 | .a_sum = a_sum, |
756 | .groups = op->groups, |
757 | .batch_size = batch_size, |
758 | .a_sum_stride = input_size, |
759 | .requantization_params = op->requantization_params, |
760 | .ukernel = qnnp_params.q8conv_xzp.gemm, |
761 | }; |
762 | pthreadpool_compute_4d_tiled( |
763 | threadpool, |
764 | (pthreadpool_function_4d_tiled_t) compute_q8gemm_xzp, |
765 | &q8gemm_xzp_context, |
766 | groups, batch_size * input_size, input_size, group_output_channels, |
767 | 1, input_size, mr, nr); |
768 | break; |
769 | } |
770 | case qnnp_ukernel_type_gemm: |
771 | { |
772 | const size_t batch_size = op->batch_size; |
773 | const size_t groups = op->groups; |
774 | const size_t group_input_channels = op->group_input_channels; |
775 | const size_t group_output_channels = op->group_output_channels; |
776 | const uint32_t mr = qnnp_params.q8conv.mr; |
777 | const uint32_t nr = qnnp_params.q8conv.nr; |
778 | const uint32_t kr = qnnp_params.q8conv.kr; |
779 | const size_t k_stride = (group_input_channels + (kr - 1)) & -kr; |
780 | const size_t n_stride = (group_output_channels + (nr - 1)) & -nr; |
781 | |
782 | const size_t output_size = op->output_height * op->output_width; |
783 | struct q8gemm_context q8gemm_context = { |
784 | .k = group_input_channels, |
785 | .k_stride = k_stride, |
786 | .n = group_output_channels, |
787 | .n_stride = n_stride, |
788 | .a = op->input, |
789 | .a_stride = op->input_pixel_stride, |
790 | .packed_w = op->packed_weights, |
791 | .c = op->output, |
792 | .c_stride = op->output_pixel_stride, |
793 | .quantization_params = op->conv_quantization_params, |
794 | .ukernel = qnnp_params.q8conv.gemm, |
795 | }; |
796 | |
797 | pthreadpool_compute_4d_tiled( |
798 | threadpool, |
799 | (pthreadpool_function_4d_tiled_t) compute_q8gemm, |
800 | &q8gemm_context, |
801 | groups, batch_size * output_size, output_size, group_output_channels, |
802 | 1, output_size, mr, nr); |
803 | break; |
804 | } |
805 | case qnnp_ukernel_type_conv: |
806 | { |
807 | const size_t batch_size = op->batch_size; |
808 | const size_t groups = op->groups; |
809 | const size_t group_input_channels = op->group_input_channels; |
810 | const size_t group_output_channels = op->group_output_channels; |
811 | const uint32_t mr = qnnp_params.q8conv.mr; |
812 | const uint32_t nr = qnnp_params.q8conv.nr; |
813 | const uint32_t kr = qnnp_params.q8conv.kr; |
814 | const size_t k_stride = (group_input_channels + (kr - 1)) & -kr; |
815 | const size_t n_stride = (group_output_channels + (nr - 1)) & -nr; |
816 | |
817 | const size_t output_size = op->output_height * op->output_width; |
818 | const size_t kernel_size = op->kernel_height * op->kernel_width; |
819 | const size_t m_stride = round_up(output_size, mr); |
820 | struct q8conv_context q8conv_context = { |
821 | .bs = batch_size, |
822 | .ks = kernel_size, |
823 | .kc = group_input_channels, |
824 | .kc_stride = k_stride * kernel_size, |
825 | .m = output_size, |
826 | .m_stride = m_stride, |
827 | .n = group_output_channels, |
828 | .n_stride = n_stride, |
829 | .indirect_a = (const uint8_t**) op->indirection_buffer, |
830 | .packed_w = op->packed_weights, |
831 | .c = op->output, |
832 | .c_stride = op->output_pixel_stride, |
833 | .quantization_params = op->conv_quantization_params, |
834 | .ukernel = qnnp_params.q8conv.conv, |
835 | }; |
836 | |
837 | pthreadpool_compute_4d_tiled( |
838 | threadpool, |
839 | (pthreadpool_function_4d_tiled_t) compute_q8conv, |
840 | &q8conv_context, |
841 | groups, batch_size, output_size, group_output_channels, |
842 | 1, 1, mr, nr); |
843 | break; |
844 | } |
case qnnp_ukernel_type_average_pooling:
{
  /*
   * Quantized (uint8) 2D average pooling, parallelized over
   * (batch index, output row) via pthreadpool.
   *
   * Micro-kernel tile parameters — NOTE(review): semantics inferred from
   * their use below, confirm against the q8avgpool micro-kernels:
   *   kr - channel tile (channels are packed/processed in multiples of kr),
   *   mr - max pooling elements a unipass kernel consumes,
   *   qr - pooling elements consumed per extra multipass round.
   */
  const uint32_t kr = qnnp_params.q8avgpool.kr;
  const uint32_t mr = qnnp_params.q8avgpool.mr;
  const uint32_t qr = qnnp_params.q8avgpool.qr;
  const size_t channels = op->channels;
  const size_t output_width = op->output_width;
  const size_t output_height = op->output_height;
  const size_t pooling_height = op->kernel_height;
  const size_t pooling_width = op->kernel_width;
  const size_t pooling_size = pooling_height * pooling_width;

  /*
   * When the horizontal stride is smaller than the kernel width, adjacent
   * output pixels share indirection-buffer entries, so each output pixel
   * advances the indirection pointer by only width_step columns.
   */
  const size_t width_step = min(op->stride_width, pooling_width);
  /* Bytes between consecutive output rows in the indirection buffer. */
  const size_t indirect_input_height_stride = (pooling_size + (output_width * width_step - 1) * pooling_height) * sizeof(void*);
  /* Elements between consecutive output rows of the output tensor. */
  const size_t output_height_stride = output_width * op->output_pixel_stride;

  /*
   * Indirection entries consumed by the extra passes of the multipass
   * kernel; subtracted from the per-pixel pointer advance below.
   * Stays zero when a unipass kernel will be selected.
   */
  size_t multipass_adjustment = 0;
  if (channels >= kr && pooling_size > mr) {
    multipass_adjustment = round_up(pooling_size - mr, qr) + mr - qr;
  }
  struct average_pooling_context context = {
    .indirect_input = op->indirection_buffer,
    .indirect_input_batch_stride = output_height * indirect_input_height_stride,
    .indirect_input_height_stride = indirect_input_height_stride,
    .output = op->output,
    .output_batch_stride = output_height * output_height_stride,
    .output_height_stride = output_height_stride,
    .output_width = output_width,
    .pooling_size = pooling_size,
    .channels = channels,
    /* Round channels up to a multiple of kr (assumes kr is a power of two). */
    .packed_channels = (channels + (kr - 1)) & -kr,
    .zero = op->zero_pointer,
    .input_increment = (pooling_height * width_step - multipass_adjustment) * sizeof(void*),
    /* Skip padding between the last channel and the next output pixel. */
    .output_increment = (op->output_pixel_stride - channels) * sizeof(uint8_t),
    .quantization_params = op->avgpool_quantization_params,
  };

  /*
   * Kernel selection:
   *  - channels < kr:       "less-than-kr" unipass kernel,
   *  - pooling_size <= mr:  full-width unipass kernel,
   *  - otherwise:           multipass kernel (needs multipass_adjustment).
   */
  pthreadpool_function_2d_t compute_function = NULL;
  if (channels < kr) {
    compute_function = (pthreadpool_function_2d_t) compute_average_pooling_unipass;
    context.unipass_ukernel = qnnp_params.q8avgpool.ltkr;
  } else {
    if (pooling_size <= mr) {
      compute_function = (pthreadpool_function_2d_t) compute_average_pooling_unipass;
      context.unipass_ukernel = qnnp_params.q8avgpool.gekr_lemr;
    } else {
      compute_function = (pthreadpool_function_2d_t) compute_average_pooling_multipass;
      context.multipass_ukernel = qnnp_params.q8avgpool.gekr_gtmr;
    }
  }

  pthreadpool_compute_2d(threadpool, compute_function, &context, op->batch_size, output_height);
  break;
}
899 | case qnnp_ukernel_type_max_pooling: |
900 | { |
901 | const uint32_t kr = qnnp_params.u8maxpool.kr; |
902 | const uint32_t mr = qnnp_params.u8maxpool.mr; |
903 | const uint32_t qr = qnnp_params.u8maxpool.qr; |
904 | const size_t channels = op->channels; |
905 | const size_t output_width = op->output_width; |
906 | const size_t output_height = op->output_height; |
907 | const size_t pooling_height = op->kernel_height; |
908 | const size_t pooling_width = op->kernel_width; |
909 | const size_t pooling_size = pooling_height * pooling_width; |
910 | |
911 | const size_t width_step = op->dilation_width > 1 ? pooling_width : min(op->stride_width, pooling_width); |
912 | const size_t indirect_input_height_stride = (pooling_size + (output_width * width_step - 1) * pooling_height) * sizeof(void*); |
913 | const size_t output_height_stride = output_width * op->output_pixel_stride; |
914 | |
915 | size_t multipass_adjustment = pooling_size; |
916 | if (channels >= kr) { |
917 | multipass_adjustment = round_up(doz(pooling_size, mr), qr) + mr; |
918 | } |
919 | struct max_pooling_context context = { |
920 | .indirect_input = op->indirection_buffer, |
921 | .indirect_input_batch_stride = output_height * indirect_input_height_stride, |
922 | .indirect_input_height_stride = indirect_input_height_stride, |
923 | .output = op->output, |
924 | .output_batch_stride = output_height * output_height_stride, |
925 | .output_height_stride = output_height_stride, |
926 | .output_width = output_width, |
927 | .pooling_size = pooling_size, |
928 | .channels = channels, |
929 | .input_increment = (pooling_height * width_step - multipass_adjustment) * sizeof(void*), |
930 | .output_increment = (op->output_pixel_stride - channels) * sizeof(uint8_t), |
931 | .params = op->u8_clamping_params, |
932 | .ukernel = channels < kr ? qnnp_params.u8maxpool.ltkr : qnnp_params.u8maxpool.gekr, |
933 | }; |
934 | |
935 | pthreadpool_compute_2d(threadpool, |
936 | (pthreadpool_function_2d_t) compute_max_pooling, &context, |
937 | op->batch_size, output_height); |
938 | break; |
939 | }; |
case qnnp_ukernel_type_add:
{
  /*
   * Elementwise quantized addition y = a + b (with requantization).
   * Two execution strategies, both driven by the same q8vadd micro-kernel:
   * a flat contiguous pass when the data has no inter-row padding, or a
   * row-by-row strided pass otherwise.
   */
  const size_t batch_size = op->batch_size;
  const size_t channels = op->channels;
  const size_t a_stride = op->input_pixel_stride;
  const size_t b_stride = op->input2_pixel_stride;
  const size_t y_stride = op->output_pixel_stride;
  /*
   * (x ^ channels) is zero iff x == channels; OR-ing the three tests gives
   * a single branch for "all strides equal channels", i.e. the tensors are
   * dense. A single row (batch_size == 1) is trivially contiguous as well.
   */
  if ((((a_stride ^ channels) | (b_stride ^ channels) | (y_stride ^ channels)) == 0) || batch_size == 1) {
    /* Tile the flat byte range in fixed-size chunks across threads. */
    const size_t block_size = 4096;
    struct q8add_contiguous_context add_context = {
      .a = op->input,
      .b = op->input2,
      .y = op->output,
      .quantization_params = op->add_quantization_params,
      .ukernel = qnnp_params.q8vadd,
    };
    pthreadpool_compute_1d_tiled(
      threadpool,
      (pthreadpool_function_1d_tiled_t) compute_q8add_contiguous,
      &add_context,
      batch_size * channels * sizeof(uint8_t), block_size);
  } else {
    /* Padded rows: process one batch row per task, honoring each stride. */
    struct q8add_strided_context add_context = {
      .a = op->input,
      .a_stride = a_stride * sizeof(uint8_t),
      .b = op->input2,
      .b_stride = b_stride * sizeof(uint8_t),
      .y = op->output,
      .y_stride = y_stride * sizeof(uint8_t),
      .n = channels,
      .quantization_params = op->add_quantization_params,
      .ukernel = qnnp_params.q8vadd,
    };
    pthreadpool_compute_1d_tiled(
      threadpool,
      (pthreadpool_function_1d_tiled_t) compute_q8add_strided,
      &add_context,
      batch_size, 1);
  }
  break;
}
case qnnp_ukernel_type_global_average_pooling:
{
  /*
   * Global average pooling: averages all spatial positions per channel,
   * producing one output pixel per batch image. Parallelized over batch.
   *
   * Tile parameters — NOTE(review): semantics inferred from use below:
   *   nr - channel tile (packing granularity and ltnr/genr selection),
   *   mr - max input elements a unipass kernel handles.
   */
  const uint32_t nr = qnnp_params.q8gavgpool.nr;
  const uint32_t mr = qnnp_params.q8gavgpool.mr;
  const size_t input_pixel_stride = op->input_pixel_stride * sizeof(uint8_t);
  const size_t input_width = op->input_width;
  const size_t channels = op->channels;
  struct global_average_pooling_context context = {
    .input = op->input,
    .zero = op->zero_pointer,
    .input_pixel_stride = input_pixel_stride,
    .input_batch_stride = input_pixel_stride * input_width,
    .input_elements = input_width,
    .channels = channels,
    /* Round channels up to a multiple of nr (assumes nr is a power of two). */
    .packed_channels = (channels + (nr - 1)) & -nr,
    .output = op->output,
    /* One output pixel per image, so batches are one pixel stride apart. */
    .output_batch_stride = op->output_pixel_stride * sizeof(uint8_t),
    .quantization_params = op->avgpool_quantization_params,
  };
  /*
   * Kernel selection mirrors average pooling: less-than-nr channels,
   * unipass when the whole input row count fits in mr, multipass otherwise.
   */
  pthreadpool_function_1d_t compute_function = NULL;
  if (channels < nr) {
    compute_function = (pthreadpool_function_1d_t) compute_global_average_pooling_unipass;
    context.unipass_ukernel = qnnp_params.q8gavgpool.ltnr;
  } else {
    if (input_width <= mr) {
      compute_function = (pthreadpool_function_1d_t) compute_global_average_pooling_unipass;
      context.unipass_ukernel = qnnp_params.q8gavgpool.genr_lemr;
    } else {
      compute_function = (pthreadpool_function_1d_t) compute_global_average_pooling_multipass;
      context.multipass_ukernel = qnnp_params.q8gavgpool.genr_gtmr;
    }
  }

  pthreadpool_compute_1d(threadpool, compute_function, &context, op->batch_size);
  break;
}
case qnnp_ukernel_type_lut:
{
  /*
   * Elementwise 256-entry lookup-table transform y[i] = t[x[i]] (used for
   * nonlinearities like sigmoid/tanh on quantized data). Same
   * contiguous-vs-strided dispatch as the add operator.
   */
  const size_t batch_size = op->batch_size;
  const size_t channels = op->channels;
  const size_t x_stride = op->input_pixel_stride;
  const size_t y_stride = op->output_pixel_stride;
  /* XOR-trick: both strides equal channels => dense tensors; a single
     row is trivially contiguous. */
  if ((((x_stride ^ channels) | (y_stride ^ channels)) == 0) || batch_size == 1) {
    /* Tile the flat byte range in fixed-size chunks across threads. */
    const size_t block_size = 1024;
    struct lut_contiguous_context context = {
      .x = op->input,
      .x_stride = x_stride * sizeof(uint8_t),
      .t = op->lookup_table,
      .y = op->output,
      .y_stride = y_stride * sizeof(uint8_t),
      .ukernel = qnnp_params.x8lut,
    };
    pthreadpool_compute_1d_tiled(
      threadpool,
      (pthreadpool_function_1d_tiled_t) compute_lut_contiguous, &context,
      batch_size * channels * sizeof(uint8_t), block_size);
  } else {
    /* Padded rows: one batch row per task. */
    struct lut_strided_context context = {
      .n = channels,
      .x = op->input,
      .x_stride = x_stride * sizeof(uint8_t),
      .t = op->lookup_table,
      .y = op->output,
      .y_stride = y_stride * sizeof(uint8_t),
      .ukernel = qnnp_params.x8lut,
    };
    pthreadpool_compute_1d(
      threadpool,
      (pthreadpool_function_1d_t) compute_lut_strided, &context,
      batch_size);
  }
  break;
}
case qnnp_ukernel_type_clamp:
{
  /*
   * Elementwise uint8 clamp (saturating min/max, e.g. quantized ReLU6),
   * bounds supplied via u8_clamping_params. Same contiguous-vs-strided
   * dispatch as the add and LUT operators.
   */
  const size_t batch_size = op->batch_size;
  const size_t channels = op->channels;
  const size_t x_stride = op->input_pixel_stride;
  const size_t y_stride = op->output_pixel_stride;
  /* XOR-trick: both strides equal channels => dense tensors; a single
     row is trivially contiguous. */
  if ((((x_stride ^ channels) | (y_stride ^ channels)) == 0) || batch_size == 1) {
    /* Tile the flat byte range in fixed-size chunks across threads. */
    const size_t block_size = 4096;
    struct clamp_contiguous_context context = {
      .x = op->input,
      .x_stride = x_stride * sizeof(uint8_t),
      .y = op->output,
      .y_stride = y_stride * sizeof(uint8_t),
      .ukernel = qnnp_params.u8clamp,
      .params = op->u8_clamping_params,
    };
    pthreadpool_compute_1d_tiled(
      threadpool,
      (pthreadpool_function_1d_tiled_t) compute_clamp_contiguous, &context,
      batch_size * channels * sizeof(uint8_t), block_size);
  } else {
    /* Padded rows: one batch row per task. */
    struct clamp_strided_context context = {
      .n = channels,
      .x = op->input,
      .x_stride = x_stride * sizeof(uint8_t),
      .y = op->output,
      .y_stride = y_stride * sizeof(uint8_t),
      .ukernel = qnnp_params.u8clamp,
      .params = op->u8_clamping_params,
    };
    pthreadpool_compute_1d(
      threadpool,
      (pthreadpool_function_1d_t) compute_clamp_strided, &context,
      batch_size);
  }
  break;
}
case qnnp_ukernel_type_softargmax:
{
  /*
   * Quantized softmax over the channel dimension, one batch row per task.
   * Two-stage computation: u8rmax reduces each row to its maximum, then
   * u8lut32norm applies the precomputed exponential lookup table and
   * normalizes — NOTE(review): stage semantics inferred from the kernel
   * names; confirm against compute_u8softargmax.
   */
  struct u8softargmax_context context = {
    .n = op->channels,
    .x = op->input,
    .x_stride = op->input_pixel_stride * sizeof(uint8_t),
    .t = op->lookup_table,
    .y = op->output,
    .y_stride = op->output_pixel_stride * sizeof(uint8_t),
    .rmax_ukernel = qnnp_params.u8rmax,
    .lut_norm_ukernel = qnnp_params.u8lut32norm,
  };
  pthreadpool_compute_1d(
    threadpool,
    (pthreadpool_function_1d_t) compute_u8softargmax, &context,
    op->batch_size);
  break;
}
case qnnp_ukernel_type_channel_shuffle:
{
  /*
   * Channel shuffle (ShuffleNet-style): interleaves `groups` blocks of
   * `group_channels` channels, implemented as an x8zip byte transpose.
   * Parallelized over batch rows.
   */
  const size_t groups = op->groups;
  struct channel_shuffle_context channel_shuffle_context = {
    .x = op->input,
    .x_stride = op->input_pixel_stride * sizeof(uint8_t),
    .y = op->output,
    .y_stride = op->output_pixel_stride * sizeof(uint8_t),
    .n = op->group_channels * sizeof(uint8_t),
    .m = groups,
  };
  pthreadpool_function_1d_t compute_function = NULL;
  /* Specialized zip kernels exist for 2/3/4 groups; xm handles the rest. */
  switch (groups) {
    case 2:
      compute_function = (pthreadpool_function_1d_t) compute_channel_shuffle_fixed;
      channel_shuffle_context.fixed_ukernel = qnnp_params.x8zip.x2;
      break;
    case 3:
      compute_function = (pthreadpool_function_1d_t) compute_channel_shuffle_fixed;
      channel_shuffle_context.fixed_ukernel = qnnp_params.x8zip.x3;
      break;
    case 4:
      compute_function = (pthreadpool_function_1d_t) compute_channel_shuffle_fixed;
      channel_shuffle_context.fixed_ukernel = qnnp_params.x8zip.x4;
      break;
    default:
      compute_function = (pthreadpool_function_1d_t) compute_channel_shuffle_variable;
      channel_shuffle_context.variable_ukernel = qnnp_params.x8zip.xm;
      break;
    /*
     * Labels after `default` are legal C; 0/1 groups are marked
     * unreachable — presumably rejected at operator-creation time
     * (NOTE(review): confirm against the channel-shuffle setup code).
     */
    case 0:
    case 1:
      QNNP_UNREACHABLE;
  }
  pthreadpool_compute_1d(
    threadpool,
    compute_function,
    &channel_shuffle_context,
    op->batch_size);
  break;
}
1149 | default: |
1150 | QNNP_UNREACHABLE; |
1151 | } |
1152 | return qnnp_status_success; |
1153 | } |
1154 | |