1// Copyright (c) Facebook, Inc. and its affiliates.
2// All rights reserved.
3//
4// Copyright 2019 Google LLC
5//
6// This source code is licensed under the BSD-style license found in the
7// LICENSE file in the root directory of this source tree.
8
9#pragma once
10
11#include <stdbool.h>
12#include <stddef.h>
13#include <stdint.h>
14
15#include <pthreadpool.h>
16
17#ifdef __cplusplus
18extern "C" {
19#endif
20
/// The number of bytes XNNPACK may read beyond array bounds.
/// The caller must allocate at least this many extra bytes after the tensor data passed to XNNPACK.
///
/// Note: XNNPACK reads, but never writes, beyond array bounds.
#define XNN_EXTRA_BYTES 16

/// Maximum number of dimensions in a tensor shape.
#define XNN_MAX_TENSOR_DIMS 6
29
/// Allow sparse inference in a Runtime.
///
/// Note: this flag hints XNNPACK to consider sparse inference, but does not guarantee it.
#define XNN_FLAG_SPARSE_INFERENCE 0x00000001
/// Alias of XNN_FLAG_SPARSE_INFERENCE; the name spells out that the flag is only a hint.
#define XNN_FLAG_HINT_SPARSE_INFERENCE XNN_FLAG_SPARSE_INFERENCE

/// Allow IEEE FP16 inference in a Runtime.
///
/// Note: this flag hints XNNPACK to consider IEEE FP16 inference, but does not guarantee it.
#define XNN_FLAG_FP16_INFERENCE 0x00000002
/// Alias of XNN_FLAG_FP16_INFERENCE; the name spells out that the flag is only a hint.
#define XNN_FLAG_HINT_FP16_INFERENCE XNN_FLAG_FP16_INFERENCE

/// Force IEEE FP16 inference in a Runtime, and fail if FP16 inference is not possible.
///
/// Note: this flag guarantees that XNNPACK will use IEEE FP16 inference, or fail to create the Runtime object.
/// Warning: on x86 systems FP16 computations will be emulated at a substantial performance cost.
#define XNN_FLAG_FORCE_FP16_INFERENCE 0x00000004

/// Enable timing of each operator's runtime.
#define XNN_FLAG_BASIC_PROFILING 0x00000008
50
// Note: several of the flags below share the same bit value (e.g. 0x00000004 is used by three of them).
// NOTE(review): presumably each flag is only interpreted by the operators it is documented for, so flags with
// the same value never meet in one call - confirm before combining flags.

/// The convolution operator represents a depthwise convolution, and uses HWGo layout for filters.
#define XNN_FLAG_DEPTHWISE_CONVOLUTION 0x00000001

/// Assume transposed weights in a fully connected operator.
#define XNN_FLAG_TRANSPOSE_WEIGHTS 0x00000001

/// The operator assumes NHWC layout for the input, regardless of the output layout.
#define XNN_FLAG_INPUT_NHWC 0x00000002

/// Match "SAME" padding in TensorFlow. Exact padding values are computed dynamically depending on input size.
#define XNN_FLAG_TENSORFLOW_SAME_PADDING 0x00000004

/// Implicitly flatten and reshape input of a Fully Connected operator into a 2D tensor.
#define XNN_FLAG_TENSORFLOW_RESHAPE_2D 0x00000004

/// Match behaviour of TensorFlow 1.x.
#define XNN_FLAG_TENSORFLOW_LEGACY_MODE 0x00000004

/// Static weights of the FP16 operator are in FP32 format.
#define XNN_FLAG_FP32_STATIC_WEIGHTS 0x00000008

/// Align corners of input and output images in resize operations.
#define XNN_FLAG_ALIGN_CORNERS 0x00000008

/// Yield worker threads of the thread pool to the system scheduler after the inference.
#define XNN_FLAG_YIELD_WORKERS 0x00000010
77
/// Status code for any XNNPACK function call.
///
/// See the @retval documentation of each function for the codes it can return.
enum xnn_status {
  /// The call succeeded, and all output arguments now contain valid data.
  xnn_status_success = 0,
  /// NOTE(review): presumably reported when XNNPACK is used before a successful xnn_initialize call - confirm.
  xnn_status_uninitialized = 1,
  /// NOTE(review): presumably reported when an argument value is not valid - confirm.
  xnn_status_invalid_parameter = 2,
  /// NOTE(review): presumably reported when an object is not in a state that permits the call - confirm.
  xnn_status_invalid_state = 3,
  /// NOTE(review): presumably reported for arguments that are valid but not supported - confirm.
  xnn_status_unsupported_parameter = 4,
  /// The host processor does not satisfy the hardware requirements (see xnn_initialize).
  xnn_status_unsupported_hardware = 5,
  /// The call failed due to an out-of-memory condition (see xnn_initialize).
  xnn_status_out_of_memory = 6,
};
89
/// Set of user-provided memory management callbacks.
///
/// A pointer to this structure can be passed to xnn_initialize to replace the system-provided memory management
/// functions. Every callback receives the @ref context pointer unchanged as its first argument.
struct xnn_allocator {
  /// User-specified pointer that will be passed as-is to all functions in this structure.
  void* context;
  /// Pointer to a function to be called for general memory allocation.
  ///
  /// @param context - The user-specified pointer from xnn_allocator structure.
  /// @param size - The size of the memory block to allocate, in bytes.
  ///
  /// @returns Pointer to the allocated memory block of at least @ref size bytes.
  ///          If allocation fails, the function must return NULL.
  void* (*allocate)(void* context, size_t size);
  /// Pointer to a function to be called for general memory re-allocation, i.e. to increase or shrink a previously
  /// allocated memory block. The content of the old memory block is copied to the new memory block.
  ///
  /// @param context - The user-specified pointer from xnn_allocator structure.
  /// @param pointer - Pointer to a memory block allocated by @ref allocate or @ref reallocate functions. Can be NULL.
  ///                  If the pointer is NULL, the @ref reallocate call is equivalent to an @ref allocate call.
  /// @param size - The new size of the memory block to allocate, in bytes.
  ///
  /// @returns Pointer to the newly allocated memory block of at least @ref size bytes with the content of the previous
  ///          memory block.
  ///          If allocation fails, the function must return NULL, but must not release the previous memory block.
  void* (*reallocate)(void* context, void* pointer, size_t size);
  /// Pointer to a function to be called for general memory de-allocation.
  ///
  /// @param context - The user-specified pointer from xnn_allocator structure.
  /// @param pointer - Pointer to a memory block allocated by @ref allocate or @ref reallocate functions. Can be NULL.
  ///                  If the pointer is NULL, the @ref deallocate call is a no-op.
  void (*deallocate)(void* context, void* pointer);
  /// Pointer to a function to be called for aligned memory allocation.
  ///
  /// @param context - The user-specified pointer from xnn_allocator structure.
  /// @param alignment - The alignment of the memory block to allocate, in bytes. Alignment is always a power-of-2.
  /// @param size - The size of the memory block to allocate, in bytes.
  ///
  /// @returns Pointer to the allocated memory block of at least @ref size bytes.
  ///          If allocation fails, the function must return NULL.
  void* (*aligned_allocate)(void* context, size_t alignment, size_t size);
  /// Pointer to a function to be called for aligned memory de-allocation.
  ///
  /// @param context - The user-specified pointer from xnn_allocator structure.
  /// @param pointer - Pointer to a memory block allocated by @ref aligned_allocate function. Can be NULL.
  ///                  If the pointer is NULL, the @ref aligned_deallocate call is a no-op.
  void (*aligned_deallocate)(void* context, void* pointer);
};
135
/// Initialize XNNPACK library.
///
/// XNNPACK must be successfully initialized before use. During initialization, XNNPACK populates internal structures
/// depending on the host processor. Initialization can be time-consuming.
///
/// @param[in] allocator - structure with function pointers to be used for memory allocation and de-allocation.
///                        If this argument is NULL, system-provided memory management functions (e.g. malloc/free)
///                        will be used.
///
/// @retval xnn_status_success - XNNPACK is successfully initialized and ready to use.
/// @retval xnn_status_out_of_memory - initialization failed due to out-of-memory condition.
/// @retval xnn_status_unsupported_hardware - initialization failed because the host processor does not satisfy the
///                                           minimum hardware requirements for XNNPACK. E.g. this may happen on x86
///                                           processors without SSE2 extension, or on 32-bit ARM processors without
///                                           the NEON SIMD extension.
enum xnn_status xnn_initialize(const struct xnn_allocator* allocator);
152
/// Deinitialize XNNPACK library.
///
/// To avoid memory and resource leaks, users must call xnn_deinitialize exactly once for each successful
/// xnn_initialize call.
///
/// @retval xnn_status_success - deinitialization call succeeded.
enum xnn_status xnn_deinitialize(void);
159
/// Subgraph is an abstract representation of a neural network model.
/// Subgraph objects are used to define Values (tensors) and Nodes (operators) comprising the model.
///
/// The handle is a pointer to struct xnn_subgraph. Handles are produced by xnn_create_subgraph and released with
/// xnn_delete_subgraph.
typedef struct xnn_subgraph* xnn_subgraph_t;
163
/// Create an empty Subgraph object.
///
/// @param external_value_ids - number of Value IDs to reserve for communication with external graph representation.
///                             The Subgraph object would avoid creating internal Value IDs in the
///                             [0, external_value_ids-1] range.
/// @param flags - binary features of the subgraph. No supported flags are currently defined.
/// @param subgraph_out - pointer to the variable that will be initialized with a handle to the Subgraph object upon
///                       successful return.
enum xnn_status xnn_create_subgraph(
  uint32_t external_value_ids,
  uint32_t flags,
  xnn_subgraph_t* subgraph_out);
176
/// Destroy a Subgraph object, as well as the Values and Nodes associated with the subgraph.
///
/// @param subgraph - the Subgraph object to destroy.
enum xnn_status xnn_delete_subgraph(
  xnn_subgraph_t subgraph);
182
/// Binary features of a Value, passed in the @a flags argument of the xnn_define_*tensor_value functions.
/// NOTE(review): those functions document only XNN_VALUE_FLAG_EXTERNAL_INPUT and XNN_VALUE_FLAG_EXTERNAL_OUTPUT as
/// supported - confirm where XNN_VALUE_FLAG_PERSISTENT is accepted.
#define XNN_VALUE_FLAG_EXTERNAL_INPUT  0x00000001
#define XNN_VALUE_FLAG_EXTERNAL_OUTPUT 0x00000002
#define XNN_VALUE_FLAG_PERSISTENT      0x00000004

/// Sentinel Value ID. Passed as an external ID when defining a Value, it requests creation of an internal ID; passed
/// as a bias ID when defining a Node, it denotes a Node without a bias.
#define XNN_INVALID_VALUE_ID UINT32_MAX
188
/// Type of elements in a Value object.
///
/// Naming: fp* are floating-point types, q* are quantized types with per-Value quantization parameters, and qc* are
/// quantized types with per-channel quantization parameters.
enum xnn_datatype {
  /// Invalid data type. Valid Values never have this datatype.
  xnn_datatype_invalid = 0,
  /// IEEE754 single-precision floating-point.
  xnn_datatype_fp32 = 1,
  /// IEEE754 half-precision floating-point.
  xnn_datatype_fp16 = 2,
  /// Quantized 8-bit signed integer with shared per-Value quantization parameters.
  xnn_datatype_qint8 = 3,
  /// Quantized 8-bit unsigned integer with shared per-Value quantization parameters.
  xnn_datatype_quint8 = 4,
  /// Quantized 32-bit signed integer with shared per-Value quantization parameters.
  xnn_datatype_qint32 = 5,
  /// Quantized 8-bit signed integer with shared per-channel quantization parameters.
  xnn_datatype_qcint8 = 6,
  /// Quantized 32-bit signed integer with shared per-channel quantization parameters.
  xnn_datatype_qcint32 = 7,
};
208
/// Define a tensor-type Value and add it to a Subgraph.
///
/// @param subgraph - a Subgraph object that will own the created Value.
/// @param datatype - type of the tensor elements.
/// @param num_dims - number of dimensions in the shape.
/// @param dims - pointer to an array of @a num_dims shape dimensions. If num_dims is 0, this pointer can be NULL.
///               XNNPACK does not keep any pointers to this array after the function returns.
/// @param data - pointer to static data used for tensor initialization. If the tensor is not statically initialized,
///               this pointer must be NULL. If non-NULL, the life-time of the static data must exceed the life-time
///               of the Subgraph object, and of any Runtime objects created from the Subgraph.
/// @param external_id - external ID for the Value. The ID must be within the range of reserved Value IDs specified on
///                      the Subgraph creation. If the external ID is XNN_INVALID_VALUE_ID, an internal ID will be
///                      created for the Value.
/// @param flags - binary features of the Value. Supported values are any combination of XNN_VALUE_FLAG_EXTERNAL_INPUT
///                and XNN_VALUE_FLAG_EXTERNAL_OUTPUT.
/// @param id_out - pointer to the variable that will be initialized with the Value ID upon successful return. If a
///                 valid @a external_id was provided, the variable will be initialized with the @a external_id value.
enum xnn_status xnn_define_tensor_value(
  xnn_subgraph_t subgraph,
  enum xnn_datatype datatype,
  size_t num_dims,
  const size_t* dims,
  const void* data,
  uint32_t external_id,
  uint32_t flags,
  uint32_t* id_out);
235
/// Define a quantized tensor-type Value and add it to a Subgraph.
///
/// @param subgraph - a Subgraph object that will own the created Value.
/// @param datatype - type of the tensor elements.
/// @param zero_point - offset from zero to subtract from the quantized elements in the Value.
/// @param scale - multiplication factor to convert quantized elements to real representation.
/// @param num_dims - number of dimensions in the shape.
/// @param dims - pointer to an array of @a num_dims shape dimensions. If num_dims is 0, this pointer can be NULL.
///               XNNPACK does not keep any pointers to this array after the function returns.
/// @param data - pointer to static data used for tensor initialization. If the tensor is not statically initialized,
///               this pointer must be NULL. If non-NULL, the life-time of the static data must exceed the life-time
///               of the Subgraph object, and of any Runtime objects created from the Subgraph.
/// @param external_id - external ID for the Value. The ID must be within the range of reserved Value IDs specified on
///                      the Subgraph creation. If the external ID is XNN_INVALID_VALUE_ID, an internal ID will be
///                      created for the Value.
/// @param flags - binary features of the Value. Supported values are any combination of XNN_VALUE_FLAG_EXTERNAL_INPUT
///                and XNN_VALUE_FLAG_EXTERNAL_OUTPUT.
/// @param id_out - pointer to the variable that will be initialized with the Value ID upon successful return. If a
///                 valid @a external_id was provided, the variable will be initialized with the @a external_id value.
enum xnn_status xnn_define_quantized_tensor_value(
  xnn_subgraph_t subgraph,
  enum xnn_datatype datatype,
  int32_t zero_point,
  float scale,
  size_t num_dims,
  const size_t* dims,
  const void* data,
  uint32_t external_id,
  uint32_t flags,
  uint32_t* id_out);
266
/// Define a channelwise quantized tensor-type Value and add it to a Subgraph.
///
/// @param subgraph - a Subgraph object that will own the created Value.
/// @param datatype - type of the tensor elements.
/// @param scale - per-channel multiplication factors to convert quantized elements to real representation.
/// @param num_dims - number of dimensions in the shape.
/// @param channel_dim - index of the channel dimension in the tensor with per-channel quantization parameters.
///                      Typically this is the first dimension (dimension #0) of the filter tensors in the Convolution,
///                      Deconvolution, and Fully Connected operators and the last dimension of the filter tensors in
///                      the Depthwise Convolution operators.
/// @param dims - pointer to an array of @a num_dims shape dimensions. If num_dims is 0, this pointer can be NULL.
///               XNNPACK does not keep any pointers to this array after the function returns.
/// @param data - pointer to static data used for tensor initialization. If the tensor is not statically initialized,
///               this pointer must be NULL. If non-NULL, the life-time of the static data must exceed the life-time
///               of the Subgraph object, and of any Runtime objects created from the Subgraph.
/// @param external_id - external ID for the Value. The ID must be within the range of reserved Value IDs specified on
///                      the Subgraph creation. If the external ID is XNN_INVALID_VALUE_ID, an internal ID will be
///                      created for the Value.
/// @param flags - binary features of the Value. Supported values are any combination of XNN_VALUE_FLAG_EXTERNAL_INPUT
///                and XNN_VALUE_FLAG_EXTERNAL_OUTPUT.
/// @param id_out - pointer to the variable that will be initialized with the Value ID upon successful return. If a
///                 valid @a external_id was provided, the variable will be initialized with the @a external_id value.
enum xnn_status xnn_define_channelwise_quantized_tensor_value(
  xnn_subgraph_t subgraph,
  enum xnn_datatype datatype,
  const float* scale,
  size_t num_dims,
  size_t channel_dim,
  const size_t* dims,
  const void* data,
  uint32_t external_id,
  uint32_t flags,
  uint32_t* id_out);
300
/// Define a Convert Node and add it to a Subgraph.
///
/// @param subgraph - a Subgraph object that will own the created Node.
/// @param input_id - Value ID for the input tensor. The input tensor must be defined in the @a subgraph.
/// @param output_id - Value ID for the output tensor. The output tensor must be defined in the @a subgraph, and its
///                    shape must match the shape of the input tensor.
/// @param flags - binary features of the Convert Node. No supported flags are currently defined.
enum xnn_status xnn_define_convert(
  xnn_subgraph_t subgraph,
  uint32_t input_id,
  uint32_t output_id,
  uint32_t flags);
313
/// Define a 2D Convolution Node and add it to a Subgraph.
///
/// @param subgraph - a Subgraph object that will own the created Node.
/// @param input_padding_top - implicit zero-padding above 2D input data. Must be 0 if XNN_FLAG_TENSORFLOW_SAME_PADDING
///                            flag is specified.
/// @param input_padding_right - implicit zero-padding to the right of 2D input data. Must be 0 if
///                              XNN_FLAG_TENSORFLOW_SAME_PADDING flag is specified.
/// @param input_padding_bottom - implicit zero-padding below 2D input data. Must be 0 if
///                               XNN_FLAG_TENSORFLOW_SAME_PADDING flag is specified.
/// @param input_padding_left - implicit zero-padding to the left of 2D input data. Must be 0 if
///                             XNN_FLAG_TENSORFLOW_SAME_PADDING flag is specified.
/// @param kernel_height - kernel (filter) height.
/// @param kernel_width - kernel (filter) width.
/// @param subsampling_height - height of subsampling region for convolution output (convolution height stride).
/// @param subsampling_width - width of subsampling region for convolution output (convolution width stride).
/// @param dilation_height - dilation of kernel elements along the height dimension.
/// @param dilation_width - dilation of kernel elements along the width dimension.
/// @param groups - number of convolution groups.
/// @param group_input_channels - number of input channels per group.
/// @param group_output_channels - number of output channels per group.
/// @param output_min - lower bound for clipping output values.
/// @param output_max - upper bound for clipping output values.
/// @param input_id - Value ID for the input tensor. The input tensor must be a 4D tensor defined in the @a subgraph
///                   with [N, IH, IW, groups * group_input_channels] dimensions.
/// @param filter_id - Value ID for the filter tensor. The filter tensor must be a 4D tensor defined in the @a subgraph
///                    with [groups * group_output_channels, kernel_height, kernel_width, group_input_channels]
///                    dimensions.
/// @param bias_id - Value ID for the bias tensor, or XNN_INVALID_VALUE_ID for a 2D Convolution Node without a bias. If
///                  present, the bias tensor must be a 1D tensor defined in the @a subgraph with [groups *
///                  group_output_channels] dimensions.
/// @param output_id - Value ID for the output tensor. The output tensor must be a 4D tensor defined in the @a subgraph
///                    with [N, OH, OW, groups * group_output_channels] dimensions.
/// @param flags - binary features of the 2D Convolution Node. The only currently supported value is
///                XNN_FLAG_TENSORFLOW_SAME_PADDING.
enum xnn_status xnn_define_convolution_2d(
  xnn_subgraph_t subgraph,
  uint32_t input_padding_top,
  uint32_t input_padding_right,
  uint32_t input_padding_bottom,
  uint32_t input_padding_left,
  uint32_t kernel_height,
  uint32_t kernel_width,
  uint32_t subsampling_height,
  uint32_t subsampling_width,
  uint32_t dilation_height,
  uint32_t dilation_width,
  uint32_t groups,
  size_t group_input_channels,
  size_t group_output_channels,
  float output_min,
  float output_max,
  uint32_t input_id,
  uint32_t filter_id,
  uint32_t bias_id,
  uint32_t output_id,
  uint32_t flags);
370
/// Define a 2D Deconvolution (Transposed Convolution) Node and add it to a Subgraph.
///
/// @param subgraph - a Subgraph object that will own the created Node.
/// @param padding_top - implicit padding above 2D output data.
/// @param padding_right - implicit padding to the right of 2D output data.
/// @param padding_bottom - implicit padding below 2D output data.
/// @param padding_left - implicit padding to the left of 2D output data.
/// @param adjustment_height - additional elements in the bottom of the 2D output data.
/// @param adjustment_width - additional elements to the right of the 2D output data.
/// @param kernel_height - kernel (filter) height.
/// @param kernel_width - kernel (filter) width.
/// @param upsampling_height - height of upsampling region for deconvolution input (deconvolution height stride).
/// @param upsampling_width - width of upsampling region for deconvolution input (deconvolution width stride).
/// @param dilation_height - dilation of kernel elements along the height dimension.
/// @param dilation_width - dilation of kernel elements along the width dimension.
/// @param groups - number of convolution groups.
/// @param group_input_channels - number of input channels per group.
/// @param group_output_channels - number of output channels per group.
/// @param output_min - lower bound for clipping output values.
/// @param output_max - upper bound for clipping output values.
/// @param input_id - Value ID for the input tensor. The input tensor must be a 4D tensor defined in the @a subgraph
///                   with [N, IH, IW, groups * group_input_channels] dimensions.
/// @param filter_id - Value ID for the filter tensor. The filter tensor must be a 4D tensor defined in the @a subgraph
///                    with [groups * group_output_channels, kernel_height, kernel_width, group_input_channels]
///                    dimensions.
/// @param bias_id - Value ID for the bias tensor, or XNN_INVALID_VALUE_ID for a 2D Deconvolution Node without a bias.
///                  If present, the bias tensor must be a 1D tensor defined in the @a subgraph with
///                  [groups * group_output_channels] dimensions.
/// @param output_id - Value ID for the output tensor. The output tensor must be a 4D tensor defined in the @a subgraph
///                    with [N, OH, OW, groups * group_output_channels] dimensions.
/// @param flags - binary features of the 2D Deconvolution Node. No supported flags are currently defined.
enum xnn_status xnn_define_deconvolution_2d(
  xnn_subgraph_t subgraph,
  uint32_t padding_top,
  uint32_t padding_right,
  uint32_t padding_bottom,
  uint32_t padding_left,
  uint32_t adjustment_height,
  uint32_t adjustment_width,
  uint32_t kernel_height,
  uint32_t kernel_width,
  uint32_t upsampling_height,
  uint32_t upsampling_width,
  uint32_t dilation_height,
  uint32_t dilation_width,
  uint32_t groups,
  size_t group_input_channels,
  size_t group_output_channels,
  float output_min,
  float output_max,
  uint32_t input_id,
  uint32_t filter_id,
  uint32_t bias_id,
  uint32_t output_id,
  uint32_t flags);
426
/// Define a 2D Depthwise Convolution Node and add it to a Subgraph.
///
/// @param subgraph - a Subgraph object that will own the created Node.
/// @param input_padding_top - implicit zero-padding above 2D input data. Must be 0 if XNN_FLAG_TENSORFLOW_SAME_PADDING
///                            flag is specified.
/// @param input_padding_right - implicit zero-padding to the right of 2D input data. Must be 0 if
///                              XNN_FLAG_TENSORFLOW_SAME_PADDING flag is specified.
/// @param input_padding_bottom - implicit zero-padding below 2D input data. Must be 0 if
///                               XNN_FLAG_TENSORFLOW_SAME_PADDING flag is specified.
/// @param input_padding_left - implicit zero-padding to the left of 2D input data. Must be 0 if
///                             XNN_FLAG_TENSORFLOW_SAME_PADDING flag is specified.
/// @param kernel_height - kernel (filter) height.
/// @param kernel_width - kernel (filter) width.
/// @param subsampling_height - height of subsampling region for convolution output (convolution height stride).
/// @param subsampling_width - width of subsampling region for convolution output (convolution width stride).
/// @param dilation_height - dilation of kernel elements along the height dimension.
/// @param dilation_width - dilation of kernel elements along the width dimension.
/// @param depth_multiplier - ratio of output channels to input channels.
/// @param input_channels - number of input channels.
/// @param output_min - lower bound for clipping output values.
/// @param output_max - upper bound for clipping output values.
/// @param input_id - Value ID for the input tensor. The input tensor must be a 4D tensor defined in the @a subgraph
///                   with [N, IH, IW, input_channels] dimensions.
/// @param filter_id - Value ID for the filter tensor. The filter tensor must be a 4D tensor defined in the @a subgraph
///                    with [1, kernel_height, kernel_width, input_channels * depth_multiplier] dimensions.
/// @param bias_id - Value ID for the bias tensor, or XNN_INVALID_VALUE_ID for a 2D Depthwise Convolution Node without
///                  a bias. If present, the bias tensor must be a 1D tensor defined in the @a subgraph with
///                  [input_channels * depth_multiplier] dimensions.
/// @param output_id - Value ID for the output tensor. The output tensor must be a 4D tensor defined in the @a subgraph
///                    with [N, OH, OW, input_channels * depth_multiplier] dimensions.
/// @param flags - binary features of the 2D Depthwise Convolution Node. The only currently supported value is
///                XNN_FLAG_TENSORFLOW_SAME_PADDING.
enum xnn_status xnn_define_depthwise_convolution_2d(
  xnn_subgraph_t subgraph,
  uint32_t input_padding_top,
  uint32_t input_padding_right,
  uint32_t input_padding_bottom,
  uint32_t input_padding_left,
  uint32_t kernel_height,
  uint32_t kernel_width,
  uint32_t subsampling_height,
  uint32_t subsampling_width,
  uint32_t dilation_height,
  uint32_t dilation_width,
  uint32_t depth_multiplier,
  size_t input_channels,
  float output_min,
  float output_max,
  uint32_t input_id,
  uint32_t filter_id,
  uint32_t bias_id,
  uint32_t output_id,
  uint32_t flags);
480
/// Define a Depth To Space Node and add it to a Subgraph.
///
/// The Depth To Space Node rearranges data from depth into blocks of spatial data (a reverse transform to
/// Space To Depth). For a given input pixel, an output square of pixels with side @a block_size is formed from values
/// in the corresponding number of its channels. The output depth is therefore @a block_size x @a block_size times
/// smaller than that of the input.
///
/// @param subgraph - a Subgraph object that will own the created Node.
/// @param input_id - Value ID for the input tensor. The input tensor must be a 4D tensor defined in the @a subgraph
///                   with [N, IH, IW, OC * block_size * block_size] dimensions.
/// @param output_id - Value ID for the output tensor. The output tensor must be a 4D tensor defined in the @a subgraph
///                    with [N, IH * block_size, IW * block_size, OC] dimensions.
/// @param block_size - the size of the spatial block.
/// @param flags - binary features of the Depth To Space Node. No supported flags are currently defined.
enum xnn_status xnn_define_depth_to_space(
  xnn_subgraph_t subgraph,
  uint32_t input_id,
  uint32_t output_id,
  uint32_t block_size,
  uint32_t flags);
501
/// Define a 1D Global Average Pooling Node and add it to a Subgraph.
///
/// @param subgraph - a Subgraph object that will own the created Node.
/// @param output_min - lower bound for clipping output values.
/// @param output_max - upper bound for clipping output values.
/// @param input_id - Value ID for the input tensor. The input tensor must be a dense tensor with 2 or more dimensions
///                   defined in the @a subgraph. Averaging is performed across the second-innermost dimension.
/// @param output_id - Value ID for the output tensor. The output tensor must be a dense tensor with 2 or more
///                    dimensions defined in the @a subgraph.
/// @param flags - binary features of the 1D Global Average Pooling Node. No supported flags are currently defined.
enum xnn_status xnn_define_global_average_pooling_1d(
  xnn_subgraph_t subgraph,
  float output_min,
  float output_max,
  uint32_t input_id,
  uint32_t output_id,
  uint32_t flags);
519
/// Define a 2D Global Average Pooling Node and add it to a Subgraph.
///
/// @param subgraph - a Subgraph object that will own the created Node.
/// @param output_min - lower bound for clipping output values.
/// @param output_max - upper bound for clipping output values.
/// @param input_id - Value ID for the input tensor. The input tensor must be a dense tensor with 3 or more dimensions
///                   defined in the @a subgraph. Averaging is performed across the second- and third-innermost
///                   dimensions.
/// @param output_id - Value ID for the output tensor. The output tensor must be a dense tensor with 3 or more
///                    dimensions defined in the @a subgraph.
/// @param flags - binary features of the 2D Global Average Pooling Node. No supported flags are currently defined.
enum xnn_status xnn_define_global_average_pooling_2d(
  xnn_subgraph_t subgraph,
  float output_min,
  float output_max,
  uint32_t input_id,
  uint32_t output_id,
  uint32_t flags);
538
/// Define a 2D Average Pooling Node and add it to a Subgraph.
///
/// @param subgraph - a Subgraph object that will own the created Node.
/// @param input_padding_top - implicit zero-padding above 2D input data. Must be 0 if XNN_FLAG_TENSORFLOW_SAME_PADDING
///                            flag is specified.
/// @param input_padding_right - implicit zero-padding to the right of 2D input data. Must be 0 if
///                              XNN_FLAG_TENSORFLOW_SAME_PADDING flag is specified.
/// @param input_padding_bottom - implicit zero-padding below 2D input data. Must be 0 if
///                               XNN_FLAG_TENSORFLOW_SAME_PADDING flag is specified.
/// @param input_padding_left - implicit zero-padding to the left of 2D input data. Must be 0 if
///                             XNN_FLAG_TENSORFLOW_SAME_PADDING flag is specified.
/// @param pooling_height - pooling (kernel) height.
/// @param pooling_width - pooling (kernel) width.
/// @param stride_height - displacing of the pooling window in the vertical dimension of the input pixels corresponding
///                        to vertically adjacent output pixels.
/// @param stride_width - displacing of the pooling window in the horizontal dimension of the input pixels corresponding
///                       to horizontally adjacent output pixels.
/// @param output_min - lower bound for clipping output values.
/// @param output_max - upper bound for clipping output values.
/// @param input_id - Value ID for the input tensor. The input tensor must be a 4D tensor defined in the @a subgraph
///                   with [N, IH, IW, channels] dimensions.
/// @param output_id - Value ID for the output tensor. The output tensor must be a 4D tensor defined in the @a subgraph
///                    with [N, OH, OW, channels] dimensions.
/// @param flags - binary features of the 2D Average Pooling Node. The only currently supported value is
///                XNN_FLAG_TENSORFLOW_SAME_PADDING.
enum xnn_status xnn_define_average_pooling_2d(
  xnn_subgraph_t subgraph,
  uint32_t input_padding_top,
  uint32_t input_padding_right,
  uint32_t input_padding_bottom,
  uint32_t input_padding_left,
  uint32_t pooling_height,
  uint32_t pooling_width,
  uint32_t stride_height,
  uint32_t stride_width,
  float output_min,
  float output_max,
  uint32_t input_id,
  uint32_t output_id,
  uint32_t flags);
579
580/// Define a Fully Connected Node and add it to a Subgraph.
581///
582/// @param subgraph - a Subgraph object that will own the created Node.
583/// @param output_min - lower bound for clipping output values.
584/// @param output_max - upper bound for clipping output values.
585/// @param input_id - Value ID for the input tensor. The input tensor must be an N-dimensional tensor defined in the
586/// @a subgraph. If XNN_FLAG_TENSORFLOW_RESHAPE_2D is not specified, the input tensor must be at least
587/// 1D and its last dimension must match the last dimension of the filter tensor. In particular, if
588/// input is a 2D tensor, it must have [batch_size, input_channels] dimensions.
589/// If XNN_FLAG_TENSORFLOW_RESHAPE_2D is specified, the number of elements in the input tensor must be
590/// divisible by the input_channels. The tensor will be first flattened into a 1D tensor of
591/// [num_input_elements] dimensions, then reshaped into a 2D tensor of
592/// [num_input_elements / input_channels, input_channels] dimensions where num_input_elements is the
593/// total number of elements in the input tensor.
594/// @param filter_id - Value ID for the filter tensor. The filter tensor must be a 2D tensor defined in the @a subgraph.
595/// If the XNN_FLAG_TRANSPOSE_WEIGHTS flag is not specified, the filter tensor must have
596/// [output_channels, input_channels] dimensions. If the XNN_FLAG_TRANSPOSE_WEIGHTS flag is
597/// specified, the filter tensor must have [input_channels, output_channels] dimensions.
598/// @param bias_id - Value ID for the bias tensor, or XNN_INVALID_VALUE_ID for a Fully Connected Node without a bias.
599/// If present, the bias tensor must be a 1D tensor defined in the @a subgraph with [output_channels]
600/// dimensions.
601/// @param output_id - Value ID for the output tensor. The output tensor must be defined in the @a subgraph.
602/// If XNN_FLAG_TENSORFLOW_RESHAPE_2D is not specified, the output tensor must have the same
603/// dimensionality as the input tensor, all its dimensions but the last one must match the
604/// corresponding dimensions of the input tensor, and the last dimension of the output tensor must
605/// match the first dimension of the filter tensor. In particular, if input is a 2D tensor, output
606/// must be a 2D tensor of [batch_size, output_channels] dimensions.
607/// If XNN_FLAG_TENSORFLOW_RESHAPE_2D is specified, output must be a 2D tensor of
608/// [num_input_elements / input_channels, output_channels] dimensions where num_input_elements is the
609/// total number of elements in the input tensor.
610/// @param flags - binary features of the Fully Connected Node. The only currently supported values are
611/// XNN_FLAG_TENSORFLOW_RESHAPE_2D and XNN_FLAG_TRANSPOSE_WEIGHTS.
612enum xnn_status xnn_define_fully_connected(
613 xnn_subgraph_t subgraph,
614 float output_min,
615 float output_max,
616 uint32_t input_id,
617 uint32_t filter_id,
618 uint32_t bias_id,
619 uint32_t output_id,
620 uint32_t flags);
621
622/// Define a 2D Max Pooling Node and add it to a Subgraph.
623///
624/// @param subgraph - a Subgraph object that will own the created Node.
625/// @param input_padding_top - implicit zero-padding above 2D input data. Must be 0 if XNN_FLAG_TENSORFLOW_SAME_PADDING
626/// flag is specified.
627/// @param input_padding_right - implicit zero-padding to the right of 2D input data. Must be 0 if
628/// XNN_FLAG_TENSORFLOW_SAME_PADDING flag is specified.
629/// @param input_padding_bottom - implicit zero-padding below 2D input data. Must be 0 if
630/// XNN_FLAG_TENSORFLOW_SAME_PADDING flag is specified.
631/// @param input_padding_left - implicit zero-padding to the left of 2D input data. Must be 0 if
632/// XNN_FLAG_TENSORFLOW_SAME_PADDING flag is specified.
633/// @param pooling_height - pooling (kernel) height.
634/// @param pooling_width - pooling (kernel) width.
635/// @param stride_height - displacing of the pooling window in the vertical dimension of the input pixels corresponding
636/// to vertically adjacent output pixels.
637/// @param stride_width - displacing of the pooling window in the horizontal dimension of the input pixels corresponding
638/// to horizontally adjacent output pixels.
639/// @param dilation_height - dilation of pooling elements along the height dimension.
640/// @param dilation_width - dilation of pooling elements along the width dimension.
641/// @param output_min - lower bound for clipping output values.
642/// @param output_max - upper bound for clipping output values.
643/// @param input_id - Value ID for the input tensor. The input tensor must be a 4D tensor defined in the @a subgraph
644/// with [N, IH, IW, channels] dimensions.
645/// @param output_id - Value ID for the output tensor. The output tensor must be a 4D tensor defined in the @a subgraph
646/// with [N, OH, OW, channels] dimensions.
647/// @param flags - binary features of the 2D Max Pooling Node. The only currently supported value is
648/// XNN_FLAG_TENSORFLOW_SAME_PADDING.
649enum xnn_status xnn_define_max_pooling_2d(
650 xnn_subgraph_t subgraph,
651 uint32_t input_padding_top,
652 uint32_t input_padding_right,
653 uint32_t input_padding_bottom,
654 uint32_t input_padding_left,
655 uint32_t pooling_height,
656 uint32_t pooling_width,
657 uint32_t stride_height,
658 uint32_t stride_width,
659 uint32_t dilation_height,
660 uint32_t dilation_width,
661 float output_min,
662 float output_max,
663 uint32_t input_id,
664 uint32_t output_id,
665 uint32_t flags);
666
667/// Define a 2D ArgMax Pooling Node and add it to a Subgraph.
668///
669/// @param subgraph - a Subgraph object that will own the created Node.
670/// @param input_padding_top - implicit zero-padding above 2D input data.
671/// @param input_padding_right - implicit zero-padding to the right of 2D input data.
672/// @param input_padding_bottom - implicit zero-padding below 2D input data.
673/// @param input_padding_left - implicit zero-padding to the left of 2D input data.
674/// @param pooling_height - pooling (kernel) height. Vertical stride between pooling regions matches this value.
675/// @param pooling_width - pooling (kernel) width. Horizontal stride between pooling regions matches this value.
676/// @param input_id - Value ID for the input tensor. The input tensor must be a 4D tensor defined in the @a subgraph
677/// with [N, IH, IW, channels] dimensions.
678/// @param output_value_id - Value ID for the output tensor with the maximum values in the pools. The output tensor must
679/// be a 4D tensor defined in the @a subgraph with [N, OH, OW, channels] dimensions.
680/// @param output_index_id - Value ID for the output tensor with the indexes of the maximum values in the pools. The
681/// output tensor must be a 4D tensor defined in the @a subgraph with [N, OH, OW, channels]
682/// dimensions.
683/// @param flags - binary features of the 2D ArgMax Pooling Node. No supported flags are currently defined.
684enum xnn_status xnn_define_argmax_pooling_2d(
685 xnn_subgraph_t subgraph,
686 uint32_t input_padding_top,
687 uint32_t input_padding_right,
688 uint32_t input_padding_bottom,
689 uint32_t input_padding_left,
690 uint32_t pooling_height,
691 uint32_t pooling_width,
692 uint32_t input_id,
693 uint32_t output_value_id,
694 uint32_t output_index_id,
695 uint32_t flags);
696
697/// Define a 2D UnPooling Node and add it to a Subgraph.
698///
699/// @param subgraph - a Subgraph object that will own the created Node.
700/// @param padding_top - implicit padding above 2D output data.
701/// @param padding_right - implicit padding to the right of 2D output data.
702/// @param padding_bottom - implicit padding below 2D output data.
703/// @param padding_left - implicit padding to the left of 2D output data.
704/// @param pooling_height - height of the pooling window.
705/// @param pooling_width - width of the pooling window.
706/// @param input_value_id - Value ID for the input tensor with the max-pooling values to invert. The input value tensor
707/// must be a 4D tensor defined in the @a subgraph with [N, IH, IW, channels] dimensions.
708/// @param input_index_id - Value ID for the input tensor with the indices of the per-pool maximum values produced by
709/// a 2D ArgMax Pooling Node. The input tensor must be a 4D tensor defined in the @a subgraph with
710/// [N, IH, IW, channels] dimensions.
711/// @param output_id - Value ID for the output tensor. The output tensor must be a 4D tensor defined in the @a subgraph
712/// with [N, OH, OW, channels] dimensions.
713/// @param flags - binary features of the 2D UnPooling Node. No supported flags are currently defined.
714enum xnn_status xnn_define_unpooling_2d(
715 xnn_subgraph_t subgraph,
716 uint32_t padding_top,
717 uint32_t padding_right,
718 uint32_t padding_bottom,
719 uint32_t padding_left,
720 uint32_t pooling_height,
721 uint32_t pooling_width,
722 uint32_t input_value_id,
723 uint32_t input_index_id,
724 uint32_t output_id,
725 uint32_t flags);
726
727/// Define a 2-Input Add Node and add it to a Subgraph.
728///
729/// The 2-Input Add Node computes elementwise addition of two tensor inputs with numpy broadcasting rules.
730///
731/// @param subgraph - a Subgraph object that will own the created Node.
732/// @param output_min - lower bound for clipping output values.
733/// @param output_max - upper bound for clipping output values.
734/// @param input1_id - Value ID for the first input tensor. The input tensor must be an N-dimensional tensor defined in
735/// the @a subgraph with each dimension either equal to the corresponding dimension of the second
736/// input, or equal to 1. In the latter case, the elements of the input tensor are broadcast along
737/// that dimension.
738/// @param input2_id - Value ID for the second input tensor. The input tensor must be an M-dimensional tensor defined in
739/// the @a subgraph with each dimension either equal to the corresponding dimension of the first
740/// input, or equal to 1. In the latter case, the elements of the input tensor are broadcast along
741/// that dimension.
742/// @param output_id - Value ID for the output tensor. The output tensor must be a max(N,M)-dimensional tensor defined
743/// in the @a subgraph with each dimension equal to the maximum of the corresponding dimensions
744/// of the two inputs.
745/// @param flags - binary features of the Add Node. No supported flags are currently defined.
746enum xnn_status xnn_define_add2(
747 xnn_subgraph_t subgraph,
748 float output_min,
749 float output_max,
750 uint32_t input1_id,
751 uint32_t input2_id,
752 uint32_t output_id,
753 uint32_t flags);
754
755/// Define a 2-Input Multiply Node and add it to a Subgraph.
756///
757/// The 2-Input Multiply Node computes elementwise multiplication of two tensor inputs with numpy broadcasting rules.
758///
759/// @param subgraph - a Subgraph object that will own the created Node.
760/// @param output_min - lower bound for clipping output values.
761/// @param output_max - upper bound for clipping output values.
762/// @param input1_id - Value ID for the first input tensor. The input tensor must be an N-dimensional tensor defined in
763/// the @a subgraph with each dimension either equal to the corresponding dimension of the second
764/// input, or equal to 1. In the latter case, the elements of the input tensor are broadcast along
765/// that dimension.
766/// @param input2_id - Value ID for the second input tensor. The input tensor must be an M-dimensional tensor defined in
767/// the @a subgraph with each dimension either equal to the corresponding dimension of the first
768/// input, or equal to 1. In the latter case, the elements of the input tensor are broadcast along
769/// that dimension.
770/// @param output_id - Value ID for the output tensor. The output tensor must be a max(N,M)-dimensional tensor defined
771/// in the @a subgraph with each dimension equal to the maximum of the corresponding dimensions
772/// of the two inputs.
773/// @param flags - binary features of the Multiply Node. No supported flags are currently defined.
774enum xnn_status xnn_define_multiply2(
775 xnn_subgraph_t subgraph,
776 float output_min,
777 float output_max,
778 uint32_t input1_id,
779 uint32_t input2_id,
780 uint32_t output_id,
781 uint32_t flags);
782
783/// Define a Subtract Node and add it to a Subgraph.
784///
785/// The Subtract Node computes elementwise subtraction of two tensor inputs with numpy broadcasting rules.
786///
787/// @param subgraph - a Subgraph object that will own the created Node.
788/// @param output_min - lower bound for clipping output values.
789/// @param output_max - upper bound for clipping output values.
790/// @param input1_id - Value ID for the first input tensor. The input tensor must be an N-dimensional tensor defined in
791/// the @a subgraph with each dimension either equal to the corresponding dimension of the second
792/// input, or equal to 1. In the latter case, the elements of the input tensor are broadcast along
793/// that dimension.
794/// @param input2_id - Value ID for the second input tensor. The input tensor must be an M-dimensional tensor defined in
795/// the @a subgraph with each dimension either equal to the corresponding dimension of the first
796/// input, or equal to 1. In the latter case, the elements of the input tensor are broadcast along
797/// that dimension.
798/// @param output_id - Value ID for the output tensor. The output tensor must be a max(N,M)-dimensional tensor defined
799/// in the @a subgraph with each dimension equal to the maximum of the corresponding dimensions
800/// of the two inputs.
801/// @param flags - binary features of the Subtract Node. No supported flags are currently defined.
802enum xnn_status xnn_define_subtract(
803 xnn_subgraph_t subgraph,
804 float output_min,
805 float output_max,
806 uint32_t input1_id,
807 uint32_t input2_id,
808 uint32_t output_id,
809 uint32_t flags);
810
811/// Define a Divide Node and add it to a Subgraph.
812///
813/// The Divide Node computes elementwise division of two tensor inputs with numpy broadcasting rules.
814///
815/// @param subgraph - a Subgraph object that will own the created Node.
816/// @param output_min - lower bound for clipping output values.
817/// @param output_max - upper bound for clipping output values.
818/// @param input1_id - Value ID for the first input tensor. The input tensor must be an N-dimensional tensor defined in
819/// the @a subgraph with each dimension either equal to the corresponding dimension of the second
820/// input, or equal to 1. In the latter case, the elements of the input tensor are broadcast along
821/// that dimension.
822/// @param input2_id - Value ID for the second input tensor. The input tensor must be an M-dimensional tensor defined in
823/// the @a subgraph with each dimension either equal to the corresponding dimension of the first
824/// input, or equal to 1. In the latter case, the elements of the input tensor are broadcast along
825/// that dimension.
826/// @param output_id - Value ID for the output tensor. The output tensor must be a max(N,M)-dimensional tensor defined
827/// in the @a subgraph with each dimension equal to the maximum of the corresponding dimensions
828/// of the two inputs.
829/// @param flags - binary features of the Divide Node. No supported flags are currently defined.
830enum xnn_status xnn_define_divide(
831 xnn_subgraph_t subgraph,
832 float output_min,
833 float output_max,
834 uint32_t input1_id,
835 uint32_t input2_id,
836 uint32_t output_id,
837 uint32_t flags);
838
839/// Define a 2-Input Maximum Node and add it to a Subgraph.
840///
841/// The 2-Input Maximum Node computes elementwise maximum of two tensor inputs with numpy broadcasting rules.
842///
843/// @param subgraph - a Subgraph object that will own the created Node.
844/// @param input1_id - Value ID for the first input tensor. The input tensor must be an N-dimensional tensor defined in
845/// the @a subgraph with each dimension either equal to the corresponding dimension of the second
846/// input, or equal to 1. In the latter case, the elements of the input tensor are broadcast along
847/// that dimension.
848/// @param input2_id - Value ID for the second input tensor. The input tensor must be an M-dimensional tensor defined in
849/// the @a subgraph with each dimension either equal to the corresponding dimension of the first
850/// input, or equal to 1. In the latter case, the elements of the input tensor are broadcast along
851/// that dimension.
852/// @param output_id - Value ID for the output tensor. The output tensor must be a max(N,M)-dimensional tensor defined
853/// in the @a subgraph with each dimension equal to the maximum of the corresponding dimensions
854/// of the two inputs.
855/// @param flags - binary features of the Maximum Node. No supported flags are currently defined.
856enum xnn_status xnn_define_maximum2(
857 xnn_subgraph_t subgraph,
858 uint32_t input1_id,
859 uint32_t input2_id,
860 uint32_t output_id,
861 uint32_t flags);
862
863/// Define a 2-Input Minimum Node and add it to a Subgraph.
864///
865/// The 2-Input Minimum Node computes elementwise minimum of two tensor inputs with numpy broadcasting rules.
866///
867/// @param subgraph - a Subgraph object that will own the created Node.
868/// @param input1_id - Value ID for the first input tensor. The input tensor must be an N-dimensional tensor defined in
869/// the @a subgraph with each dimension either equal to the corresponding dimension of the second
870/// input, or equal to 1. In the latter case, the elements of the input tensor are broadcast along
871/// that dimension.
872/// @param input2_id - Value ID for the second input tensor. The input tensor must be an M-dimensional tensor defined in
873/// the @a subgraph with each dimension either equal to the corresponding dimension of the first
874/// input, or equal to 1. In the latter case, the elements of the input tensor are broadcast along
875/// that dimension.
876/// @param output_id - Value ID for the output tensor. The output tensor must be a max(N,M)-dimensional tensor defined
877/// in the @a subgraph with each dimension equal to the maximum of the corresponding dimensions
878/// of the two inputs.
879/// @param flags - binary features of the Minimum Node. No supported flags are currently defined.
880enum xnn_status xnn_define_minimum2(
881 xnn_subgraph_t subgraph,
882 uint32_t input1_id,
883 uint32_t input2_id,
884 uint32_t output_id,
885 uint32_t flags);
886
887/// Define a Squared Difference Node and add it to a Subgraph.
888///
889/// The Squared Difference Node computes elementwise squared difference of two tensor inputs with numpy broadcasting
890/// rules.
891///
892/// @param subgraph - a Subgraph object that will own the created Node.
893/// @param input1_id - Value ID for the first input tensor. The input tensor must be an N-dimensional tensor defined in
894/// the @a subgraph with each dimension either equal to the corresponding dimension of the second
895/// input, or equal to 1. In the latter case, the elements of the input tensor are broadcast along
896/// that dimension.
897/// @param input2_id - Value ID for the second input tensor. The input tensor must be an M-dimensional tensor defined in
898/// the @a subgraph with each dimension either equal to the corresponding dimension of the first
899/// input, or equal to 1. In the latter case, the elements of the input tensor are broadcast along
900/// that dimension.
901/// @param output_id - Value ID for the output tensor. The output tensor must be a max(N,M)-dimensional tensor defined
902/// in the @a subgraph with each dimension equal to the maximum of the corresponding dimensions
903/// of the two inputs.
904/// @param flags - binary features of the Squared Difference Node. No supported flags are currently defined.
905enum xnn_status xnn_define_squared_difference(
906 xnn_subgraph_t subgraph,
907 uint32_t input1_id,
908 uint32_t input2_id,
909 uint32_t output_id,
910 uint32_t flags);
911
912/// Define a Constant Pad Node with static padding specification and add it to a Subgraph.
913///
914/// @param subgraph - a Subgraph object that will own the created Node.
915/// @param pre_paddings - number of padding elements to insert before input elements for every dimension. This array
916/// must have as many elements as the number of dimensions in the input tensor.
917/// @param post_paddings - number of padding elements to insert after input elements for every dimension. This array
918/// must have as many elements as the number of dimensions in the input tensor.
919/// @param padding_value - constant value used to initialize padding elements.
920/// @param input_id - Value ID for the input tensor. The input tensor must be defined in the @a subgraph.
921/// @param output_id - Value ID for the output tensor. The output tensor must be defined in the @a subgraph, and its
922/// shape must match the shape of the input tensor with padding applied.
923/// @param flags - binary features of the Constant Pad Node. No supported flags are currently defined.
924enum xnn_status xnn_define_static_constant_pad(
925 xnn_subgraph_t subgraph,
926 const size_t* pre_paddings,
927 const size_t* post_paddings,
928 float padding_value,
929 uint32_t input_id,
930 uint32_t output_id,
931 uint32_t flags);
932
933/// Define a 2-Input Concatenate Node and add it to a Subgraph.
934///
935/// The 2-Input Concatenate Node concatenates two tensors along a specified axis.
936///
937/// @param subgraph - a Subgraph object that will own the created Node.
938/// @param axis - the axis to concatenate the two input tensors along.
939/// @param input1_id - Value ID for the first input tensor. The input tensor must be an N-dimensional tensor defined in
940/// the @a subgraph with each dimension, except the axis, equal to the corresponding dimension of the
941/// second input.
942/// @param input2_id - Value ID for the second input tensor. The input tensor must be an N-dimensional tensor defined in
943/// the @a subgraph with each dimension, except the axis, equal to the corresponding dimension of the
944/// first input.
945/// @param output_id - Value ID for the output tensor. The output tensor must be an N-dimensional tensor defined
946/// in the @a subgraph with each dimension equal to the dimension of both inputs, except the axis
947/// dimension, where it is the sum of the corresponding dimensions of both inputs.
948/// @param flags - binary features of the Concatenate Node. No supported flags are currently defined.
949enum xnn_status xnn_define_concatenate2(
950 xnn_subgraph_t subgraph,
951 size_t axis,
952 uint32_t input1_id,
953 uint32_t input2_id,
954 uint32_t output_id,
955 uint32_t flags);
956
957/// Define a 3-Input Concatenate Node and add it to a Subgraph.
958///
959/// The 3-Input Concatenate Node concatenates three tensors along a specified axis.
960///
961/// @param subgraph - a Subgraph object that will own the created Node.
962/// @param axis - the axis to concatenate the three input tensors along.
963/// @param input1_id - Value ID for the first input tensor. The input tensor must be an N-dimensional tensor defined in
964/// the @a subgraph with each dimension, except the axis, equal to the corresponding dimension of the
965/// other inputs.
966/// @param input2_id - Value ID for the second input tensor. The input tensor must be an N-dimensional tensor defined in
967/// the @a subgraph with each dimension, except the axis, equal to the corresponding dimension of the
968/// other inputs.
969/// @param input3_id - Value ID for the third input tensor. The input tensor must be an N-dimensional tensor defined in
970/// the @a subgraph with each dimension, except the axis, equal to the corresponding dimension of the
971/// other inputs.
972/// @param output_id - Value ID for the output tensor. The output tensor must be an N-dimensional tensor defined
973/// in the @a subgraph with each dimension equal to the dimension of all inputs, except the axis
974/// dimension, where it is the sum of the corresponding dimensions of all inputs.
975/// @param flags - binary features of the Concatenate Node. No supported flags are currently defined.
976enum xnn_status xnn_define_concatenate3(
977 xnn_subgraph_t subgraph,
978 size_t axis,
979 uint32_t input1_id,
980 uint32_t input2_id,
981 uint32_t input3_id,
982 uint32_t output_id,
983 uint32_t flags);
984
985/// Define a 4-Input Concatenate Node and add it to a Subgraph.
986///
987/// The 4-Input Concatenate Node concatenates four tensors along a specified axis.
988///
989/// @param subgraph - a Subgraph object that will own the created Node.
990/// @param axis - the axis to concatenate the four input tensors along.
991/// @param input1_id - Value ID for the first input tensor. The input tensor must be an N-dimensional tensor defined in
992/// the @a subgraph with each dimension, except the axis, equal to the corresponding dimension of the
993/// other inputs.
994/// @param input2_id - Value ID for the second input tensor. The input tensor must be an N-dimensional tensor defined in
995/// the @a subgraph with each dimension, except the axis, equal to the corresponding dimension of the
996/// other inputs.
997/// @param input3_id - Value ID for the third input tensor. The input tensor must be an N-dimensional tensor defined in
998/// the @a subgraph with each dimension, except the axis, equal to the corresponding dimension of the
999/// other inputs.
1000/// @param input4_id - Value ID for the fourth input tensor. The input tensor must be an N-dimensional tensor defined in
1001/// the @a subgraph with each dimension, except the axis, equal to the corresponding dimension of the
1002/// other inputs.
1003/// @param output_id - Value ID for the output tensor. The output tensor must be an N-dimensional tensor defined
1004/// in the @a subgraph with each dimension equal to the dimension of all inputs, except the axis
1005/// dimension, where it is the sum of the corresponding dimensions of all inputs.
1006/// @param flags - binary features of the Concatenate Node. No supported flags are currently defined.
1007enum xnn_status xnn_define_concatenate4(
1008 xnn_subgraph_t subgraph,
1009 size_t axis,
1010 uint32_t input1_id,
1011 uint32_t input2_id,
1012 uint32_t input3_id,
1013 uint32_t input4_id,
1014 uint32_t output_id,
1015 uint32_t flags);
1016
1017/// Define a Copy Node and add it to a Subgraph.
1018///
1019/// The Copy Node copies an input tensor to an output tensor.
1020///
1021/// @param subgraph - a Subgraph object that will own the created Node.
1022/// @param input_id - Value ID for the input tensor. The input tensor must be defined in the @a subgraph.
1023/// @param output_id - Value ID for the output tensor. The output tensor must be defined in the @a subgraph, and its
1024/// shape must match the shape of the input tensor.
1025/// @param flags - binary features of the Copy Node. No supported flags are currently defined.
1026enum xnn_status xnn_define_copy(
1027 xnn_subgraph_t subgraph,
1028 uint32_t input_id,
1029 uint32_t output_id,
1030 uint32_t flags);
1031
1032/// Define a 2-Output Split Node and add it to a Subgraph.
1033///
1034/// The 2-Output Split Node splits an input tensor into two output tensors along a specified axis evenly.
1035///
1036/// @param subgraph - a Subgraph object that will own the created Node.
1037/// @param split_dim - the dimension to split the input tensor along.
1038/// @param input_id - Value ID for the input tensor. The input tensor must be an N-dimensional tensor defined in the @a
1039/// subgraph.
1040/// @param output1_id - Value ID for the first output tensor. The output tensor must be an N-dimensional tensor defined
1041/// in the @a subgraph with each dimension, except the axis, equal to the corresponding dimension
1042/// of the second output. The split_dim dimension is half of the input's split_dim.
1043/// @param output2_id - Value ID for the second output tensor. The output tensor must be an N-dimensional tensor
1044/// defined in the @a subgraph with each dimension, except the axis, equal to the corresponding
1045/// dimension of the first output. The split_dim dimension is half of the input's split_dim.
1046/// @param flags - binary features of the Split Node. No supported flags are currently defined.
1047enum xnn_status xnn_define_even_split2(
1048 xnn_subgraph_t subgraph,
1049 size_t split_dim,
1050 uint32_t input_id,
1051 uint32_t output1_id,
1052 uint32_t output2_id,
1053 uint32_t flags);
1054
1055/// Define a 3-Output Split Node and add it to a Subgraph.
1056///
1057/// The 3-Output Split Node splits an input tensor into three output tensors along a specified axis evenly.
1058///
1059/// @param subgraph - a Subgraph object that will own the created Node.
1060/// @param split_dim - the dimension to split the input tensor along.
1061/// @param input_id - Value ID for the input tensor. The input tensor must be an N-dimensional tensor defined in the @a
1062/// subgraph.
1063/// @param output1_id - Value ID for the first output tensor. The output tensor must be an N-dimensional tensor defined
1064/// in the @a subgraph with each dimension, except the axis, equal to the corresponding dimension
1065/// of the second and third output. The split_dim dimension is one third of the input's split_dim.
1066/// @param output2_id - Value ID for the second output tensor. The output tensor must be an N-dimensional tensor
1067/// defined in the @a subgraph with each dimension, except the axis, equal to the corresponding
1068/// dimension of the first and third output. The split_dim dimension is one third of the input's
1069/// split_dim.
1070/// @param output3_id - Value ID for the third output tensor. The output tensor must be an N-dimensional tensor
1071/// defined in the @a subgraph with each dimension, except the axis, equal to the corresponding
1072/// dimension of the first and second output. The split_dim dimension is one third of the input's
1073/// split_dim.
1074/// @param flags - binary features of the Split Node. No supported flags are currently defined.
1075enum xnn_status xnn_define_even_split3(
1076 xnn_subgraph_t subgraph,
1077 size_t split_dim,
1078 uint32_t input_id,
1079 uint32_t output1_id,
1080 uint32_t output2_id,
1081 uint32_t output3_id,
1082 uint32_t flags);
1083
1084/// Define a 4-Output Split Node and add it to a Subgraph.
1085///
1086/// The 4-Output Split Node splits an input tensor into four output tensors along a specified axis evenly.
1087///
1088/// @param subgraph - a Subgraph object that will own the created Node.
1089/// @param split_dim - the dimension to split the input tensor along.
1090/// @param input_id - Value ID for the input tensor. The input tensor must be an N-dimensional tensor defined in the @a
1091/// subgraph.
1092/// @param output1_id - Value ID for the first output tensor. The output tensor must be an N-dimensional tensor defined
1093/// in the @a subgraph with each dimension, except the axis, equal to the corresponding dimension
1094/// of the other output tensors. The split_dim dimension is one fourth of the input's split_dim.
1095/// @param output2_id - Value ID for the second output tensor. The output tensor must be an N-dimensional tensor
1096/// defined in the @a subgraph with each dimension, except the axis, equal to the corresponding
1097/// dimension of the other output tensors. The split_dim dimension is one fourth of the input's
1098/// split_dim.
1099/// @param output3_id - Value ID for the third output tensor. The output tensor must be an N-dimensional tensor
1100/// defined in the @a subgraph with each dimension, except the axis, equal to the corresponding
1101/// dimension of the other output tensors. The split_dim dimension is one fourth of the input's
1102/// split_dim.
1103/// @param output4_id - Value ID for the fourth output tensor. The output tensor must be an N-dimensional tensor
1104/// defined in the @a subgraph with each dimension, except the axis, equal to the corresponding
1105/// dimension of the other output tensors. The split_dim dimension is one fourth of the input's
1106/// split_dim.
1107/// @param flags - binary features of the Split Node. No supported flags are currently defined.
1108enum xnn_status xnn_define_even_split4(
1109 xnn_subgraph_t subgraph,
1110 size_t split_dim,
1111 uint32_t input_id,
1112 uint32_t output1_id,
1113 uint32_t output2_id,
1114 uint32_t output3_id,
1115 uint32_t output4_id,
1116 uint32_t flags);
1117
1118/// Define a Reshape Node with static shape specification and add it to a Subgraph.
1119///
1120/// @param subgraph - a Subgraph object that will own the created Node.
1121/// @param num_dims - number of shape dimensions in the output tensor.
1122/// @param new_shape - shape dimensions of the output tensor.
1123/// @param input_id - Value ID for the input tensor. The input tensor must be defined in the @a subgraph.
1124/// @param output_id - Value ID for the output tensor. The output tensor must be defined in the @a subgraph, and its
1125/// shape must match the shape specified by @a new_shape.
1126/// @param flags - binary features of the Reshape Node. No supported flags are currently defined.
1127enum xnn_status xnn_define_static_reshape(
1128 xnn_subgraph_t subgraph,
1129 size_t num_dims,
1130 const size_t* new_shape,
1131 uint32_t input_id,
1132 uint32_t output_id,
1133 uint32_t flags);
1134
1135/// Define a 2D Resize Bilinear Node with static output height & width specification and add it to a Subgraph.
1136///
1137/// @param subgraph - a Subgraph object that will own the created Node.
1138/// @param new_height - height dimension of the output tensor.
1139/// @param new_width - width dimension of the output tensor.
1140/// @param input_id - Value ID for the input tensor. The input tensor must be a 4D tensor defined in the @a subgraph
1141/// with [N, H, W, C] dimensions.
1142/// @param output_id - Value ID for the output tensor. The output tensor must be a 4D tensor defined in the @a subgraph
1143/// with [N, new_height, new_width, C] dimensions.
1144/// @param flags - binary features of the 2D Resize Bilinear Node. The only currently supported values are
1145/// XNN_FLAG_TENSORFLOW_LEGACY_MODE and XNN_FLAG_ALIGN_CORNERS, which are mutually exclusive.
1146enum xnn_status xnn_define_static_resize_bilinear_2d(
1147 xnn_subgraph_t subgraph,
1148 size_t new_height,
1149 size_t new_width,
1150 uint32_t input_id,
1151 uint32_t output_id,
1152 uint32_t flags);
1153
1154/// Define a PReLU (Parametric ReLU) Node and add it to a Subgraph.
1155///
1156/// @param subgraph - a Subgraph object that will own the created Node.
1157/// @param input_id - Value ID for the input tensor. The input tensor must be a 4D tensor defined in the @a subgraph
1158/// with [N, H, W, channels] dimensions.
1159/// @param slope_id - Value ID for the slope tensor. The slope tensor must be a 1D tensor defined in the @a subgraph
1160/// with [channels] dimensions.
1161/// @param output_id - Value ID for the output tensor. The output tensor must be a 4D tensor defined in the @a subgraph
1162/// with [N, H, W, channels] dimensions.
1163/// @param flags - binary features of the PReLU Node. No supported flags are currently defined.
1164enum xnn_status xnn_define_prelu(
1165 xnn_subgraph_t subgraph,
1166 uint32_t input_id,
1167 uint32_t slope_id,
1168 uint32_t output_id,
1169 uint32_t flags);
1170
1171/// Define an Abs Node and add it to a Subgraph.
1172///
1173/// @param subgraph - a Subgraph object that will own the created Node.
1174/// @param input_id - Value ID for the input tensor. The input tensor must be defined in the @a subgraph.
1175/// @param output_id - Value ID for the output tensor. The output tensor must be defined in the @a subgraph, and its
1176/// shape must match the shape of the input tensor.
1177/// @param flags - binary features of the Abs Node. No supported flags are currently defined.
1178enum xnn_status xnn_define_abs(
1179 xnn_subgraph_t subgraph,
1180 uint32_t input_id,
1181 uint32_t output_id,
1182 uint32_t flags);
1183
1184/// Define a Bankers' Rounding Node and add it to a Subgraph.
1185///
1186/// @param subgraph - a Subgraph object that will own the created Node.
1187/// @param input_id - Value ID for the input tensor. The input tensor must be defined in the @a subgraph.
1188/// @param output_id - Value ID for the output tensor. The output tensor must be defined in the @a subgraph, and its
1189/// shape must match the shape of the input tensor.
1190/// @param flags - binary features of the Bankers' Rounding Node. No supported flags are currently defined.
1191enum xnn_status xnn_define_bankers_rounding(
1192 xnn_subgraph_t subgraph,
1193 uint32_t input_id,
1194 uint32_t output_id,
1195 uint32_t flags);
1196
1197/// Define a Ceiling Node and add it to a Subgraph.
1198///
1199/// @param subgraph - a Subgraph object that will own the created Node.
1200/// @param input_id - Value ID for the input tensor. The input tensor must be defined in the @a subgraph.
1201/// @param output_id - Value ID for the output tensor. The output tensor must be defined in the @a subgraph, and its
1202/// shape must match the shape of the input tensor.
1203/// @param flags - binary features of the Ceiling Node. No supported flags are currently defined.
1204enum xnn_status xnn_define_ceiling(
1205 xnn_subgraph_t subgraph,
1206 uint32_t input_id,
1207 uint32_t output_id,
1208 uint32_t flags);
1209
1210/// Define a Clamp Node and add it to a Subgraph.
1211///
1212/// @param subgraph - a Subgraph object that will own the created Node.
1213/// @param output_min - lower bound for clipping output values.
1214/// @param output_max - upper bound for clipping output values.
1215/// @param input_id - Value ID for the input tensor. The input tensor must be defined in the @a subgraph.
1216/// @param output_id - Value ID for the output tensor. The output tensor must be defined in the @a subgraph, and its
1217/// shape must match the shape of the input tensor.
1218/// @param flags - binary features of the Clamp Node. No supported flags are currently defined.
1219enum xnn_status xnn_define_clamp(
1220 xnn_subgraph_t subgraph,
1221 float output_min,
1222 float output_max,
1223 uint32_t input_id,
1224 uint32_t output_id,
1225 uint32_t flags);
1226
1227/// Define an ELU (Exponential Linear Unit) Node and add it to a Subgraph.
1228///
1229/// @param subgraph - a Subgraph object that will own the created Node.
1230/// @param alpha - scale factor for negative output elements.
1231/// @param input_id - Value ID for the input tensor. The input tensor must be defined in the @a subgraph.
1232/// @param output_id - Value ID for the output tensor. The output tensor must be defined in the @a subgraph, and its
1233/// shape must match the shape of the input tensor.
1234/// @param flags - binary features of the ELU Node. No supported flags are currently defined.
1235enum xnn_status xnn_define_elu(
1236 xnn_subgraph_t subgraph,
1237 float alpha,
1238 uint32_t input_id,
1239 uint32_t output_id,
1240 uint32_t flags);
1241
1242/// Define a Floor Node and add it to a Subgraph.
1243///
1244/// @param subgraph - a Subgraph object that will own the created Node.
1245/// @param input_id - Value ID for the input tensor. The input tensor must be defined in the @a subgraph.
1246/// @param output_id - Value ID for the output tensor. The output tensor must be defined in the @a subgraph, and its
1247/// shape must match the shape of the input tensor.
1248/// @param flags - binary features of the Floor Node. No supported flags are currently defined.
1249enum xnn_status xnn_define_floor(
1250 xnn_subgraph_t subgraph,
1251 uint32_t input_id,
1252 uint32_t output_id,
1253 uint32_t flags);
1254
1255/// Define a HardSwish Node and add it to a Subgraph.
1256///
1257/// @param subgraph - a Subgraph object that will own the created Node.
1258/// @param input_id - Value ID for the input tensor. The input tensor must be defined in the @a subgraph.
1259/// @param output_id - Value ID for the output tensor. The output tensor must be defined in the @a subgraph, and its
1260/// shape must match the shape of the input tensor.
1261/// @param flags - binary features of the HardSwish Node. No supported flags are currently defined.
1262enum xnn_status xnn_define_hardswish(
1263 xnn_subgraph_t subgraph,
1264 uint32_t input_id,
1265 uint32_t output_id,
1266 uint32_t flags);
1267
1268/// Define a Leaky ReLU Node and add it to a Subgraph.
1269///
1270/// @param subgraph - a Subgraph object that will own the created Node.
1271/// @param negative_slope - scale factor for negative input elements.
1272/// @param input_id - Value ID for the input tensor. The input tensor must be defined in the @a subgraph.
1273/// @param output_id - Value ID for the output tensor. The output tensor must be defined in the @a subgraph, and its
1274/// shape must match the shape of the input tensor.
1275/// @param flags - binary features of the Leaky ReLU Node. No supported flags are currently defined.
1276enum xnn_status xnn_define_leaky_relu(
1277 xnn_subgraph_t subgraph,
1278 float negative_slope,
1279 uint32_t input_id,
1280 uint32_t output_id,
1281 uint32_t flags);
1282
1283/// Define a Negate Node and add it to a Subgraph.
1284///
1285/// @param subgraph - a Subgraph object that will own the created Node.
1286/// @param input_id - Value ID for the input tensor. The input tensor must be defined in the @a subgraph.
1287/// @param output_id - Value ID for the output tensor. The output tensor must be defined in the @a subgraph, and its
1288/// shape must match the shape of the input tensor.
1289/// @param flags - binary features of the Negate Node. No supported flags are currently defined.
1290enum xnn_status xnn_define_negate(
1291 xnn_subgraph_t subgraph,
1292 uint32_t input_id,
1293 uint32_t output_id,
1294 uint32_t flags);
1295
1296/// Define a Sigmoid Node and add it to a Subgraph.
1297///
1298/// @param subgraph - a Subgraph object that will own the created Node.
1299/// @param input_id - Value ID for the input tensor. The input tensor must be defined in the @a subgraph.
1300/// @param output_id - Value ID for the output tensor. The output tensor must be defined in the @a subgraph, and its
1301/// shape must match the shape of the input tensor.
1302/// @param flags - binary features of the Sigmoid Node. No supported flags are currently defined.
1303enum xnn_status xnn_define_sigmoid(
1304 xnn_subgraph_t subgraph,
1305 uint32_t input_id,
1306 uint32_t output_id,
1307 uint32_t flags);
1308
1309/// Define a SoftMax Node and add it to a Subgraph.
1310///
1311/// @param subgraph - a Subgraph object that will own the created Node.
1312/// @param input_id - Value ID for the input tensor. The input tensor must be defined in the @a subgraph, and have at
1313/// least one dimension.
1314/// @param output_id - Value ID for the output tensor. The output tensor must be defined in the @a subgraph, and its
1315/// shape must match the shape of the input tensor.
1316/// @param flags - binary features of the SoftMax Node. No supported flags are currently defined.
1317enum xnn_status xnn_define_softmax(
1318 xnn_subgraph_t subgraph,
1319 uint32_t input_id,
1320 uint32_t output_id,
1321 uint32_t flags);
1322
1323/// Define a Space To Depth 2D Node and add it to a Subgraph.
1324///
1325/// The Space To Depth 2D Node rearranges blocks of spatial data into depth (a reverse transform to Depth To Space 2D).
1326/// Each square block of input pixels with side @a block_size is gathered into the channels of a single output
1327/// pixel. The output depth is therefore @a block_size x @a block_size times greater than that of the input, while
1328/// the output height and width are @a block_size times smaller.
1329///
1330/// @param subgraph - a Subgraph object that will own the created Node.
1331/// @param block_size - the size of the spatial block.
1332/// @param input_id - Value ID for the input tensor. The input tensor must be a 4D tensor defined in the @a subgraph
1333/// with [N, IH * block_size, IW * block_size, OC] dimensions.
1334/// @param output_id - Value ID for the output tensor. The output tensor must be a 4D tensor defined in the @a subgraph
1335/// with [N, IH, IW, OC * block_size * block_size] dimensions.
1336/// @param flags - binary features of the Space To Depth 2D Node. No supported flags are currently defined.
1337enum xnn_status xnn_define_space_to_depth_2d(
1338 xnn_subgraph_t subgraph,
1339 uint32_t block_size,
1340 uint32_t input_id,
1341 uint32_t output_id,
1342 uint32_t flags);
1343
1344/// Define a Square Node and add it to a Subgraph.
1345///
1346/// @param subgraph - a Subgraph object that will own the created Node.
1347/// @param input_id - Value ID for the input tensor. The input tensor must be defined in the @a subgraph.
1348/// @param output_id - Value ID for the output tensor. The output tensor must be defined in the @a subgraph, and its
1349/// shape must match the shape of the input tensor.
1350/// @param flags - binary features of the Square Node. No supported flags are currently defined.
1351enum xnn_status xnn_define_square(
1352 xnn_subgraph_t subgraph,
1353 uint32_t input_id,
1354 uint32_t output_id,
1355 uint32_t flags);
1356
1357/// Define a Square Root Node and add it to a Subgraph.
1358///
1359/// @param subgraph - a Subgraph object that will own the created Node.
1360/// @param input_id - Value ID for the input tensor. The input tensor must be defined in the @a subgraph.
1361/// @param output_id - Value ID for the output tensor. The output tensor must be defined in the @a subgraph, and its
1362/// shape must match the shape of the input tensor.
1363/// @param flags - binary features of the Square Root Node. No supported flags are currently defined.
1364enum xnn_status xnn_define_square_root(
1365 xnn_subgraph_t subgraph,
1366 uint32_t input_id,
1367 uint32_t output_id,
1368 uint32_t flags);
1369
1370/// Define a Static Slice Node and add it to a Subgraph.
1371///
1372/// @param subgraph - a Subgraph object that will own the created Node.
1373/// @param num_dims - number of shape dimensions in the input and output tensor.
1374/// @param offsets - offsets in each dimension of the input tensor. This array must have @a num_dims elements.
1375/// @param sizes - size of each dimension in output tensor. This array must have @a num_dims elements.
1376/// @param input_id - Value ID for the input tensor. The input tensor must be defined in the @a subgraph.
1377/// @param output_id - Value ID for the output tensor. The output tensor must be defined in the @a subgraph, and its
1378/// dimensions must match @a sizes.
1379/// @param flags - binary features of the Static Slice Node. No supported flags are currently defined.
1380enum xnn_status xnn_define_static_slice(
1381 xnn_subgraph_t subgraph,
1382 size_t num_dims,
1383 const size_t* offsets,
1384 const size_t* sizes,
1385 uint32_t input_id,
1386 uint32_t output_id,
1387 uint32_t flags);
1388
1389/// Define a Static Transpose Node and add it to a Subgraph.
1390///
1391/// The Static Transpose Node applies a generalized transpose to the input tensor using the permutation in @a perm.
1392///
1393/// @param subgraph - a Subgraph object that will own the created Node.
1394/// @param num_dims - the number of permutation dimensions. This must be equal to the number of input dimensions.
1395/// @param perm - The permutation of the axes of the input tensor. The perm array must contain 0 to N-1 in the
1396/// permuted order.
1397/// @param input_id - Value ID for the input tensor. The input tensor must be an N-dimensional tensor defined in
1398/// the @a subgraph.
1399/// @param output_id - Value ID for the output tensor. The output tensor must be an N-dimensional tensor defined
1400/// in the @a subgraph with each dimension equal to its corresponding permuted input dimension.
1401/// @param flags - binary features of the Static Transpose Node. No supported flags are currently defined.
1402enum xnn_status xnn_define_static_transpose(
1403 xnn_subgraph_t subgraph,
1404 size_t num_dims,
1405 const size_t* perm,
1406 uint32_t input_id,
1407 uint32_t output_id,
1408 uint32_t flags);
1409
1410/// Weights cache is a cache for packed weights. It can be reused between runtimes.
1411typedef struct xnn_weights_cache* xnn_weights_cache_t;
1412
/// Create a weights cache object without specifying an initial size.
/// NOTE(review): the initial capacity used by this variant is not visible in this header - confirm against the
/// implementation.
/// @param weights_cache_out - pointer to the variable that receives the weights cache handle; presumably initialized
/// only upon successful return, as for @ref xnn_create_weights_cache_with_size - TODO confirm.
1413enum xnn_status xnn_create_weights_cache(xnn_weights_cache_t* weights_cache_out);
1414
1415/// Create a weights cache object specifying the initial size of weights cache (in bytes).
1416/// @param size - initial capacity of the weights cache (in bytes), i.e. it can hold size bytes without growing.
1417/// @param weights_cache_out - pointer to the variable that will be initialized to a handle to the weights cache object
1418/// upon successful return. Once created, the weights cache object can be shared between
1419/// different Runtime objects.
1420enum xnn_status xnn_create_weights_cache_with_size(size_t size, xnn_weights_cache_t* weights_cache_out);
1421
1422
1423/// Weights cache can be finalized in these ways:
1424enum xnn_weights_cache_finalization_kind {
1425 /// Weights cache is finalized, no insert operations into the weights cache are allowed, even if the "inserted"
1426 /// weights already exist in the cache. Weights cache memory will also be trimmed to page boundary and set to
1427 /// read-only (to prevent writes).
1428 xnn_weights_cache_finalization_kind_hard,
1429 /// Weights cache will be finalized with some extra space at the end, this allows for "inserting" into the cache only
1430 /// if the weights are already in the cache, and errors on inserting uncached weights. There is memory overhead.
1431 xnn_weights_cache_finalization_kind_soft,
1432};
1433
1434/// Finalizes the weights cache. The kind of finalization is specified by `finalization_kind`.
1435/// @param weights_cache - the weights cache object to finalize.
1436/// @param finalization_kind - the kind of finalization.
1437enum xnn_status xnn_finalize_weights_cache(
1438 xnn_weights_cache_t weights_cache,
1439 enum xnn_weights_cache_finalization_kind finalization_kind);
1440
1441/// Destroy a weights cache object, as well as memory used for the cache.
1442/// @param weights_cache - the weights cache object to destroy.
1443enum xnn_status xnn_delete_weights_cache(xnn_weights_cache_t weights_cache);
1444
1445typedef struct xnn_workspace* xnn_workspace_t;
1446
1447/// Create a workspace object.
1448/// @param workspace_out - pointer to the variable that will be initialized to a handle to the workspace object upon
1449/// successful return. Once created, the workspace can be shared between different Runtime
1450/// objects.
1451enum xnn_status xnn_create_workspace(xnn_workspace_t* workspace_out);
1452/// Destroy a workspace object, as well as memory used by the workspace. Object destruction can be deferred until all
1453/// Runtime objects created with this workspace are destroyed.
1454/// @param workspace - the workspace object to destroy.
1455enum xnn_status xnn_release_workspace(xnn_workspace_t workspace);
1456
1457/// Runtime is a combination of an execution plan for subgraph Nodes and a memory manager for subgraph Values.
1458typedef struct xnn_runtime* xnn_runtime_t;
1459
/// Kinds of profile information that can be requested through @ref xnn_get_runtime_profiling_info.
1460enum xnn_profile_info {
1461 /// Returns a size_t containing the number of operators.
1462 xnn_profile_info_num_operators,
1463 /// Returns a char[] containing the null-character-separated names of all operators.
1464 xnn_profile_info_operator_name,
1465 /// Returns a uint64_t[] with the runtimes of all operators in the same order as xnn_profile_info_operator_name.
1466 xnn_profile_info_operator_timing,
1467};
1468
1469/// Return profile information for all operators.
1470///
1471/// @param runtime - a Runtime object created with @ref xnn_create_runtime, @ref xnn_create_runtime_v2 or
1472/// @ref xnn_create_runtime_v3.
1473/// @param param_name - type of profile information required.
1474/// @param param_value_size - the size in bytes of memory pointed to by param_value. If this is not sufficient then
1475/// param_value_size_ret will be set to the required size and xnn_status_out_of_memory will be
1476/// returned.
1477/// @param param_value - a pointer to memory location where appropriate values for a given param_name will be written.
1478/// @param param_value_size_ret - returns number of bytes required to write the result if param_value_size is not
1479/// sufficient.
1480enum xnn_status xnn_get_runtime_profiling_info(xnn_runtime_t runtime,
1481 enum xnn_profile_info param_name,
1482 size_t param_value_size,
1483 void* param_value,
1484 size_t* param_value_size_ret);
1485
1486/// Create a Runtime object from a subgraph.
1487///
1488/// @param subgraph - a Subgraph object with all Values and Nodes that would be handled by the runtime. No Values or
1489/// Nodes can be added to the runtime once it is constructed.
1490/// @param weights_cache - a cache for packed weights. The runtime will look up and reuse packed weights in this cache,
1491/// this will reduce memory allocated for packed weights.
1492/// @param workspace - a workspace to hold internal tensors. The runtime will allocate space used for internal tensors
1493/// and track them using workspace. Workspace can be shared and reused across different runtimes. If
1494/// workspace is NULL, there will be no sharing: each runtime has its own workspace.
1495/// @param threadpool - the thread pool to be used for parallelisation of computations in the runtime. If the thread
1496/// pool is NULL, the computation would run on the caller thread without parallelization.
1497/// @param flags - binary features of the runtime. The only currently supported values are
1498/// XNN_FLAG_HINT_SPARSE_INFERENCE, XNN_FLAG_HINT_FP16_INFERENCE, XNN_FLAG_FORCE_FP16_INFERENCE, and
1499/// XNN_FLAG_YIELD_WORKERS. If XNN_FLAG_YIELD_WORKERS is specified, worker threads would be yielded to
1500/// the system scheduler after processing the last operator in the Runtime.
1501/// @param runtime_out - pointer to the variable that will be initialized with a handle to the Runtime object upon
1502/// successful return. Once constructed, the Runtime object is independent of the Subgraph object
1503/// used to create it.
1504enum xnn_status xnn_create_runtime_v4(
1505 xnn_subgraph_t subgraph,
1506 xnn_weights_cache_t weights_cache,
1507 xnn_workspace_t workspace,
1508 pthreadpool_t threadpool,
1509 uint32_t flags,
1510 xnn_runtime_t* runtime_out);
1511
/// Create a Runtime object from a subgraph without specifying a workspace.
///
/// Takes the same arguments as @ref xnn_create_runtime_v4 except @a workspace; see that function for the meaning of
/// each parameter.
/// NOTE(review): presumably behaves like @ref xnn_create_runtime_v4 with a NULL workspace - confirm against the
/// implementation.
1512enum xnn_status xnn_create_runtime_v3(
1513 xnn_subgraph_t subgraph,
1514 xnn_weights_cache_t weights_cache,
1515 pthreadpool_t threadpool,
1516 uint32_t flags,
1517 xnn_runtime_t* runtime_out);
1518
/// Create a Runtime object from a subgraph without specifying a weights cache or a workspace.
///
/// Takes the same arguments as @ref xnn_create_runtime_v4 except @a weights_cache and @a workspace; see that function
/// for the meaning of each parameter.
/// NOTE(review): how the omitted arguments are defaulted is not visible in this header - confirm against the
/// implementation.
1519enum xnn_status xnn_create_runtime_v2(
1520 xnn_subgraph_t subgraph,
1521 pthreadpool_t threadpool,
1522 uint32_t flags,
1523 xnn_runtime_t* runtime_out);
1524
/// Create a Runtime object from a subgraph, specifying only the subgraph and the output handle.
///
/// See @ref xnn_create_runtime_v4 for the meaning of @a subgraph and @a runtime_out.
/// NOTE(review): the thread pool and flags used by this variant are not visible in this header - confirm against the
/// implementation.
1525enum xnn_status xnn_create_runtime(
1526 xnn_subgraph_t subgraph,
1527 xnn_runtime_t* runtime_out);
/// Location information for one external input or output of a Runtime. An array of these is passed to
/// @ref xnn_setup_runtime.
1529struct xnn_external_value {
 /// Identifies the external input or output; presumably its Value ID in the Subgraph - TODO confirm.
1530 uint32_t id;
 /// Data pointer for this external input or output.
1531 void* data;
1532};
1533
1534/// Setup data pointers for external inputs and outputs in a Runtime object.
1535///
1536/// @param runtime - a Runtime object created with @ref xnn_create_runtime or @ref xnn_create_runtime_v2.
1537/// @param num_external_values - the number of external inputs and outputs specified in this call. This number must
1538/// match the number of external inputs and outputs in the runtime, i.e. all external
1539/// inputs and outputs in the runtime must be specified in one call.
1540/// @param external_values - array with location information for all external inputs and outputs in the runtime.
1541enum xnn_status xnn_setup_runtime(
1542 xnn_runtime_t runtime,
1543 size_t num_external_values,
1544 const struct xnn_external_value* external_values);
1545
1546/// Execute forward pass for all operators in the runtime.
1547///
1548/// @param runtime - the Runtime object with the execution plan to invoke.
1549enum xnn_status xnn_invoke_runtime(
1550 xnn_runtime_t runtime);
1551
1552/// Destroy a Runtime object, as well as operators and memory associated with it.
1553///
1554/// @param runtime - the Runtime object to destroy.
1555enum xnn_status xnn_delete_runtime(
1556 xnn_runtime_t runtime);
1557
1558typedef struct xnn_operator* xnn_operator_t;
1559
/// Run an operator.
///
/// @param op - the operator object to run.
/// @param threadpool - thread pool passed to the operator for this run.
/// NOTE(review): presumably the operator must already be configured by the matching xnn_setup_* call, and a NULL
/// thread pool runs on the caller thread as for Runtime objects - confirm against the implementation.
1560enum xnn_status xnn_run_operator(
1561 xnn_operator_t op,
1562 pthreadpool_t threadpool);
1563
/// Delete an operator object.
///
/// @param op - the operator object to delete.
/// NOTE(review): presumably also releases memory owned by the operator - confirm against the implementation.
1564enum xnn_status xnn_delete_operator(
1565 xnn_operator_t op);
1566
1567#ifndef XNN_NO_F32_OPERATORS
1568
1569enum xnn_status xnn_create_abs_nc_f32(
1570 size_t channels,
1571 size_t input_stride,
1572 size_t output_stride,
1573 uint32_t flags,
1574 xnn_operator_t* abs_op_out);
1575
1576enum xnn_status xnn_setup_abs_nc_f32(
1577 xnn_operator_t abs_op,
1578 size_t batch_size,
1579 const float* input,
1580 float* output,
1581 pthreadpool_t threadpool);
1582
1583enum xnn_status xnn_run_abs_nc_f32(
1584 size_t channels,
1585 size_t input_stride,
1586 size_t output_stride,
1587 size_t batch_size,
1588 const float* input,
1589 float* output,
1590 uint32_t flags,
1591 pthreadpool_t threadpool);
1592
1593enum xnn_status xnn_create_add_nd_f32(
1594 float output_min,
1595 float output_max,
1596 uint32_t flags,
1597 xnn_operator_t* add_op_out);
1598
1599enum xnn_status xnn_setup_add_nd_f32(
1600 xnn_operator_t add_op,
1601 size_t num_input1_dims,
1602 const size_t* input1_shape,
1603 size_t num_input2_dims,
1604 const size_t* input2_shape,
1605 const float* input1,
1606 const float* input2,
1607 float* output,
1608 pthreadpool_t threadpool);
1609
1610enum xnn_status xnn_run_add_nd_f32(
1611 size_t num_input1_dims,
1612 const size_t* input1_shape,
1613 size_t num_input2_dims,
1614 const size_t* input2_shape,
1615 const float* input1,
1616 const float* input2,
1617 float* output,
1618 float output_min,
1619 float output_max,
1620 uint32_t flags,
1621 pthreadpool_t threadpool);
1622
1623enum xnn_status xnn_create_argmax_pooling2d_nhwc_f32(
1624 uint32_t input_padding_top,
1625 uint32_t input_padding_right,
1626 uint32_t input_padding_bottom,
1627 uint32_t input_padding_left,
1628 uint32_t pooling_height,
1629 uint32_t pooling_width,
1630 size_t channels,
1631 size_t input_pixel_stride,
1632 size_t output_pixel_stride,
1633 uint32_t flags,
1634 xnn_operator_t* argmax_pooling_op_out);
1635
1636enum xnn_status xnn_setup_argmax_pooling2d_nhwc_f32(
1637 xnn_operator_t argmax_pooling_op,
1638 size_t batch_size,
1639 size_t input_height,
1640 size_t input_width,
1641 const float* input,
1642 float* output,
1643 uint32_t* index,
1644 pthreadpool_t threadpool);
1645
1646enum xnn_status xnn_create_average_pooling2d_nhwc_f32(
1647 uint32_t input_padding_top,
1648 uint32_t input_padding_right,
1649 uint32_t input_padding_bottom,
1650 uint32_t input_padding_left,
1651 uint32_t pooling_height,
1652 uint32_t pooling_width,
1653 uint32_t stride_height,
1654 uint32_t stride_width,
1655 size_t channels,
1656 size_t input_pixel_stride,
1657 size_t output_pixel_stride,
1658 float output_min,
1659 float output_max,
1660 uint32_t flags,
1661 xnn_operator_t* average_pooling_op_out);
1662
1663enum xnn_status xnn_setup_average_pooling2d_nhwc_f32(
1664 xnn_operator_t average_pooling_op,
1665 size_t batch_size,
1666 size_t input_height,
1667 size_t input_width,
1668 const float* input,
1669 float* output,
1670 pthreadpool_t threadpool);
1671
1672enum xnn_status xnn_create_bankers_rounding_nc_f32(
1673 size_t channels,
1674 size_t input_stride,
1675 size_t output_stride,
1676 uint32_t flags,
1677 xnn_operator_t* rounding_op_out);
1678
1679enum xnn_status xnn_setup_bankers_rounding_nc_f32(
1680 xnn_operator_t rounding_op,
1681 size_t batch_size,
1682 const float* input,
1683 float* output,
1684 pthreadpool_t threadpool);
1685
1686enum xnn_status xnn_run_bankers_rounding_nc_f32(
1687 size_t channels,
1688 size_t input_stride,
1689 size_t output_stride,
1690 size_t batch_size,
1691 const float* input,
1692 float* output,
1693 uint32_t flags,
1694 pthreadpool_t threadpool);
1695
1696enum xnn_status xnn_create_ceiling_nc_f32(
1697 size_t channels,
1698 size_t input_stride,
1699 size_t output_stride,
1700 uint32_t flags,
1701 xnn_operator_t* ceiling_op_out);
1702
1703enum xnn_status xnn_run_ceiling_nc_f32(
1704 size_t channels,
1705 size_t input_stride,
1706 size_t output_stride,
1707 size_t batch_size,
1708 const float* input,
1709 float* output,
1710 uint32_t flags,
1711 pthreadpool_t threadpool);
1712
1713enum xnn_status xnn_setup_ceiling_nc_f32(
1714 xnn_operator_t ceiling_op,
1715 size_t batch_size,
1716 const float* input,
1717 float* output,
1718 pthreadpool_t threadpool);
1719
1720enum xnn_status xnn_create_clamp_nc_f32(
1721 size_t channels,
1722 size_t input_stride,
1723 size_t output_stride,
1724 float output_min,
1725 float output_max,
1726 uint32_t flags,
1727 xnn_operator_t* clamp_op_out);
1728
1729enum xnn_status xnn_setup_clamp_nc_f32(
1730 xnn_operator_t clamp_op,
1731 size_t batch_size,
1732 const float* input,
1733 float* output,
1734 pthreadpool_t threadpool);
1735
1736enum xnn_status xnn_run_clamp_nc_f32(
1737 size_t channels,
1738 size_t input_stride,
1739 size_t output_stride,
1740 size_t batch_size,
1741 const float* input,
1742 float* output,
1743 float output_min,
1744 float output_max,
1745 uint32_t flags,
1746 pthreadpool_t threadpool);
1747
1748typedef const struct xnn_caches* xnn_caches_t;
1749
1750enum xnn_status xnn_create_convolution2d_nhwc_f32(
1751 uint32_t input_padding_top,
1752 uint32_t input_padding_right,
1753 uint32_t input_padding_bottom,
1754 uint32_t input_padding_left,
1755 uint32_t kernel_height,
1756 uint32_t kernel_width,
1757 uint32_t subsampling_height,
1758 uint32_t subsampling_width,
1759 uint32_t dilation_height,
1760 uint32_t dilation_width,
1761 uint32_t groups,
1762 size_t group_input_channels,
1763 size_t group_output_channels,
1764 size_t input_channel_stride,
1765 size_t output_channel_stride,
1766 const float* kernel,
1767 const float* bias,
1768 float output_min,
1769 float output_max,
1770 uint32_t flags,
1771 xnn_caches_t caches,
1772 xnn_operator_t* convolution_op_out);
1773
1774// Forward declare.
1775struct xnn_post_operation;
1776
1777/// Create a convolution operator with a number of post operations. The
1778/// convolution operator created using this function does not have output_min
1779/// and output_max. The list of operators in post_operations will be applied in
1780/// order. Convolution with post operations is only supported on JIT platforms
1781/// and when JIT is enabled.
1782enum xnn_status xnn_create_fused_convolution2d_nhwc_f32(
1783 uint32_t input_padding_top,
1784 uint32_t input_padding_right,
1785 uint32_t input_padding_bottom,
1786 uint32_t input_padding_left,
1787 uint32_t kernel_height,
1788 uint32_t kernel_width,
1789 uint32_t subsampling_height,
1790 uint32_t subsampling_width,
1791 uint32_t dilation_height,
1792 uint32_t dilation_width,
1793 uint32_t groups,
1794 size_t group_input_channels,
1795 size_t group_output_channels,
1796 size_t input_channel_stride,
1797 size_t output_channel_stride,
1798 const float* kernel,
1799 const float* bias,
1800 size_t num_post_operations,
1801 struct xnn_post_operation* post_operations,
1802 uint32_t flags,
1803 xnn_caches_t caches,
1804 xnn_operator_t* convolution_op_out);
1805
1806enum xnn_status xnn_setup_convolution2d_nhwc_f32(
1807 xnn_operator_t convolution_op,
1808 size_t batch_size,
1809 size_t input_height,
1810 size_t input_width,
1811 const float* input,
1812 float* output,
1813 pthreadpool_t threadpool);
1814
1815enum xnn_status xnn_create_deconvolution2d_nhwc_f32(
1816 uint32_t output_padding_top,
1817 uint32_t output_padding_right,
1818 uint32_t output_padding_bottom,
1819 uint32_t output_padding_left,
1820 uint32_t kernel_height,
1821 uint32_t kernel_width,
1822 uint32_t stride_height,
1823 uint32_t stride_width,
1824 uint32_t dilation_height,
1825 uint32_t dilation_width,
1826 uint32_t groups,
1827 size_t group_input_channels,
1828 size_t group_output_channels,
1829 size_t input_pixel_stride,
1830 size_t output_pixel_stride,
1831 const float* kernel,
1832 const float* bias,
1833 float output_min,
1834 float output_max,
1835 uint32_t flags,
1836 xnn_caches_t caches,
1837 xnn_operator_t* deconvolution_op_out);
1838
1839enum xnn_status xnn_setup_deconvolution2d_nhwc_f32(
1840 xnn_operator_t deconvolution_op,
1841 size_t batch_size,
1842 size_t input_height,
1843 size_t input_width,
1844 uint32_t adjustment_height,
1845 uint32_t adjustment_width,
1846 const float* input,
1847 float* output,
1848 pthreadpool_t threadpool);
1849
1850enum xnn_status xnn_create_divide_nd_f32(
1851 float output_min,
1852 float output_max,
1853 uint32_t flags,
1854 xnn_operator_t* divide_op_out);
1855
1856enum xnn_status xnn_setup_divide_nd_f32(
1857 xnn_operator_t divide_op,
1858 size_t num_input1_dims,
1859 const size_t* input1_shape,
1860 size_t num_input2_dims,
1861 const size_t* input2_shape,
1862 const float* input1,
1863 const float* input2,
1864 float* output,
1865 pthreadpool_t threadpool);
1866
1867enum xnn_status xnn_run_divide_nd_f32(
1868 size_t num_input1_dims,
1869 const size_t* input1_shape,
1870 size_t num_input2_dims,
1871 const size_t* input2_shape,
1872 const float* input1,
1873 const float* input2,
1874 float* output,
1875 float output_min,
1876 float output_max,
1877 uint32_t flags,
1878 pthreadpool_t threadpool);
1879
1880enum xnn_status xnn_create_elu_nc_f32(
1881 size_t channels,
1882 size_t input_stride,
1883 size_t output_stride,
1884 float alpha,
1885 uint32_t flags,
1886 xnn_operator_t* elu_op_out);
1887
1888enum xnn_status xnn_setup_elu_nc_f32(
1889 xnn_operator_t elu_op,
1890 size_t batch_size,
1891 const float* input,
1892 float* output,
1893 pthreadpool_t threadpool);
1894
1895enum xnn_status xnn_run_elu_nc_f32(
1896 size_t channels,
1897 size_t input_stride,
1898 size_t output_stride,
1899 size_t batch_size,
1900 const float* input,
1901 float* output,
1902 float alpha,
1903 uint32_t flags,
1904 pthreadpool_t threadpool);
1905
1906enum xnn_status xnn_create_floor_nc_f32(
1907 size_t channels,
1908 size_t input_stride,
1909 size_t output_stride,
1910 uint32_t flags,
1911 xnn_operator_t* floor_op_out);
1912
1913enum xnn_status xnn_setup_floor_nc_f32(
1914 xnn_operator_t floor_op,
1915 size_t batch_size,
1916 const float* input,
1917 float* output,
1918 pthreadpool_t threadpool);
1919
1920enum xnn_status xnn_run_floor_nc_f32(
1921 size_t channels,
1922 size_t input_stride,
1923 size_t output_stride,
1924 size_t batch_size,
1925 const float* input,
1926 float* output,
1927 uint32_t flags,
1928 pthreadpool_t threadpool);
1929
1930enum xnn_status xnn_create_fully_connected_nc_f32(
1931 size_t input_channels,
1932 size_t output_channels,
1933 size_t input_stride,
1934 size_t output_stride,
1935 const float* kernel,
1936 const float* bias,
1937 float output_min,
1938 float output_max,
1939 uint32_t flags,
1940 const xnn_caches_t caches,
1941 xnn_operator_t* fully_connected_op_out);
1942
1943enum xnn_status xnn_setup_fully_connected_nc_f32(
1944 xnn_operator_t fully_connected_op,
1945 size_t batch_size,
1946 const float* input,
1947 float* output,
1948 pthreadpool_t threadpool);
1949
1950enum xnn_status xnn_create_global_average_pooling_nwc_f32(
1951 size_t channels,
1952 size_t input_stride,
1953 size_t output_stride,
1954 float output_min,
1955 float output_max,
1956 uint32_t flags,
1957 xnn_operator_t* global_average_pooling_op_out);
1958
1959enum xnn_status xnn_setup_global_average_pooling_nwc_f32(
1960 xnn_operator_t global_average_pooling_op,
1961 size_t batch_size,
1962 size_t width,
1963 const float* input,
1964 float* output,
1965 pthreadpool_t threadpool);
1966
1967enum xnn_status xnn_create_hardswish_nc_f32(
1968 size_t channels,
1969 size_t input_stride,
1970 size_t output_stride,
1971 uint32_t flags,
1972 xnn_operator_t* hardswish_op_out);
1973
1974enum xnn_status xnn_setup_hardswish_nc_f32(
1975 xnn_operator_t hardswish_op,
1976 size_t batch_size,
1977 const float* input,
1978 float* output,
1979 pthreadpool_t threadpool);
1980
1981enum xnn_status xnn_run_hardswish_nc_f32(
1982 size_t channels,
1983 size_t input_stride,
1984 size_t output_stride,
1985 size_t batch_size,
1986 const float* input,
1987 float* output,
1988 uint32_t flags,
1989 pthreadpool_t threadpool);
1990
1991enum xnn_status xnn_create_leaky_relu_nc_f32(
1992 size_t channels,
1993 size_t input_stride,
1994 size_t output_stride,
1995 float negative_slope,
1996 uint32_t flags,
1997 xnn_operator_t* leaky_relu_op_out);
1998
1999enum xnn_status xnn_setup_leaky_relu_nc_f32(
2000 xnn_operator_t leaky_relu_op,
2001 size_t batch_size,
2002 const float* input,
2003 float* output,
2004 pthreadpool_t threadpool);
2005
2006enum xnn_status xnn_run_leaky_relu_nc_f32(
2007 size_t channels,
2008 size_t input_stride,
2009 size_t output_stride,
2010 size_t batch_size,
2011 const float* input,
2012 float* output,
2013 float negative_slope,
2014 uint32_t flags,
2015 pthreadpool_t threadpool);
2016
2017enum xnn_status xnn_create_max_pooling2d_nhwc_f32(
2018 uint32_t input_padding_top,
2019 uint32_t input_padding_right,
2020 uint32_t input_padding_bottom,
2021 uint32_t input_padding_left,
2022 uint32_t pooling_height,
2023 uint32_t pooling_width,
2024 uint32_t stride_height,
2025 uint32_t stride_width,
2026 uint32_t dilation_height,
2027 uint32_t dilation_width,
2028 size_t channels,
2029 size_t input_pixel_stride,
2030 size_t output_pixel_stride,
2031 float output_min,
2032 float output_max,
2033 uint32_t flags,
2034 xnn_operator_t* max_pooling_op_out);
2035
2036enum xnn_status xnn_setup_max_pooling2d_nhwc_f32(
2037 xnn_operator_t max_pooling_op,
2038 size_t batch_size,
2039 size_t input_height,
2040 size_t input_width,
2041 const float* input,
2042 float* output,
2043 pthreadpool_t threadpool);
2044
2045enum xnn_status xnn_create_maximum_nd_f32(
2046 uint32_t flags,
2047 xnn_operator_t* maximum_op_out);
2048
2049enum xnn_status xnn_setup_maximum_nd_f32(
2050 xnn_operator_t maximum_op,
2051 size_t num_input1_dims,
2052 const size_t* input1_shape,
2053 size_t num_input2_dims,
2054 const size_t* input2_shape,
2055 const float* input1,
2056 const float* input2,
2057 float* output,
2058 pthreadpool_t threadpool);
2059
2060enum xnn_status xnn_run_maximum_nd_f32(
2061 size_t num_input1_dims,
2062 const size_t* input1_shape,
2063 size_t num_input2_dims,
2064 const size_t* input2_shape,
2065 const float* input1,
2066 const float* input2,
2067 float* output,
2068 float output_min,
2069 float output_max,
2070 uint32_t flags,
2071 pthreadpool_t threadpool);
2072
2073enum xnn_status xnn_create_minimum_nd_f32(
2074 uint32_t flags,
2075 xnn_operator_t* minimum_op_out);
2076
2077enum xnn_status xnn_setup_minimum_nd_f32(
2078 xnn_operator_t minimum_op,
2079 size_t num_input1_dims,
2080 const size_t* input1_shape,
2081 size_t num_input2_dims,
2082 const size_t* input2_shape,
2083 const float* input1,
2084 const float* input2,
2085 float* output,
2086 pthreadpool_t threadpool);
2087
2088enum xnn_status xnn_run_minimum_nd_f32(
2089 size_t num_input1_dims,
2090 const size_t* input1_shape,
2091 size_t num_input2_dims,
2092 const size_t* input2_shape,
2093 const float* input1,
2094 const float* input2,
2095 float* output,
2096 float output_min,
2097 float output_max,
2098 uint32_t flags,
2099 pthreadpool_t threadpool);
2100
2101enum xnn_status xnn_create_multiply_nd_f32(
2102 float output_min,
2103 float output_max,
2104 uint32_t flags,
2105 xnn_operator_t* multiply_op_out);
2106
2107enum xnn_status xnn_setup_multiply_nd_f32(
2108 xnn_operator_t multiply_op,
2109 size_t num_input1_dims,
2110 const size_t* input1_shape,
2111 size_t num_input2_dims,
2112 const size_t* input2_shape,
2113 const float* input1,
2114 const float* input2,
2115 float* output,
2116 pthreadpool_t threadpool);
2117
2118enum xnn_status xnn_run_multiply_nd_f32(
2119 size_t num_input1_dims,
2120 const size_t* input1_shape,
2121 size_t num_input2_dims,
2122 const size_t* input2_shape,
2123 const float* input1,
2124 const float* input2,
2125 float* output,
2126 float output_min,
2127 float output_max,
2128 uint32_t flags,
2129 pthreadpool_t threadpool);
2130
2131enum xnn_status xnn_create_negate_nc_f32(
2132 size_t channels,
2133 size_t input_stride,
2134 size_t output_stride,
2135 uint32_t flags,
2136 xnn_operator_t* negate_op_out);
2137
2138enum xnn_status xnn_setup_negate_nc_f32(
2139 xnn_operator_t negate_op,
2140 size_t batch_size,
2141 const float* input,
2142 float* output,
2143 pthreadpool_t threadpool);
2144
2145enum xnn_status xnn_run_negate_nc_f32(
2146 size_t channels,
2147 size_t input_stride,
2148 size_t output_stride,
2149 size_t batch_size,
2150 const float* input,
2151 float* output,
2152 uint32_t flags,
2153 pthreadpool_t threadpool);
2154
2155enum xnn_status xnn_create_prelu_nc_f32(
2156 size_t channels,
2157 size_t input_stride,
2158 size_t output_stride,
2159 const float* negative_slope,
2160 uint32_t flags,
2161 xnn_caches_t caches,
2162 xnn_operator_t* prelu_op_out);
2163
2164enum xnn_status xnn_setup_prelu_nc_f32(
2165 xnn_operator_t prelu_op,
2166 size_t batch_size,
2167 const float* input,
2168 float* output,
2169 pthreadpool_t threadpool);
2170
2171enum xnn_status xnn_create_resize_bilinear2d_nhwc_f32(
2172 size_t channels,
2173 size_t input_pixel_stride,
2174 size_t output_pixel_stride,
2175 uint32_t flags,
2176 xnn_operator_t* resize_op_out);
2177
2178enum xnn_status xnn_setup_resize_bilinear2d_nhwc_f32(
2179 xnn_operator_t resize_op,
2180 size_t batch_size,
2181 size_t input_height,
2182 size_t input_width,
2183 size_t output_height,
2184 size_t output_width,
2185 const float* input,
2186 float* output,
2187 pthreadpool_t threadpool);
2188
2189enum xnn_status xnn_create_sigmoid_nc_f32(
2190 size_t channels,
2191 size_t input_stride,
2192 size_t output_stride,
2193 uint32_t flags,
2194 xnn_operator_t* sigmoid_op_out);
2195
2196enum xnn_status xnn_setup_sigmoid_nc_f32(
2197 xnn_operator_t sigmoid_op,
2198 size_t batch_size,
2199 const float* input,
2200 float* output,
2201 pthreadpool_t threadpool);
2202
2203enum xnn_status xnn_run_sigmoid_nc_f32(
2204 size_t channels,
2205 size_t input_stride,
2206 size_t output_stride,
2207 size_t batch_size,
2208 const float* input,
2209 float* output,
2210 uint32_t flags,
2211 pthreadpool_t threadpool);
2212
2213enum xnn_status xnn_create_softmax_nc_f32(
2214 size_t channels,
2215 size_t input_stride,
2216 size_t output_stride,
2217 uint32_t flags,
2218 xnn_operator_t* softmax_op_out);
2219
2220enum xnn_status xnn_setup_softmax_nc_f32(
2221 xnn_operator_t softmax_op,
2222 size_t batch_size,
2223 const float* input,
2224 float* output,
2225 pthreadpool_t threadpool);
2226
2227enum xnn_status xnn_create_square_nc_f32(
2228 size_t channels,
2229 size_t input_stride,
2230 size_t output_stride,
2231 uint32_t flags,
2232 xnn_operator_t* square_op_out);
2233
2234enum xnn_status xnn_setup_square_nc_f32(
2235 xnn_operator_t square_op,
2236 size_t batch_size,
2237 const float* input,
2238 float* output,
2239 pthreadpool_t threadpool);
2240
2241enum xnn_status xnn_run_square_nc_f32(
2242 size_t channels,
2243 size_t input_stride,
2244 size_t output_stride,
2245 size_t batch_size,
2246 const float* input,
2247 float* output,
2248 uint32_t flags,
2249 pthreadpool_t threadpool);
2250
2251enum xnn_status xnn_create_square_root_nc_f32(
2252 size_t channels,
2253 size_t input_stride,
2254 size_t output_stride,
2255 uint32_t flags,
2256 xnn_operator_t* sqrt_op_out);
2257
2258enum xnn_status xnn_setup_square_root_nc_f32(
2259 xnn_operator_t sqrt_op,
2260 size_t batch_size,
2261 const float* input,
2262 float* output,
2263 pthreadpool_t threadpool);
2264
2265enum xnn_status xnn_run_square_root_nc_f32(
2266 size_t channels,
2267 size_t input_stride,
2268 size_t output_stride,
2269 size_t batch_size,
2270 const float* input,
2271 float* output,
2272 uint32_t flags,
2273 pthreadpool_t threadpool);
2274
2275enum xnn_status xnn_create_squared_difference_nd_f32(
2276 uint32_t flags,
2277 xnn_operator_t* squared_difference_op_out);
2278
2279enum xnn_status xnn_setup_squared_difference_nd_f32(
2280 xnn_operator_t squared_difference_op,
2281 size_t num_input1_dims,
2282 const size_t* input1_shape,
2283 size_t num_input2_dims,
2284 const size_t* input2_shape,
2285 const float* input1,
2286 const float* input2,
2287 float* output,
2288 pthreadpool_t threadpool);
2289
2290enum xnn_status xnn_run_squared_difference_nd_f32(
2291 size_t num_input1_dims,
2292 const size_t* input1_shape,
2293 size_t num_input2_dims,
2294 const size_t* input2_shape,
2295 const float* input1,
2296 const float* input2,
2297 float* output,
2298 float output_min,
2299 float output_max,
2300 uint32_t flags,
2301 pthreadpool_t threadpool);
2302
2303enum xnn_status xnn_create_subtract_nd_f32(
2304 float output_min,
2305 float output_max,
2306 uint32_t flags,
2307 xnn_operator_t* subtract_op_out);
2308
2309enum xnn_status xnn_setup_subtract_nd_f32(
2310 xnn_operator_t subtract_op,
2311 size_t num_input1_dims,
2312 const size_t* input1_shape,
2313 size_t num_input2_dims,
2314 const size_t* input2_shape,
2315 const float* input1,
2316 const float* input2,
2317 float* output,
2318 pthreadpool_t threadpool);
2319
2320enum xnn_status xnn_run_subtract_nd_f32(
2321 size_t num_input1_dims,
2322 const size_t* input1_shape,
2323 size_t num_input2_dims,
2324 const size_t* input2_shape,
2325 const float* input1,
2326 const float* input2,
2327 float* output,
2328 float output_min,
2329 float output_max,
2330 uint32_t flags,
2331 pthreadpool_t threadpool);
2332
2333enum xnn_status xnn_create_truncation_nc_f32(
2334 size_t channels,
2335 size_t input_stride,
2336 size_t output_stride,
2337 uint32_t flags,
2338 xnn_operator_t* truncation_op_out);
2339
2340enum xnn_status xnn_setup_truncation_nc_f32(
2341 xnn_operator_t truncation_op,
2342 size_t batch_size,
2343 const float* input,
2344 float* output,
2345 pthreadpool_t threadpool);
2346
2347enum xnn_status xnn_run_truncation_nc_f32(
2348 size_t channels,
2349 size_t input_stride,
2350 size_t output_stride,
2351 size_t batch_size,
2352 const float* input,
2353 float* output,
2354 uint32_t flags,
2355 pthreadpool_t threadpool);
2356
2357#ifndef XNN_NO_NCHW_OPERATORS
2358
2359enum xnn_status xnn_create_depth_to_space_nchw2nhwc_x32(
2360 size_t output_channels,
2361 size_t input_channel_stride,
2362 size_t output_channel_stride,
2363 uint32_t block_size,
2364 uint32_t flags,
2365 xnn_operator_t* depth_to_space_op_out);
2366
2367enum xnn_status xnn_setup_depth_to_space_nchw2nhwc_x32(
2368 xnn_operator_t depth_to_space_op,
2369 size_t batch_size,
2370 size_t input_height,
2371 size_t input_width,
2372 const void* input,
2373 void* output,
2374 pthreadpool_t threadpool);
2375
2376enum xnn_status xnn_create_convolution2d_nchw_f32(
2377 uint32_t input_padding_top,
2378 uint32_t input_padding_right,
2379 uint32_t input_padding_bottom,
2380 uint32_t input_padding_left,
2381 uint32_t kernel_height,
2382 uint32_t kernel_width,
2383 uint32_t subsampling_height,
2384 uint32_t subsampling_width,
2385 uint32_t dilation_height,
2386 uint32_t dilation_width,
2387 uint32_t groups,
2388 size_t group_input_channels,
2389 size_t group_output_channels,
2390 size_t input_channel_stride,
2391 size_t output_channel_stride,
2392 const float* kernel,
2393 const float* bias,
2394 float output_min,
2395 float output_max,
2396 uint32_t flags,
2397 xnn_caches_t caches,
2398 xnn_operator_t* convolution_op_out);
2399
2400enum xnn_status xnn_setup_convolution2d_nchw_f32(
2401 xnn_operator_t convolution_op,
2402 size_t batch_size,
2403 size_t input_height,
2404 size_t input_width,
2405 const float* input,
2406 float* output,
2407 pthreadpool_t threadpool);
2408
2409enum xnn_status xnn_create_global_average_pooling_ncw_f32(
2410 size_t channels,
2411 float output_min,
2412 float output_max,
2413 uint32_t flags,
2414 xnn_operator_t* global_average_pooling_op_out);
2415
2416enum xnn_status xnn_setup_global_average_pooling_ncw_f32(
2417 xnn_operator_t global_average_pooling_op,
2418 size_t batch_size,
2419 size_t width,
2420 const float* input,
2421 float* output,
2422 pthreadpool_t threadpool);
2423
2424enum xnn_status xnn_create_resize_bilinear2d_nchw_f32(
2425 size_t channels,
2426 size_t input_pixel_stride,
2427 size_t output_pixel_stride,
2428 uint32_t flags,
2429 xnn_operator_t* resize_op_out);
2430
2431enum xnn_status xnn_setup_resize_bilinear2d_nchw_f32(
2432 xnn_operator_t resize_op,
2433 size_t batch_size,
2434 size_t input_height,
2435 size_t input_width,
2436 size_t output_height,
2437 size_t output_width,
2438 const float* input,
2439 float* output,
2440 pthreadpool_t threadpool);
2441
2442#endif // XNN_NO_NCHW_OPERATORS
2443
2444#endif // XNN_NO_F32_OPERATORS
2445
2446#ifndef XNN_NO_X32_OPERATORS
2447
2448enum xnn_status xnn_create_channel_shuffle_nc_x32(
2449 size_t groups,
2450 size_t group_channels,
2451 size_t input_stride,
2452 size_t output_stride,
2453 uint32_t flags,
2454 xnn_operator_t* channel_shuffle_op_out);
2455
2456enum xnn_status xnn_setup_channel_shuffle_nc_x32(
2457 xnn_operator_t channel_shuffle_op,
2458 size_t batch_size,
2459 const void* input,
2460 void* output,
2461 pthreadpool_t threadpool);
2462
2463enum xnn_status xnn_create_constant_pad_nd_x32(
2464 const void* padding_value,
2465 uint32_t flags,
2466 xnn_operator_t* constant_pad_op_out);
2467
2468enum xnn_status xnn_setup_constant_pad_nd_x32(
2469 xnn_operator_t constant_pad_op,
2470 size_t num_dims,
2471 const size_t* input_shape,
2472 const size_t* pre_padding,
2473 const size_t* post_padding,
2474 const void* input,
2475 void* output,
2476 pthreadpool_t threadpool);
2477
2478enum xnn_status xnn_run_constant_pad_nd_x32(
2479 uint32_t flags,
2480 size_t num_dims,
2481 const size_t* input_shape,
2482 const size_t* pre_paddings,
2483 const size_t* post_paddings,
2484 const void* input,
2485 void* output,
2486 const void* padding_value,
2487 pthreadpool_t threadpool);
2488
2489enum xnn_status xnn_create_copy_nc_x32(
2490 size_t channels,
2491 size_t input_stride,
2492 size_t output_stride,
2493 uint32_t flags,
2494 xnn_operator_t* copy_op_out);
2495
2496enum xnn_status xnn_setup_copy_nc_x32(
2497 xnn_operator_t copy_op,
2498 size_t batch_size,
2499 const void* input,
2500 void* output,
2501 pthreadpool_t threadpool);
2502
2503enum xnn_status xnn_run_copy_nc_x32(
2504 size_t channels,
2505 size_t input_stride,
2506 size_t output_stride,
2507 size_t batch_size,
2508 const uint32_t* input,
2509 uint32_t* output,
2510 uint32_t flags,
2511 pthreadpool_t threadpool);
2512
2513enum xnn_status xnn_create_depth_to_space_nhwc_x32(
2514 size_t output_channels,
2515 size_t input_channel_stride,
2516 size_t output_channel_stride,
2517 uint32_t block_size,
2518 uint32_t flags,
2519 xnn_operator_t* depth_to_space_op_out);
2520
2521enum xnn_status xnn_setup_depth_to_space_nhwc_x32(
2522 xnn_operator_t depth_to_space_op,
2523 size_t batch_size,
2524 size_t input_height,
2525 size_t input_width,
2526 const void* input,
2527 void* output,
2528 pthreadpool_t threadpool);
2529
2530enum xnn_status xnn_create_slice_nd_x32(
2531 uint32_t flags,
2532 xnn_operator_t* slice_op_out);
2533
2534enum xnn_status xnn_setup_slice_nd_x32(
2535 xnn_operator_t slice_op,
2536 size_t num_dims,
2537 const size_t* input_shape,
2538 const size_t* offsets,
2539 const size_t* sizes,
2540 const void* input,
2541 void* output,
2542 pthreadpool_t threadpool);
2543
2544enum xnn_status xnn_run_slice_nd_x32(
2545 size_t num_dims,
2546 const size_t* input_shape,
2547 const size_t* offsets,
2548 const size_t* sizes,
2549 const void* input,
2550 void* output,
2551 uint32_t flags,
2552 pthreadpool_t threadpool);
2553
2554enum xnn_status xnn_create_space_to_depth_nhwc_x32(
2555 size_t input_channels,
2556 size_t input_channel_stride,
2557 size_t output_channel_stride,
2558 uint32_t block_size,
2559 uint32_t flags,
2560 xnn_operator_t* space_to_depth_op_out);
2561
2562enum xnn_status xnn_setup_space_to_depth_nhwc_x32(
2563 xnn_operator_t space_to_depth_op,
2564 size_t batch_size,
2565 size_t input_height,
2566 size_t input_width,
2567 const void* input,
2568 void* output,
2569 pthreadpool_t threadpool);
2570
2571enum xnn_status xnn_create_transpose_nd_x32(
2572 uint32_t flags,
2573 xnn_operator_t* transpose_op_out);
2574
2575enum xnn_status xnn_setup_transpose_nd_x32(
2576 xnn_operator_t transpose_op,
2577 const void* input,
2578 void* output,
2579 const size_t num_dims,
2580 const size_t* input_shape,
2581 const size_t* output_perm,
2582 pthreadpool_t threadpool);
2583
2584enum xnn_status xnn_run_transpose_nd_x32(
2585 const void* input,
2586 void* output,
2587 const size_t num_dims,
2588 const size_t* input_shape,
2589 const size_t* output_perm,
2590 uint32_t flags,
2591 pthreadpool_t threadpool);
2592
2593enum xnn_status xnn_create_unpooling2d_nhwc_x32(
2594 uint32_t input_padding_top,
2595 uint32_t input_padding_right,
2596 uint32_t input_padding_bottom,
2597 uint32_t input_padding_left,
2598 uint32_t pooling_height,
2599 uint32_t pooling_width,
2600 size_t channels,
2601 size_t input_pixel_stride,
2602 size_t output_pixel_stride,
2603 uint32_t flags,
2604 xnn_operator_t* unpooling_op_out);
2605
2606enum xnn_status xnn_setup_unpooling2d_nhwc_x32(
2607 xnn_operator_t unpooling_op,
2608 size_t batch_size,
2609 size_t input_height,
2610 size_t input_width,
2611 const void* input,
2612 const uint32_t* index,
2613 void* output,
2614 pthreadpool_t threadpool);
2615
2616#endif // XNN_NO_X32_OPERATORS
2617
2618#ifndef XNN_NO_F16_OPERATORS
2619
2620enum xnn_status xnn_create_abs_nc_f16(
2621 size_t channels,
2622 size_t input_stride,
2623 size_t output_stride,
2624 uint32_t flags,
2625 xnn_operator_t* abs_op_out);
2626
2627enum xnn_status xnn_setup_abs_nc_f16(
2628 xnn_operator_t abs_op,
2629 size_t batch_size,
2630 const void* input,
2631 void* output,
2632 pthreadpool_t threadpool);
2633
2634enum xnn_status xnn_create_add_nd_f16(
2635 float output_min,
2636 float output_max,
2637 uint32_t flags,
2638 xnn_operator_t* add_op_out);
2639
2640enum xnn_status xnn_setup_add_nd_f16(
2641 xnn_operator_t add_op,
2642 size_t num_input1_dims,
2643 const size_t* input1_shape,
2644 size_t num_input2_dims,
2645 const size_t* input2_shape,
2646 const void* input1,
2647 const void* input2,
2648 void* output,
2649 pthreadpool_t threadpool);
2650
2651enum xnn_status xnn_create_average_pooling2d_nhwc_f16(
2652 uint32_t input_padding_top,
2653 uint32_t input_padding_right,
2654 uint32_t input_padding_bottom,
2655 uint32_t input_padding_left,
2656 uint32_t pooling_height,
2657 uint32_t pooling_width,
2658 uint32_t stride_height,
2659 uint32_t stride_width,
2660 size_t channels,
2661 size_t input_pixel_stride,
2662 size_t output_pixel_stride,
2663 float output_min,
2664 float output_max,
2665 uint32_t flags,
2666 xnn_operator_t* average_pooling_op_out);
2667
2668enum xnn_status xnn_setup_average_pooling2d_nhwc_f16(
2669 xnn_operator_t average_pooling_op,
2670 size_t batch_size,
2671 size_t input_height,
2672 size_t input_width,
2673 const void* input,
2674 void* output,
2675 pthreadpool_t threadpool);
2676
2677enum xnn_status xnn_create_bankers_rounding_nc_f16(
2678 size_t channels,
2679 size_t input_stride,
2680 size_t output_stride,
2681 uint32_t flags,
2682 xnn_operator_t* rounding_op_out);
2683
2684enum xnn_status xnn_setup_bankers_rounding_nc_f16(
2685 xnn_operator_t rounding_op,
2686 size_t batch_size,
2687 const void* input,
2688 void* output,
2689 pthreadpool_t threadpool);
2690
2691enum xnn_status xnn_create_ceiling_nc_f16(
2692 size_t channels,
2693 size_t input_stride,
2694 size_t output_stride,
2695 uint32_t flags,
2696 xnn_operator_t* ceiling_op_out);
2697
2698enum xnn_status xnn_setup_ceiling_nc_f16(
2699 xnn_operator_t ceiling_op,
2700 size_t batch_size,
2701 const void* input,
2702 void* output,
2703 pthreadpool_t threadpool);
2704
2705enum xnn_status xnn_create_clamp_nc_f16(
2706 size_t channels,
2707 size_t input_stride,
2708 size_t output_stride,
2709 float output_min,
2710 float output_max,
2711 uint32_t flags,
2712 xnn_operator_t* clamp_op_out);
2713
2714enum xnn_status xnn_setup_clamp_nc_f16(
2715 xnn_operator_t clamp_op,
2716 size_t batch_size,
2717 const void* input,
2718 void* output,
2719 pthreadpool_t threadpool);
2720
2721enum xnn_status xnn_create_convolution2d_nhwc_f16(
2722 uint32_t input_padding_top,
2723 uint32_t input_padding_right,
2724 uint32_t input_padding_bottom,
2725 uint32_t input_padding_left,
2726 uint32_t kernel_height,
2727 uint32_t kernel_width,
2728 uint32_t subsampling_height,
2729 uint32_t subsampling_width,
2730 uint32_t dilation_height,
2731 uint32_t dilation_width,
2732 uint32_t groups,
2733 size_t group_input_channels,
2734 size_t group_output_channels,
2735 size_t input_channel_stride,
2736 size_t output_channel_stride,
2737 const void* kernel,
2738 const void* bias,
2739 float output_min,
2740 float output_max,
2741 uint32_t flags,
2742 xnn_caches_t caches,
2743 xnn_operator_t* convolution_op_out);
2744
2745enum xnn_status xnn_setup_convolution2d_nhwc_f16(
2746 xnn_operator_t convolution_op,
2747 size_t batch_size,
2748 size_t input_height,
2749 size_t input_width,
2750 const void* input,
2751 void* output,
2752 pthreadpool_t threadpool);
2753
2754enum xnn_status xnn_create_deconvolution2d_nhwc_f16(
2755 uint32_t output_padding_top,
2756 uint32_t output_padding_right,
2757 uint32_t output_padding_bottom,
2758 uint32_t output_padding_left,
2759 uint32_t kernel_height,
2760 uint32_t kernel_width,
2761 uint32_t stride_height,
2762 uint32_t stride_width,
2763 uint32_t dilation_height,
2764 uint32_t dilation_width,
2765 uint32_t groups,
2766 size_t group_input_channels,
2767 size_t group_output_channels,
2768 size_t input_pixel_stride,
2769 size_t output_pixel_stride,
2770 const void* kernel,
2771 const void* bias,
2772 float output_min,
2773 float output_max,
2774 uint32_t flags,
2775 xnn_caches_t caches,
2776 xnn_operator_t* deconvolution_op_out);
2777
2778enum xnn_status xnn_setup_deconvolution2d_nhwc_f16(
2779 xnn_operator_t deconvolution_op,
2780 size_t batch_size,
2781 size_t input_height,
2782 size_t input_width,
2783 uint32_t adjustment_height,
2784 uint32_t adjustment_width,
2785 const void* input,
2786 void* output,
2787 pthreadpool_t threadpool);
2788
2789enum xnn_status xnn_create_divide_nd_f16(
2790 float output_min,
2791 float output_max,
2792 uint32_t flags,
2793 xnn_operator_t* divide_op_out);
2794
2795enum xnn_status xnn_setup_divide_nd_f16(
2796 xnn_operator_t divide_op,
2797 size_t num_input1_dims,
2798 const size_t* input1_shape,
2799 size_t num_input2_dims,
2800 const size_t* input2_shape,
2801 const void* input1,
2802 const void* input2,
2803 void* output,
2804 pthreadpool_t threadpool);
2805
2806enum xnn_status xnn_create_elu_nc_f16(
2807 size_t channels,
2808 size_t input_stride,
2809 size_t output_stride,
2810 float alpha,
2811 uint32_t flags,
2812 xnn_operator_t* elu_op_out);
2813
2814enum xnn_status xnn_setup_elu_nc_f16(
2815 xnn_operator_t elu_op,
2816 size_t batch_size,
2817 const void* input,
2818 void* output,
2819 pthreadpool_t threadpool);
2820
2821enum xnn_status xnn_create_floor_nc_f16(
2822 size_t channels,
2823 size_t input_stride,
2824 size_t output_stride,
2825 uint32_t flags,
2826 xnn_operator_t* floor_op_out);
2827
2828enum xnn_status xnn_setup_floor_nc_f16(
2829 xnn_operator_t floor_op,
2830 size_t batch_size,
2831 const void* input,
2832 void* output,
2833 pthreadpool_t threadpool);
2834
2835enum xnn_status xnn_create_fully_connected_nc_f16(
2836 size_t input_channels,
2837 size_t output_channels,
2838 size_t input_stride,
2839 size_t output_stride,
2840 const void* kernel,
2841 const void* bias,
2842 float output_min,
2843 float output_max,
2844 uint32_t flags,
2845 xnn_caches_t caches,
2846 xnn_operator_t* fully_connected_op_out);
2847
2848enum xnn_status xnn_setup_fully_connected_nc_f16(
2849 xnn_operator_t fully_connected_op,
2850 size_t batch_size,
2851 const void* input,
2852 void* output,
2853 pthreadpool_t threadpool);
2854
2855enum xnn_status xnn_create_global_average_pooling_nwc_f16(
2856 size_t channels,
2857 size_t input_stride,
2858 size_t output_stride,
2859 float output_min,
2860 float output_max,
2861 uint32_t flags,
2862 xnn_operator_t* global_average_pooling_op_out);
2863
2864enum xnn_status xnn_setup_global_average_pooling_nwc_f16(
2865 xnn_operator_t global_average_pooling_op,
2866 size_t batch_size,
2867 size_t width,
2868 const void* input,
2869 void* output,
2870 pthreadpool_t threadpool);
2871
2872enum xnn_status xnn_create_hardswish_nc_f16(
2873 size_t channels,
2874 size_t input_stride,
2875 size_t output_stride,
2876 uint32_t flags,
2877 xnn_operator_t* hardswish_op_out);
2878
2879enum xnn_status xnn_setup_hardswish_nc_f16(
2880 xnn_operator_t hardswish_op,
2881 size_t batch_size,
2882 const void* input,
2883 void* output,
2884 pthreadpool_t threadpool);
2885
2886enum xnn_status xnn_create_leaky_relu_nc_f16(
2887 size_t channels,
2888 size_t input_stride,
2889 size_t output_stride,
2890 float negative_slope,
2891 uint32_t flags,
2892 xnn_operator_t* leaky_relu_op_out);
2893
2894enum xnn_status xnn_setup_leaky_relu_nc_f16(
2895 xnn_operator_t leaky_relu_op,
2896 size_t batch_size,
2897 const void* input,
2898 void* output,
2899 pthreadpool_t threadpool);
2900
2901enum xnn_status xnn_create_max_pooling2d_nhwc_f16(
2902 uint32_t input_padding_top,
2903 uint32_t input_padding_right,
2904 uint32_t input_padding_bottom,
2905 uint32_t input_padding_left,
2906 uint32_t pooling_height,
2907 uint32_t pooling_width,
2908 uint32_t stride_height,
2909 uint32_t stride_width,
2910 uint32_t dilation_height,
2911 uint32_t dilation_width,
2912 size_t channels,
2913 size_t input_pixel_stride,
2914 size_t output_pixel_stride,
2915 float output_min,
2916 float output_max,
2917 uint32_t flags,
2918 xnn_operator_t* max_pooling_op_out);
2919
2920enum xnn_status xnn_setup_max_pooling2d_nhwc_f16(
2921 xnn_operator_t max_pooling_op,
2922 size_t batch_size,
2923 size_t input_height,
2924 size_t input_width,
2925 const void* input,
2926 void* output,
2927 pthreadpool_t threadpool);
2928
2929enum xnn_status xnn_create_maximum_nd_f16(
2930 uint32_t flags,
2931 xnn_operator_t* maximum_op_out);
2932
2933enum xnn_status xnn_setup_maximum_nd_f16(
2934 xnn_operator_t maximum_op,
2935 size_t num_input1_dims,
2936 const size_t* input1_shape,
2937 size_t num_input2_dims,
2938 const size_t* input2_shape,
2939 const void* input1,
2940 const void* input2,
2941 void* output,
2942 pthreadpool_t threadpool);
2943
2944enum xnn_status xnn_create_minimum_nd_f16(
2945 uint32_t flags,
2946 xnn_operator_t* minimum_op_out);
2947
2948enum xnn_status xnn_setup_minimum_nd_f16(
2949 xnn_operator_t minimum_op,
2950 size_t num_input1_dims,
2951 const size_t* input1_shape,
2952 size_t num_input2_dims,
2953 const size_t* input2_shape,
2954 const void* input1,
2955 const void* input2,
2956 void* output,
2957 pthreadpool_t threadpool);
2958
2959enum xnn_status xnn_create_multiply_nd_f16(
2960 float output_min,
2961 float output_max,
2962 uint32_t flags,
2963 xnn_operator_t* multiply_op_out);
2964
2965enum xnn_status xnn_setup_multiply_nd_f16(
2966 xnn_operator_t multiply_op,
2967 size_t num_input1_dims,
2968 const size_t* input1_shape,
2969 size_t num_input2_dims,
2970 const size_t* input2_shape,
2971 const void* input1,
2972 const void* input2,
2973 void* output,
2974 pthreadpool_t threadpool);
2975
2976enum xnn_status xnn_create_negate_nc_f16(
2977 size_t channels,
2978 size_t input_stride,
2979 size_t output_stride,
2980 uint32_t flags,
2981 xnn_operator_t* negate_op_out);
2982
2983enum xnn_status xnn_setup_negate_nc_f16(
2984 xnn_operator_t negate_op,
2985 size_t batch_size,
2986 const void* input,
2987 void* output,
2988 pthreadpool_t threadpool);
2989
2990enum xnn_status xnn_create_prelu_nc_f16(
2991 size_t channels,
2992 size_t input_stride,
2993 size_t output_stride,
2994 const void* negative_slope,
2995 uint32_t flags,
2996 xnn_caches_t caches,
2997 xnn_operator_t* prelu_op_out);
2998
2999enum xnn_status xnn_setup_prelu_nc_f16(
3000 xnn_operator_t prelu_op,
3001 size_t batch_size,
3002 const void* input,
3003 void* output,
3004 pthreadpool_t threadpool);
3005
3006enum xnn_status xnn_create_resize_bilinear2d_nhwc_f16(
3007 size_t channels,
3008 size_t input_pixel_stride,
3009 size_t output_pixel_stride,
3010 uint32_t flags,
3011 xnn_operator_t* resize_op_out);
3012
3013enum xnn_status xnn_setup_resize_bilinear2d_nhwc_f16(
3014 xnn_operator_t resize_op,
3015 size_t batch_size,
3016 size_t input_height,
3017 size_t input_width,
3018 size_t output_height,
3019 size_t output_width,
3020 const void* input,
3021 void* output,
3022 pthreadpool_t threadpool);
3023
3024enum xnn_status xnn_create_sigmoid_nc_f16(
3025 size_t channels,
3026 size_t input_stride,
3027 size_t output_stride,
3028 uint32_t flags,
3029 xnn_operator_t* sigmoid_op_out);
3030
3031enum xnn_status xnn_setup_sigmoid_nc_f16(
3032 xnn_operator_t sigmoid_op,
3033 size_t batch_size,
3034 const void* input,
3035 void* output,
3036 pthreadpool_t threadpool);
3037
3038enum xnn_status xnn_create_softmax_nc_f16(
3039 size_t channels,
3040 size_t input_stride,
3041 size_t output_stride,
3042 uint32_t flags,
3043 xnn_operator_t* softmax_op_out);
3044
3045enum xnn_status xnn_setup_softmax_nc_f16(
3046 xnn_operator_t softmax_op,
3047 size_t batch_size,
3048 const void* input,
3049 void* output,
3050 pthreadpool_t threadpool);
3051
3052enum xnn_status xnn_create_square_nc_f16(
3053 size_t channels,
3054 size_t input_stride,
3055 size_t output_stride,
3056 uint32_t flags,
3057 xnn_operator_t* square_op_out);
3058
3059enum xnn_status xnn_setup_square_nc_f16(
3060 xnn_operator_t square_op,
3061 size_t batch_size,
3062 const void* input,
3063 void* output,
3064 pthreadpool_t threadpool);
3065
3066enum xnn_status xnn_create_square_root_nc_f16(
3067 size_t channels,
3068 size_t input_stride,
3069 size_t output_stride,
3070 uint32_t flags,
3071 xnn_operator_t* sqrt_op_out);
3072
3073enum xnn_status xnn_setup_square_root_nc_f16(
3074 xnn_operator_t sqrt_op,
3075 size_t batch_size,
3076 const void* input,
3077 void* output,
3078 pthreadpool_t threadpool);
3079
3080enum xnn_status xnn_create_squared_difference_nd_f16(
3081 uint32_t flags,
3082 xnn_operator_t* squared_difference_op_out);
3083
3084enum xnn_status xnn_setup_squared_difference_nd_f16(
3085 xnn_operator_t squared_difference_op,
3086 size_t num_input1_dims,
3087 const size_t* input1_shape,
3088 size_t num_input2_dims,
3089 const size_t* input2_shape,
3090 const void* input1,
3091 const void* input2,
3092 void* output,
3093 pthreadpool_t threadpool);
3094
3095enum xnn_status xnn_create_subtract_nd_f16(
3096 float output_min,
3097 float output_max,
3098 uint32_t flags,
3099 xnn_operator_t* subtract_op_out);
3100
3101enum xnn_status xnn_setup_subtract_nd_f16(
3102 xnn_operator_t subtract_op,
3103 size_t num_input1_dims,
3104 const size_t* input1_shape,
3105 size_t num_input2_dims,
3106 const size_t* input2_shape,
3107 const void* input1,
3108 const void* input2,
3109 void* output,
3110 pthreadpool_t threadpool);
3111
3112enum xnn_status xnn_create_truncation_nc_f16(
3113 size_t channels,
3114 size_t input_stride,
3115 size_t output_stride,
3116 uint32_t flags,
3117 xnn_operator_t* truncation_op_out);
3118
3119enum xnn_status xnn_setup_truncation_nc_f16(
3120 xnn_operator_t truncation_op,
3121 size_t batch_size,
3122 const void* input,
3123 void* output,
3124 pthreadpool_t threadpool);
3125
3126#ifndef XNN_NO_NCHW_OPERATORS
3127
3128enum xnn_status xnn_create_convolution2d_nchw_f16(
3129 uint32_t input_padding_top,
3130 uint32_t input_padding_right,
3131 uint32_t input_padding_bottom,
3132 uint32_t input_padding_left,
3133 uint32_t kernel_height,
3134 uint32_t kernel_width,
3135 uint32_t subsampling_height,
3136 uint32_t subsampling_width,
3137 uint32_t dilation_height,
3138 uint32_t dilation_width,
3139 uint32_t groups,
3140 size_t group_input_channels,
3141 size_t group_output_channels,
3142 size_t input_channel_stride,
3143 size_t output_channel_stride,
3144 const void* kernel,
3145 const void* bias,
3146 float output_min,
3147 float output_max,
3148 uint32_t flags,
3149 xnn_caches_t caches,
3150 xnn_operator_t* convolution_op_out);
3151
3152enum xnn_status xnn_setup_convolution2d_nchw_f16(
3153 xnn_operator_t convolution_op,
3154 size_t batch_size,
3155 size_t input_height,
3156 size_t input_width,
3157 const void* input,
3158 void* output,
3159 pthreadpool_t threadpool);
3160
3161enum xnn_status xnn_create_depth_to_space_nchw2nhwc_x16(
3162 size_t output_channels,
3163 size_t input_channel_stride,
3164 size_t output_channel_stride,
3165 uint32_t block_size,
3166 uint32_t flags,
3167 xnn_operator_t* depth_to_space_op_out);
3168
3169enum xnn_status xnn_setup_depth_to_space_nchw2nhwc_x16(
3170 xnn_operator_t depth_to_space_op,
3171 size_t batch_size,
3172 size_t input_height,
3173 size_t input_width,
3174 const void* input,
3175 void* output,
3176 pthreadpool_t threadpool);
3177
3178enum xnn_status xnn_create_global_average_pooling_ncw_f16(
3179 size_t channels,
3180 float output_min,
3181 float output_max,
3182 uint32_t flags,
3183 xnn_operator_t* global_average_pooling_op_out);
3184
3185enum xnn_status xnn_setup_global_average_pooling_ncw_f16(
3186 xnn_operator_t global_average_pooling_op,
3187 size_t batch_size,
3188 size_t width,
3189 const void* input,
3190 void* output,
3191 pthreadpool_t threadpool);
3192
3193enum xnn_status xnn_create_resize_bilinear2d_nchw_f16(
3194 size_t channels,
3195 size_t input_pixel_stride,
3196 size_t output_pixel_stride,
3197 uint32_t flags,
3198 xnn_operator_t* resize_op_out);
3199
3200enum xnn_status xnn_setup_resize_bilinear2d_nchw_f16(
3201 xnn_operator_t resize_op,
3202 size_t batch_size,
3203 size_t input_height,
3204 size_t input_width,
3205 size_t output_height,
3206 size_t output_width,
3207 const void* input,
3208 void* output,
3209 pthreadpool_t threadpool);
3210
3211#endif // XNN_NO_NCHW_OPERATORS
3212
3213#endif // XNN_NO_F16_OPERATORS
3214
3215#ifndef XNN_NO_X16_OPERATORS
3216
3217enum xnn_status xnn_create_constant_pad_nd_x16(
3218 const void* padding_value,
3219 uint32_t flags,
3220 xnn_operator_t* constant_pad_op_out);
3221
3222enum xnn_status xnn_setup_constant_pad_nd_x16(
3223 xnn_operator_t constant_pad_op,
3224 size_t num_dims,
3225 const size_t* input_shape,
3226 const size_t* pre_padding,
3227 const size_t* post_padding,
3228 const void* input,
3229 void* output,
3230 pthreadpool_t threadpool);
3231
3232enum xnn_status xnn_run_constant_pad_nd_x16(
3233 uint32_t flags,
3234 size_t num_dims,
3235 const size_t* input_shape,
3236 const size_t* pre_paddings,
3237 const size_t* post_paddings,
3238 const void* input,
3239 void* output,
3240 const void* padding_value,
3241 pthreadpool_t threadpool);
3242
3243enum xnn_status xnn_create_copy_nc_x16(
3244 size_t channels,
3245 size_t input_stride,
3246 size_t output_stride,
3247 uint32_t flags,
3248 xnn_operator_t* copy_op_out);
3249
3250enum xnn_status xnn_setup_copy_nc_x16(
3251 xnn_operator_t copy_op,
3252 size_t batch_size,
3253 const void* input,
3254 void* output,
3255 pthreadpool_t threadpool);
3256
3257enum xnn_status xnn_create_depth_to_space_nhwc_x16(
3258 size_t output_channels,
3259 size_t input_channel_stride,
3260 size_t output_channel_stride,
3261 uint32_t block_size,
3262 uint32_t flags,
3263 xnn_operator_t* depth_to_space_op_out);
3264
3265enum xnn_status xnn_setup_depth_to_space_nhwc_x16(
3266 xnn_operator_t depth_to_space_op,
3267 size_t batch_size,
3268 size_t input_height,
3269 size_t input_width,
3270 const void* input,
3271 void* output,
3272 pthreadpool_t threadpool);
3273
3274enum xnn_status xnn_create_slice_nd_x16(
3275 uint32_t flags,
3276 xnn_operator_t* slice_op_out);
3277
3278enum xnn_status xnn_setup_slice_nd_x16(
3279 xnn_operator_t slice_op,
3280 size_t num_dims,
3281 const size_t* input_shape,
3282 const size_t* offsets,
3283 const size_t* sizes,
3284 const void* input,
3285 void* output,
3286 pthreadpool_t threadpool);
3287
3288enum xnn_status xnn_create_space_to_depth_nhwc_x16(
3289 size_t input_channels,
3290 size_t input_channel_stride,
3291 size_t output_channel_stride,
3292 uint32_t block_size,
3293 uint32_t flags,
3294 xnn_operator_t* space_to_depth_op_out);
3295
3296enum xnn_status xnn_setup_space_to_depth_nhwc_x16(
3297 xnn_operator_t space_to_depth_op,
3298 size_t batch_size,
3299 size_t input_height,
3300 size_t input_width,
3301 const void* input,
3302 void* output,
3303 pthreadpool_t threadpool);
3304
3305enum xnn_status xnn_create_transpose_nd_x16(
3306 uint32_t flags,
3307 xnn_operator_t* transpose_op_out);
3308
3309enum xnn_status xnn_setup_transpose_nd_x16(
3310 xnn_operator_t transpose_op,
3311 const void* input,
3312 void* output,
3313 const size_t num_dims,
3314 const size_t* input_shape,
3315 const size_t* output_perm,
3316 pthreadpool_t threadpool);
3317
3318enum xnn_status xnn_run_transpose_nd_x16(
3319 const void* input,
3320 void* output,
3321 const size_t num_dims,
3322 const size_t* input_shape,
3323 const size_t* output_perm,
3324 uint32_t flags,
3325 pthreadpool_t threadpool);
3326
3327#endif // XNN_NO_X16_OPERATORS
3328
3329#ifndef XNN_NO_QC8_OPERATORS
3330
3331enum xnn_status xnn_create_convolution2d_nhwc_qc8(
3332 uint32_t input_padding_top,
3333 uint32_t input_padding_right,
3334 uint32_t input_padding_bottom,
3335 uint32_t input_padding_left,
3336 uint32_t kernel_height,
3337 uint32_t kernel_width,
3338 uint32_t subsampling_height,
3339 uint32_t subsampling_width,
3340 uint32_t dilation_height,
3341 uint32_t dilation_width,
3342 uint32_t groups,
3343 size_t group_input_channels,
3344 size_t group_output_channels,
3345 size_t input_channel_stride,
3346 size_t output_channel_stride,
3347 int8_t input_zero_point,
3348 float input_scale,
3349 const float* kernel_scale,
3350 const int8_t* kernel,
3351 const int32_t* bias,
3352 int8_t output_zero_point,
3353 float output_scale,
3354 int8_t output_min,
3355 int8_t output_max,
3356 uint32_t flags,
3357 xnn_caches_t caches,
3358 xnn_operator_t* convolution_op_out);
3359
3360enum xnn_status xnn_setup_convolution2d_nhwc_qc8(
3361 xnn_operator_t convolution_op,
3362 size_t batch_size,
3363 size_t input_height,
3364 size_t input_width,
3365 const int8_t* input,
3366 int8_t* output,
3367 pthreadpool_t threadpool);
3368
3369#endif // XNN_NO_QC8_OPERATORS
3370
3371#ifndef XNN_NO_QS8_OPERATORS
3372
3373enum xnn_status xnn_create_add_nd_qs8(
3374 int8_t input1_zero_point,
3375 float input1_scale,
3376 int8_t input2_zero_point,
3377 float input2_scale,
3378 int8_t output_zero_point,
3379 float output_scale,
3380 int8_t output_min,
3381 int8_t output_max,
3382 uint32_t flags,
3383 xnn_operator_t* add_op_out);
3384
3385enum xnn_status xnn_setup_add_nd_qs8(
3386 xnn_operator_t add_op,
3387 size_t num_input1_dims,
3388 const size_t* input1_shape,
3389 size_t num_input2_dims,
3390 const size_t* input2_shape,
3391 const int8_t* input1,
3392 const int8_t* input2,
3393 int8_t* output,
3394 pthreadpool_t threadpool);
3395
3396enum xnn_status xnn_run_add_nd_qs8(
3397 size_t num_input1_dims,
3398 const size_t* input1_shape,
3399 int8_t input1_zero_point,
3400 float input1_scale,
3401 size_t num_input2_dims,
3402 const size_t* input2_shape,
3403 int8_t input2_zero_point,
3404 float input2_scale,
3405 const int8_t* input1,
3406 const int8_t* input2,
3407 int8_t* output,
3408 int8_t output_zero_point,
3409 float output_scale,
3410 int8_t output_min,
3411 int8_t output_max,
3412 uint32_t flags,
3413 pthreadpool_t threadpool);
3414
3415enum xnn_status xnn_create_convolution2d_nhwc_qs8(
3416 uint32_t input_padding_top,
3417 uint32_t input_padding_right,
3418 uint32_t input_padding_bottom,
3419 uint32_t input_padding_left,
3420 uint32_t kernel_height,
3421 uint32_t kernel_width,
3422 uint32_t subsampling_height,
3423 uint32_t subsampling_width,
3424 uint32_t dilation_height,
3425 uint32_t dilation_width,
3426 uint32_t groups,
3427 size_t group_input_channels,
3428 size_t group_output_channels,
3429 size_t input_channel_stride,
3430 size_t output_channel_stride,
3431 int8_t input_zero_point,
3432 float input_scale,
3433 float kernel_scale,
3434 const int8_t* kernel,
3435 const int32_t* bias,
3436 int8_t output_zero_point,
3437 float output_scale,
3438 int8_t output_min,
3439 int8_t output_max,
3440 uint32_t flags,
3441 xnn_caches_t caches,
3442 xnn_operator_t* convolution_op_out);
3443
3444enum xnn_status xnn_setup_convolution2d_nhwc_qs8(
3445 xnn_operator_t convolution_op,
3446 size_t batch_size,
3447 size_t input_height,
3448 size_t input_width,
3449 const int8_t* input,
3450 int8_t* output,
3451 pthreadpool_t threadpool);
3452
3453enum xnn_status xnn_create_deconvolution2d_nhwc_qs8(
3454 uint32_t output_padding_top,
3455 uint32_t output_padding_right,
3456 uint32_t output_padding_bottom,
3457 uint32_t output_padding_left,
3458 uint32_t kernel_height,
3459 uint32_t kernel_width,
3460 uint32_t stride_height,
3461 uint32_t stride_width,
3462 uint32_t dilation_height,
3463 uint32_t dilation_width,
3464 uint32_t groups,
3465 size_t group_input_channels,
3466 size_t group_output_channels,
3467 size_t input_pixel_stride,
3468 size_t output_pixel_stride,
3469 int8_t input_zero_point,
3470 float input_scale,
3471 float kernel_scale,
3472 const int8_t* kernel,
3473 const int32_t* bias,
3474 int8_t output_zero_point,
3475 float output_scale,
3476 int8_t output_min,
3477 int8_t output_max,
3478 uint32_t flags,
3479 xnn_caches_t caches,
3480 xnn_operator_t* deconvolution_op_out);
3481
3482enum xnn_status xnn_setup_deconvolution2d_nhwc_qs8(
3483 xnn_operator_t deconvolution_op,
3484 size_t batch_size,
3485 size_t input_height,
3486 size_t input_width,
3487 uint32_t adjustment_height,
3488 uint32_t adjustment_width,
3489 const int8_t* input,
3490 int8_t* output,
3491 pthreadpool_t threadpool);
3492
3493enum xnn_status xnn_create_elu_nc_qs8(
3494 size_t channels,
3495 size_t input_stride,
3496 size_t output_stride,
3497 float alpha,
3498 int8_t input_zero_point,
3499 float input_scale,
3500 int8_t output_zero_point,
3501 float output_scale,
3502 int8_t output_min,
3503 int8_t output_max,
3504 uint32_t flags,
3505 xnn_operator_t* elu_op_out);
3506
3507enum xnn_status xnn_setup_elu_nc_qs8(
3508 xnn_operator_t elu_op,
3509 size_t batch_size,
3510 const int8_t* input,
3511 int8_t* output,
3512 pthreadpool_t threadpool);
3513
3514enum xnn_status xnn_create_fully_connected_nc_qs8(
3515 size_t input_channels,
3516 size_t output_channels,
3517 size_t input_stride,
3518 size_t output_stride,
3519 int8_t input_zero_point,
3520 float input_scale,
3521 float kernel_scale,
3522 const int8_t* kernel,
3523 const int32_t* bias,
3524 int8_t output_zero_point,
3525 float output_scale,
3526 int8_t output_min,
3527 int8_t output_max,
3528 uint32_t flags,
3529 xnn_caches_t caches,
3530 xnn_operator_t* fully_connected_op_out);
3531
3532enum xnn_status xnn_setup_fully_connected_nc_qs8(
3533 xnn_operator_t fully_connected_op,
3534 size_t batch_size,
3535 const int8_t* input,
3536 int8_t* output,
3537 pthreadpool_t threadpool);
3538
3539enum xnn_status xnn_create_global_average_pooling_nwc_qs8(
3540 size_t channels,
3541 size_t input_stride,
3542 size_t output_stride,
3543 int8_t input_zero_point,
3544 float input_scale,
3545 int8_t output_zero_point,
3546 float output_scale,
3547 int8_t output_min,
3548 int8_t output_max,
3549 uint32_t flags,
3550 xnn_operator_t* global_average_pooling_op_out);
3551
3552enum xnn_status xnn_setup_global_average_pooling_nwc_qs8(
3553 xnn_operator_t global_average_pooling_op,
3554 size_t batch_size,
3555 size_t width,
3556 const int8_t* input,
3557 int8_t* output,
3558 pthreadpool_t threadpool);
3559
3560enum xnn_status xnn_create_multiply_nd_qs8(
3561 int8_t input1_zero_point,
3562 float input1_scale,
3563 int8_t input2_zero_point,
3564 float input2_scale,
3565 int8_t output_zero_point,
3566 float output_scale,
3567 int8_t output_min,
3568 int8_t output_max,
3569 uint32_t flags,
3570 xnn_operator_t* multiply_op_out);
3571
3572enum xnn_status xnn_setup_multiply_nd_qs8(
3573 xnn_operator_t multiply_op,
3574 size_t num_input1_dims,
3575 const size_t* input1_shape,
3576 size_t num_input2_dims,
3577 const size_t* input2_shape,
3578 const int8_t* input1,
3579 const int8_t* input2,
3580 int8_t* output,
3581 pthreadpool_t threadpool);
3582
3583enum xnn_status xnn_run_multiply_nd_qs8(
3584 size_t num_input1_dims,
3585 const size_t* input1_shape,
3586 int8_t input1_zero_point,
3587 float input1_scale,
3588 size_t num_input2_dims,
3589 const size_t* input2_shape,
3590 int8_t input2_zero_point,
3591 float input2_scale,
3592 const int8_t* input1,
3593 const int8_t* input2,
3594 int8_t* output,
3595 int8_t output_zero_point,
3596 float output_scale,
3597 int8_t output_min,
3598 int8_t output_max,
3599 uint32_t flags,
3600 pthreadpool_t threadpool);
3601
3602enum xnn_status xnn_create_leaky_relu_nc_qs8(
3603 size_t channels,
3604 size_t input_stride,
3605 size_t output_stride,
3606 float negative_slope,
3607 int8_t input_zero_point,
3608 float input_scale,
3609 int8_t output_zero_point,
3610 float output_scale,
3611 uint32_t flags,
3612 xnn_operator_t* leaky_relu_op_out);
3613
3614enum xnn_status xnn_setup_leaky_relu_nc_qs8(
3615 xnn_operator_t leaky_relu_op,
3616 size_t batch_size,
3617 const int8_t* input,
3618 int8_t* output,
3619 pthreadpool_t threadpool);
3620
3621enum xnn_status xnn_create_sigmoid_nc_qs8(
3622 size_t channels,
3623 size_t input_stride,
3624 size_t output_stride,
3625 int8_t input_zero_point,
3626 float input_scale,
3627 int8_t output_zero_point,
3628 float output_scale,
3629 int8_t output_min,
3630 int8_t output_max,
3631 uint32_t flags,
3632 xnn_operator_t* sigmoid_op_out);
3633
3634enum xnn_status xnn_setup_sigmoid_nc_qs8(
3635 xnn_operator_t sigmoid_op,
3636 size_t batch_size,
3637 const int8_t* input,
3638 int8_t* output,
3639 pthreadpool_t threadpool);
3640
3641enum xnn_status xnn_create_subtract_nd_qs8(
3642 int8_t input1_zero_point,
3643 float input1_scale,
3644 int8_t input2_zero_point,
3645 float input2_scale,
3646 int8_t output_zero_point,
3647 float output_scale,
3648 int8_t output_min,
3649 int8_t output_max,
3650 uint32_t flags,
3651 xnn_operator_t* subtract_op_out);
3652
3653enum xnn_status xnn_setup_subtract_nd_qs8(
3654 xnn_operator_t subtract_op,
3655 size_t num_input1_dims,
3656 const size_t* input1_shape,
3657 size_t num_input2_dims,
3658 const size_t* input2_shape,
3659 const int8_t* input1,
3660 const int8_t* input2,
3661 int8_t* output,
3662 pthreadpool_t threadpool);
3663
3664enum xnn_status xnn_run_subtract_nd_qs8(
3665 size_t num_input1_dims,
3666 const size_t* input1_shape,
3667 int8_t input1_zero_point,
3668 float input1_scale,
3669 size_t num_input2_dims,
3670 const size_t* input2_shape,
3671 int8_t input2_zero_point,
3672 float input2_scale,
3673 const int8_t* input1,
3674 const int8_t* input2,
3675 int8_t* output,
3676 int8_t output_zero_point,
3677 float output_scale,
3678 int8_t output_min,
3679 int8_t output_max,
3680 uint32_t flags,
3681 pthreadpool_t threadpool);
3682
3683enum xnn_status xnn_create_tanh_nc_qs8(
3684 size_t channels,
3685 size_t input_stride,
3686 size_t output_stride,
3687 int8_t input_zero_point,
3688 float input_scale,
3689 int8_t output_zero_point,
3690 float output_scale,
3691 int8_t output_min,
3692 int8_t output_max,
3693 uint32_t flags,
3694 xnn_operator_t* tanh_op_out);
3695
3696enum xnn_status xnn_setup_tanh_nc_qs8(
3697 xnn_operator_t tanh_op,
3698 size_t batch_size,
3699 const int8_t* input,
3700 int8_t* output,
3701 pthreadpool_t threadpool);
3702
3703#endif // XNN_NO_QS8_OPERATORS
3704
3705#ifndef XNN_NO_QU8_OPERATORS
3706
3707enum xnn_status xnn_create_add_nd_qu8(
3708 uint8_t input1_zero_point,
3709 float input1_scale,
3710 uint8_t input2_zero_point,
3711 float input2_scale,
3712 uint8_t output_zero_point,
3713 float output_scale,
3714 uint8_t output_min,
3715 uint8_t output_max,
3716 uint32_t flags,
3717 xnn_operator_t* add_op_out);
3718
3719enum xnn_status xnn_setup_add_nd_qu8(
3720 xnn_operator_t add_op,
3721 size_t num_input1_dims,
3722 const size_t* input1_shape,
3723 size_t num_input2_dims,
3724 const size_t* input2_shape,
3725 const uint8_t* input1,
3726 const uint8_t* input2,
3727 uint8_t* output,
3728 pthreadpool_t threadpool);
3729
3730enum xnn_status xnn_run_add_nd_qu8(
3731 size_t num_input1_dims,
3732 const size_t* input1_shape,
3733 uint8_t input1_zero_point,
3734 float input1_scale,
3735 size_t num_input2_dims,
3736 const size_t* input2_shape,
3737 uint8_t input2_zero_point,
3738 float input2_scale,
3739 const uint8_t* input1,
3740 const uint8_t* input2,
3741 uint8_t* output,
3742 uint8_t output_zero_point,
3743 float output_scale,
3744 uint8_t output_min,
3745 uint8_t output_max,
3746 uint32_t flags,
3747 pthreadpool_t threadpool);
3748
3749enum xnn_status xnn_create_average_pooling2d_nhwc_qu8(
3750 uint32_t input_padding_top,
3751 uint32_t input_padding_right,
3752 uint32_t input_padding_bottom,
3753 uint32_t input_padding_left,
3754 uint32_t pooling_height,
3755 uint32_t pooling_width,
3756 uint32_t stride_height,
3757 uint32_t stride_width,
3758 size_t channels,
3759 size_t input_pixel_stride,
3760 size_t output_pixel_stride,
3761 uint8_t input_zero_point,
3762 float input_scale,
3763 uint8_t output_zero_point,
3764 float output_scale,
3765 uint8_t output_min,
3766 uint8_t output_max,
3767 uint32_t flags,
3768 xnn_operator_t* average_pooling_op_out);
3769
3770enum xnn_status xnn_setup_average_pooling2d_nhwc_qu8(
3771 xnn_operator_t average_pooling_op,
3772 size_t batch_size,
3773 size_t input_height,
3774 size_t input_width,
3775 const uint8_t* input,
3776 uint8_t* output,
3777 pthreadpool_t threadpool);
3778
3779enum xnn_status xnn_create_convolution2d_nhwc_qu8(
3780 uint32_t input_padding_top,
3781 uint32_t input_padding_right,
3782 uint32_t input_padding_bottom,
3783 uint32_t input_padding_left,
3784 uint32_t kernel_height,
3785 uint32_t kernel_width,
3786 uint32_t subsampling_height,
3787 uint32_t subsampling_width,
3788 uint32_t dilation_height,
3789 uint32_t dilation_width,
3790 uint32_t groups,
3791 size_t group_input_channels,
3792 size_t group_output_channels,
3793 size_t input_channel_stride,
3794 size_t output_channel_stride,
3795 uint8_t input_zero_point,
3796 float input_scale,
3797 uint8_t kernel_zero_point,
3798 float kernel_scale,
3799 const uint8_t* kernel,
3800 const int32_t* bias,
3801 uint8_t output_zero_point,
3802 float output_scale,
3803 uint8_t output_min,
3804 uint8_t output_max,
3805 uint32_t flags,
3806 xnn_caches_t caches,
3807 xnn_operator_t* convolution_op_out);
3808
3809enum xnn_status xnn_setup_convolution2d_nhwc_qu8(
3810 xnn_operator_t convolution_op,
3811 size_t batch_size,
3812 size_t input_height,
3813 size_t input_width,
3814 const uint8_t* input,
3815 uint8_t* output,
3816 pthreadpool_t threadpool);
3817
3818enum xnn_status xnn_create_deconvolution2d_nhwc_qu8(
3819 uint32_t output_padding_top,
3820 uint32_t output_padding_right,
3821 uint32_t output_padding_bottom,
3822 uint32_t output_padding_left,
3823 uint32_t kernel_height,
3824 uint32_t kernel_width,
3825 uint32_t stride_height,
3826 uint32_t stride_width,
3827 uint32_t dilation_height,
3828 uint32_t dilation_width,
3829 uint32_t groups,
3830 size_t group_input_channels,
3831 size_t group_output_channels,
3832 size_t input_pixel_stride,
3833 size_t output_pixel_stride,
3834 uint8_t input_zero_point,
3835 float input_scale,
3836 uint8_t kernel_zero_point,
3837 float kernel_scale,
3838 const uint8_t* kernel,
3839 const int32_t* bias,
3840 uint8_t output_zero_point,
3841 float output_scale,
3842 uint8_t output_min,
3843 uint8_t output_max,
3844 uint32_t flags,
3845 xnn_caches_t caches,
3846 xnn_operator_t* deconvolution_op_out);
3847
3848enum xnn_status xnn_setup_deconvolution2d_nhwc_qu8(
3849 xnn_operator_t deconvolution_op,
3850 size_t batch_size,
3851 size_t input_height,
3852 size_t input_width,
3853 uint32_t adjustment_height,
3854 uint32_t adjustment_width,
3855 const uint8_t* input,
3856 uint8_t* output,
3857 pthreadpool_t threadpool);
3858
3859enum xnn_status xnn_create_fully_connected_nc_qu8(
3860 size_t input_channels,
3861 size_t output_channels,
3862 size_t input_stride,
3863 size_t output_stride,
3864 uint8_t input_zero_point,
3865 float input_scale,
3866 uint8_t kernel_zero_point,
3867 float kernel_scale,
3868 const uint8_t* kernel,
3869 const int32_t* bias,
3870 uint8_t output_zero_point,
3871 float output_scale,
3872 uint8_t output_min,
3873 uint8_t output_max,
3874 uint32_t flags,
3875 xnn_caches_t caches,
3876 xnn_operator_t* fully_connected_op_out);
3877
3878enum xnn_status xnn_setup_fully_connected_nc_qu8(
3879 xnn_operator_t fully_connected_op,
3880 size_t batch_size,
3881 const uint8_t* input,
3882 uint8_t* output,
3883 pthreadpool_t threadpool);
3884
3885enum xnn_status xnn_create_global_average_pooling_nwc_qu8(
3886 size_t channels,
3887 size_t input_stride,
3888 size_t output_stride,
3889 uint8_t input_zero_point,
3890 float input_scale,
3891 uint8_t output_zero_point,
3892 float output_scale,
3893 uint8_t output_min,
3894 uint8_t output_max,
3895 uint32_t flags,
3896 xnn_operator_t* global_average_pooling_op_out);
3897
3898enum xnn_status xnn_setup_global_average_pooling_nwc_qu8(
3899 xnn_operator_t global_average_pooling_op,
3900 size_t batch_size,
3901 size_t width,
3902 const uint8_t* input,
3903 uint8_t* output,
3904 pthreadpool_t threadpool);
3905
3906enum xnn_status xnn_create_leaky_relu_nc_qu8(
3907 size_t channels,
3908 size_t input_stride,
3909 size_t output_stride,
3910 float negative_slope,
3911 uint8_t input_zero_point,
3912 float input_scale,
3913 uint8_t output_zero_point,
3914 float output_scale,
3915 uint32_t flags,
3916 xnn_operator_t* leaky_relu_op_out);
3917
3918enum xnn_status xnn_setup_leaky_relu_nc_qu8(
3919 xnn_operator_t leaky_relu_op,
3920 size_t batch_size,
3921 const uint8_t* input,
3922 uint8_t* output,
3923 pthreadpool_t threadpool);
3924
3925enum xnn_status xnn_create_multiply_nd_qu8(
3926 uint8_t input1_zero_point,
3927 float input1_scale,
3928 uint8_t input2_zero_point,
3929 float input2_scale,
3930 uint8_t output_zero_point,
3931 float output_scale,
3932 uint8_t output_min,
3933 uint8_t output_max,
3934 uint32_t flags,
3935 xnn_operator_t* multiply_op_out);
3936
3937enum xnn_status xnn_setup_multiply_nd_qu8(
3938 xnn_operator_t multiply_op,
3939 size_t num_input1_dims,
3940 const size_t* input1_shape,
3941 size_t num_input2_dims,
3942 const size_t* input2_shape,
3943 const uint8_t* input1,
3944 const uint8_t* input2,
3945 uint8_t* output,
3946 pthreadpool_t threadpool);
3947
3948enum xnn_status xnn_run_multiply_nd_qu8(
3949 size_t num_input1_dims,
3950 const size_t* input1_shape,
3951 uint8_t input1_zero_point,
3952 float input1_scale,
3953 size_t num_input2_dims,
3954 const size_t* input2_shape,
3955 uint8_t input2_zero_point,
3956 float input2_scale,
3957 const uint8_t* input1,
3958 const uint8_t* input2,
3959 uint8_t* output,
3960 uint8_t output_zero_point,
3961 float output_scale,
3962 uint8_t output_min,
3963 uint8_t output_max,
3964 uint32_t flags,
3965 pthreadpool_t threadpool);
3966
3967enum xnn_status xnn_create_sigmoid_nc_qu8(
3968 size_t channels,
3969 size_t input_stride,
3970 size_t output_stride,
3971 uint8_t input_zero_point,
3972 float input_scale,
3973 uint8_t output_zero_point,
3974 float output_scale,
3975 uint8_t output_min,
3976 uint8_t output_max,
3977 uint32_t flags,
3978 xnn_operator_t* sigmoid_op_out);
3979
3980enum xnn_status xnn_setup_sigmoid_nc_qu8(
3981 xnn_operator_t sigmoid_op,
3982 size_t batch_size,
3983 const uint8_t* input,
3984 uint8_t* output,
3985 pthreadpool_t threadpool);
3986
3987enum xnn_status xnn_create_softmax_nc_qu8(
3988 size_t channels,
3989 size_t input_stride,
3990 size_t output_stride,
3991 float input_scale,
3992 uint8_t output_zero_point,
3993 float output_scale,
3994 uint32_t flags,
3995 xnn_operator_t* softmax_op_out);
3996
3997enum xnn_status xnn_setup_softmax_nc_qu8(
3998 xnn_operator_t softmax_op,
3999 size_t batch_size,
4000 const uint8_t* input,
4001 uint8_t* output,
4002 pthreadpool_t threadpool);
4003
4004enum xnn_status xnn_create_subtract_nd_qu8(
4005 uint8_t input1_zero_point,
4006 float input1_scale,
4007 uint8_t input2_zero_point,
4008 float input2_scale,
4009 uint8_t output_zero_point,
4010 float output_scale,
4011 uint8_t output_min,
4012 uint8_t output_max,
4013 uint32_t flags,
4014 xnn_operator_t* subtract_op_out);
4015
4016enum xnn_status xnn_setup_subtract_nd_qu8(
4017 xnn_operator_t subtract_op,
4018 size_t num_input1_dims,
4019 const size_t* input1_shape,
4020 size_t num_input2_dims,
4021 const size_t* input2_shape,
4022 const uint8_t* input1,
4023 const uint8_t* input2,
4024 uint8_t* output,
4025 pthreadpool_t threadpool);
4026
4027enum xnn_status xnn_run_subtract_nd_qu8(
4028 size_t num_input1_dims,
4029 const size_t* input1_shape,
4030 uint8_t input1_zero_point,
4031 float input1_scale,
4032 size_t num_input2_dims,
4033 const size_t* input2_shape,
4034 uint8_t input2_zero_point,
4035 float input2_scale,
4036 const uint8_t* input1,
4037 const uint8_t* input2,
4038 uint8_t* output,
4039 uint8_t output_zero_point,
4040 float output_scale,
4041 uint8_t output_min,
4042 uint8_t output_max,
4043 uint32_t flags,
4044 pthreadpool_t threadpool);
4045
4046enum xnn_status xnn_create_tanh_nc_qu8(
4047 size_t channels,
4048 size_t input_stride,
4049 size_t output_stride,
4050 uint8_t input_zero_point,
4051 float input_scale,
4052 uint8_t output_zero_point,
4053 float output_scale,
4054 uint8_t output_min,
4055 uint8_t output_max,
4056 uint32_t flags,
4057 xnn_operator_t* tanh_op_out);
4058
4059enum xnn_status xnn_setup_tanh_nc_qu8(
4060 xnn_operator_t tanh_op,
4061 size_t batch_size,
4062 const uint8_t* input,
4063 uint8_t* output,
4064 pthreadpool_t threadpool);
4065
4066#endif // XNN_NO_QU8_OPERATORS
4067
4068#ifndef XNN_NO_S8_OPERATORS
4069
4070enum xnn_status xnn_create_clamp_nc_s8(
4071 size_t channels,
4072 size_t input_stride,
4073 size_t output_stride,
4074 int8_t output_min,
4075 int8_t output_max,
4076 uint32_t flags,
4077 xnn_operator_t* clamp_op_out);
4078
4079enum xnn_status xnn_setup_clamp_nc_s8(
4080 xnn_operator_t clamp_op,
4081 size_t batch_size,
4082 const int8_t* input,
4083 int8_t* output,
4084 pthreadpool_t threadpool);
4085
4086enum xnn_status xnn_create_max_pooling2d_nhwc_s8(
4087 uint32_t input_padding_top,
4088 uint32_t input_padding_right,
4089 uint32_t input_padding_bottom,
4090 uint32_t input_padding_left,
4091 uint32_t pooling_height,
4092 uint32_t pooling_width,
4093 uint32_t stride_height,
4094 uint32_t stride_width,
4095 uint32_t dilation_height,
4096 uint32_t dilation_width,
4097 size_t channels,
4098 size_t input_pixel_stride,
4099 size_t output_pixel_stride,
4100 int8_t output_min,
4101 int8_t output_max,
4102 uint32_t flags,
4103 xnn_operator_t* max_pooling_op_out);
4104
4105enum xnn_status xnn_setup_max_pooling2d_nhwc_s8(
4106 xnn_operator_t max_pooling_op,
4107 size_t batch_size,
4108 size_t input_height,
4109 size_t input_width,
4110 const int8_t* input,
4111 int8_t* output,
4112 pthreadpool_t threadpool);
4113
4114enum xnn_status xnn_create_resize_bilinear2d_nhwc_s8(
4115 size_t channels,
4116 size_t input_pixel_stride,
4117 size_t output_pixel_stride,
4118 uint32_t flags,
4119 xnn_operator_t* resize_op_out);
4120
4121enum xnn_status xnn_setup_resize_bilinear2d_nhwc_s8(
4122 xnn_operator_t resize_op,
4123 size_t batch_size,
4124 size_t input_height,
4125 size_t input_width,
4126 size_t output_height,
4127 size_t output_width,
4128 const int8_t* input,
4129 int8_t* output,
4130 pthreadpool_t threadpool);
4131
4132#endif // XNN_NO_S8_OPERATORS
4133
4134#ifndef XNN_NO_U8_OPERATORS
4135
4136enum xnn_status xnn_create_clamp_nc_u8(
4137 size_t channels,
4138 size_t input_stride,
4139 size_t output_stride,
4140 uint8_t output_min,
4141 uint8_t output_max,
4142 uint32_t flags,
4143 xnn_operator_t* clamp_op_out);
4144
4145enum xnn_status xnn_setup_clamp_nc_u8(
4146 xnn_operator_t clamp_op,
4147 size_t batch_size,
4148 const uint8_t* input,
4149 uint8_t* output,
4150 pthreadpool_t threadpool);
4151
/// Create entry point for the unsigned 8-bit (u8) NHWC 2D max-pooling operator, going by the function name.
/// Declared only when XNN_NO_U8_OPERATORS is not defined.
///
/// NOTE(review): only the prototype is visible here; parameter meanings are inferred from names and types --
/// confirm against the definition.
///
/// @param input_padding_top - presumably implicit padding added above the input.
/// @param input_padding_right - presumably implicit padding added to the right of the input.
/// @param input_padding_bottom - presumably implicit padding added below the input.
/// @param input_padding_left - presumably implicit padding added to the left of the input.
/// @param pooling_height - presumably the height of the pooling window.
/// @param pooling_width - presumably the width of the pooling window.
/// @param stride_height - presumably the vertical step between pooling windows.
/// @param stride_width - presumably the horizontal step between pooling windows.
/// @param dilation_height - presumably the vertical dilation of the pooling window.
/// @param dilation_width - presumably the horizontal dilation of the pooling window.
/// @param channels - presumably the number of channels per pixel.
/// @param input_pixel_stride - presumably the distance, in elements, between consecutive input pixels.
/// @param output_pixel_stride - presumably the distance, in elements, between consecutive output pixels.
/// @param output_min - presumably the lower clamping bound applied to outputs.
/// @param output_max - presumably the upper clamping bound applied to outputs.
/// @param flags - 32-bit flag word; which XNN_FLAG_* bits are honoured is not visible from this prototype.
/// @param max_pooling_op_out - pointer to an xnn_operator_t; by its name, receives the created operator handle.
/// @returns A status code of type enum xnn_status.
4152enum xnn_status xnn_create_max_pooling2d_nhwc_u8(
4153 uint32_t input_padding_top,
4154 uint32_t input_padding_right,
4155 uint32_t input_padding_bottom,
4156 uint32_t input_padding_left,
4157 uint32_t pooling_height,
4158 uint32_t pooling_width,
4159 uint32_t stride_height,
4160 uint32_t stride_width,
4161 uint32_t dilation_height,
4162 uint32_t dilation_width,
4163 size_t channels,
4164 size_t input_pixel_stride,
4165 size_t output_pixel_stride,
4166 uint8_t output_min,
4167 uint8_t output_max,
4168 uint32_t flags,
4169 xnn_operator_t* max_pooling_op_out);
4170
/// Setup entry point for the unsigned 8-bit (u8) NHWC 2D max-pooling operator, going by the function name.
/// Declared only when XNN_NO_U8_OPERATORS is not defined.
///
/// NOTE(review): only the prototype is visible here; parameter meanings are inferred from names and types --
/// confirm against the definition.
///
/// @param max_pooling_op - operator handle (xnn_operator_t) to set up.
/// @param batch_size - presumably the number of images in the batch.
/// @param input_height - presumably the height of each input image.
/// @param input_width - presumably the width of each input image.
/// @param input - pointer to uint8_t input data; declared const.
/// @param output - pointer to uint8_t output data.
/// @param threadpool - pthreadpool_t handle (type comes from <pthreadpool.h>).
/// @returns A status code of type enum xnn_status.
4171enum xnn_status xnn_setup_max_pooling2d_nhwc_u8(
4172 xnn_operator_t max_pooling_op,
4173 size_t batch_size,
4174 size_t input_height,
4175 size_t input_width,
4176 const uint8_t* input,
4177 uint8_t* output,
4178 pthreadpool_t threadpool);
4179
/// Create entry point for the unsigned 8-bit (u8) NHWC 2D bilinear-resize operator, going by the function name.
/// Declared only when XNN_NO_U8_OPERATORS is not defined.
///
/// NOTE(review): only the prototype is visible here; parameter meanings are inferred from names and types --
/// confirm against the definition.
///
/// @param channels - presumably the number of channels per pixel.
/// @param input_pixel_stride - presumably the distance, in elements, between consecutive input pixels.
/// @param output_pixel_stride - presumably the distance, in elements, between consecutive output pixels.
/// @param flags - 32-bit flag word; which XNN_FLAG_* bits are honoured is not visible from this prototype.
/// @param resize_op_out - pointer to an xnn_operator_t; by its name, receives the created operator handle.
/// @returns A status code of type enum xnn_status.
4180enum xnn_status xnn_create_resize_bilinear2d_nhwc_u8(
4181 size_t channels,
4182 size_t input_pixel_stride,
4183 size_t output_pixel_stride,
4184 uint32_t flags,
4185 xnn_operator_t* resize_op_out);
4186
/// Setup entry point for the unsigned 8-bit (u8) NHWC 2D bilinear-resize operator, going by the function name.
/// Declared only when XNN_NO_U8_OPERATORS is not defined.
///
/// NOTE(review): only the prototype is visible here; parameter meanings are inferred from names and types --
/// confirm against the definition.
///
/// @param resize_op - operator handle (xnn_operator_t) to set up.
/// @param batch_size - presumably the number of images in the batch.
/// @param input_height - presumably the height of each input image.
/// @param input_width - presumably the width of each input image.
/// @param output_height - presumably the height of each resized output image.
/// @param output_width - presumably the width of each resized output image.
/// @param input - pointer to uint8_t input data; declared const.
/// @param output - pointer to uint8_t output data.
/// @param threadpool - pthreadpool_t handle (type comes from <pthreadpool.h>).
/// @returns A status code of type enum xnn_status.
4187enum xnn_status xnn_setup_resize_bilinear2d_nhwc_u8(
4188 xnn_operator_t resize_op,
4189 size_t batch_size,
4190 size_t input_height,
4191 size_t input_width,
4192 size_t output_height,
4193 size_t output_width,
4194 const uint8_t* input,
4195 uint8_t* output,
4196 pthreadpool_t threadpool);
4197
4198#endif // XNN_NO_U8_OPERATORS
4199
4200#ifndef XNN_NO_X8_OPERATORS
4201
/// Create entry point for the copy operator on 8-bit elements (x8) in NC layout, going by the function name.
/// Declared only when XNN_NO_X8_OPERATORS is not defined.
///
/// NOTE(review): only the prototype is visible here; parameter meanings are inferred from names and types --
/// confirm against the definition.
///
/// @param channels - presumably the number of channels per batch element.
/// @param input_stride - presumably the distance, in elements, between consecutive input batch elements.
/// @param output_stride - presumably the distance, in elements, between consecutive output batch elements.
/// @param flags - 32-bit flag word; which XNN_FLAG_* bits are honoured is not visible from this prototype.
/// @param copy_op_out - pointer to an xnn_operator_t; by its name, receives the created operator handle.
/// @returns A status code of type enum xnn_status.
4202enum xnn_status xnn_create_copy_nc_x8(
4203 size_t channels,
4204 size_t input_stride,
4205 size_t output_stride,
4206 uint32_t flags,
4207 xnn_operator_t* copy_op_out);
4208
/// Setup entry point for the copy operator on 8-bit elements (x8) in NC layout, going by the function name.
/// Declared only when XNN_NO_X8_OPERATORS is not defined.
///
/// NOTE(review): only the prototype is visible here; parameter meanings are inferred from names and types --
/// confirm against the definition.
///
/// @param copy_op - operator handle (xnn_operator_t) to set up.
/// @param batch_size - presumably the number of batch elements to copy.
/// @param input - untyped (const void*) pointer to the input data; declared const.
/// @param output - untyped (void*) pointer to the output data.
/// @param threadpool - pthreadpool_t handle (type comes from <pthreadpool.h>).
/// @returns A status code of type enum xnn_status.
4209enum xnn_status xnn_setup_copy_nc_x8(
4210 xnn_operator_t copy_op,
4211 size_t batch_size,
4212 const void* input,
4213 void* output,
4214 pthreadpool_t threadpool);
4215
/// Create entry point for the channel-shuffle operator on 8-bit elements (x8) in NC layout, going by the
/// function name. Declared only when XNN_NO_X8_OPERATORS is not defined.
///
/// NOTE(review): only the prototype is visible here; parameter meanings are inferred from names and types --
/// confirm against the definition.
///
/// @param groups - presumably the number of channel groups being shuffled.
/// @param group_channels - presumably the number of channels in each group.
/// @param input_stride - presumably the distance, in elements, between consecutive input batch elements.
/// @param output_stride - presumably the distance, in elements, between consecutive output batch elements.
/// @param flags - 32-bit flag word; which XNN_FLAG_* bits are honoured is not visible from this prototype.
/// @param channel_shuffle_op_out - pointer to an xnn_operator_t; by its name, receives the created operator handle.
/// @returns A status code of type enum xnn_status.
4216enum xnn_status xnn_create_channel_shuffle_nc_x8(
4217 size_t groups,
4218 size_t group_channels,
4219 size_t input_stride,
4220 size_t output_stride,
4221 uint32_t flags,
4222 xnn_operator_t* channel_shuffle_op_out);
4223
/// Setup entry point for the channel-shuffle operator on 8-bit elements (x8) in NC layout, going by the
/// function name. Declared only when XNN_NO_X8_OPERATORS is not defined.
///
/// NOTE(review): only the prototype is visible here; parameter meanings are inferred from names and types --
/// confirm against the definition.
///
/// @param channel_shuffle_op - operator handle (xnn_operator_t) to set up.
/// @param batch_size - presumably the number of batch elements to process.
/// @param input - untyped (const void*) pointer to the input data; declared const.
/// @param output - untyped (void*) pointer to the output data.
/// @param threadpool - pthreadpool_t handle (type comes from <pthreadpool.h>).
/// @returns A status code of type enum xnn_status.
4224enum xnn_status xnn_setup_channel_shuffle_nc_x8(
4225 xnn_operator_t channel_shuffle_op,
4226 size_t batch_size,
4227 const void* input,
4228 void* output,
4229 pthreadpool_t threadpool);
4230
/// Create entry point for the N-dimensional constant-pad operator on 8-bit elements (x8), going by the
/// function name. Declared only when XNN_NO_X8_OPERATORS is not defined.
///
/// NOTE(review): only the prototype is visible here; parameter meanings are inferred from names and types --
/// confirm against the definition.
///
/// @param padding_value - untyped (const void*) pointer; presumably points at the 8-bit value used to fill
///                        the padded region.
/// @param flags - 32-bit flag word; which XNN_FLAG_* bits are honoured is not visible from this prototype.
/// @param constant_pad_op_out - pointer to an xnn_operator_t; by its name, receives the created operator handle.
/// @returns A status code of type enum xnn_status.
4231enum xnn_status xnn_create_constant_pad_nd_x8(
4232 const void* padding_value,
4233 uint32_t flags,
4234 xnn_operator_t* constant_pad_op_out);
4235
/// Setup entry point for the N-dimensional constant-pad operator on 8-bit elements (x8), going by the
/// function name. Declared only when XNN_NO_X8_OPERATORS is not defined.
///
/// NOTE(review): only the prototype is visible here; parameter meanings are inferred from names and types --
/// confirm against the definition.
///
/// @param constant_pad_op - operator handle (xnn_operator_t) to set up.
/// @param num_dims - presumably the number of tensor dimensions, i.e. the length of the three arrays below.
/// @param input_shape - pointer to const size_t; presumably the per-dimension input extents.
/// @param pre_padding - pointer to const size_t; presumably the per-dimension padding before the data.
/// @param post_padding - pointer to const size_t; presumably the per-dimension padding after the data.
/// @param input - untyped (const void*) pointer to the input data; declared const.
/// @param output - untyped (void*) pointer to the output data.
/// @param threadpool - pthreadpool_t handle (type comes from <pthreadpool.h>).
/// @returns A status code of type enum xnn_status.
4236enum xnn_status xnn_setup_constant_pad_nd_x8(
4237 xnn_operator_t constant_pad_op,
4238 size_t num_dims,
4239 const size_t* input_shape,
4240 const size_t* pre_padding,
4241 const size_t* post_padding,
4242 const void* input,
4243 void* output,
4244 pthreadpool_t threadpool);
4245
/// "Run" entry point for the N-dimensional constant-pad operator on 8-bit elements (x8), going by the
/// function name. Unlike the create/setup prototypes it takes no operator handle, so it presumably performs
/// the whole operation in one call -- TODO confirm. Declared only when XNN_NO_X8_OPERATORS is not defined.
///
/// NOTE(review): only the prototype is visible here; parameter meanings are inferred from names and types --
/// confirm against the definition. The padding arrays are spelled pre_paddings/post_paddings here but
/// pre_padding/post_padding in xnn_setup_constant_pad_nd_x8.
///
/// @param flags - 32-bit flag word; which XNN_FLAG_* bits are honoured is not visible from this prototype.
/// @param num_dims - presumably the number of tensor dimensions, i.e. the length of the three arrays below.
/// @param input_shape - pointer to const size_t; presumably the per-dimension input extents.
/// @param pre_paddings - pointer to const size_t; presumably the per-dimension padding before the data.
/// @param post_paddings - pointer to const size_t; presumably the per-dimension padding after the data.
/// @param input - untyped (const void*) pointer to the input data; declared const.
/// @param output - untyped (void*) pointer to the output data.
/// @param padding_value - untyped (const void*) pointer; presumably points at the 8-bit fill value.
/// @param threadpool - pthreadpool_t handle (type comes from <pthreadpool.h>).
/// @returns A status code of type enum xnn_status.
4246enum xnn_status xnn_run_constant_pad_nd_x8(
4247 uint32_t flags,
4248 size_t num_dims,
4249 const size_t* input_shape,
4250 const size_t* pre_paddings,
4251 const size_t* post_paddings,
4252 const void* input,
4253 void* output,
4254 const void* padding_value,
4255 pthreadpool_t threadpool);
4256
/// Create entry point for the NHWC depth-to-space operator on 8-bit elements (x8), going by the function
/// name. Declared only when XNN_NO_X8_OPERATORS is not defined.
///
/// NOTE(review): only the prototype is visible here; parameter meanings are inferred from names and types --
/// confirm against the definition.
///
/// @param output_channels - presumably the number of channels per output pixel.
/// @param input_channel_stride - presumably the distance, in elements, between consecutive input pixels.
/// @param output_channel_stride - presumably the distance, in elements, between consecutive output pixels.
/// @param block_size - presumably the edge length of the spatial block formed from the depth dimension.
/// @param flags - 32-bit flag word; which XNN_FLAG_* bits are honoured is not visible from this prototype.
/// @param depth_to_space_op_out - pointer to an xnn_operator_t; by its name, receives the created operator handle.
/// @returns A status code of type enum xnn_status.
4257enum xnn_status xnn_create_depth_to_space_nhwc_x8(
4258 size_t output_channels,
4259 size_t input_channel_stride,
4260 size_t output_channel_stride,
4261 uint32_t block_size,
4262 uint32_t flags,
4263 xnn_operator_t* depth_to_space_op_out);
4264
/// Setup entry point for the NHWC depth-to-space operator on 8-bit elements (x8), going by the function
/// name. Declared only when XNN_NO_X8_OPERATORS is not defined.
///
/// NOTE(review): only the prototype is visible here; parameter meanings are inferred from names and types --
/// confirm against the definition.
///
/// @param depth_to_space_op - operator handle (xnn_operator_t) to set up.
/// @param batch_size - presumably the number of images in the batch.
/// @param input_height - presumably the height of each input image.
/// @param input_width - presumably the width of each input image.
/// @param input - untyped (const void*) pointer to the input data; declared const.
/// @param output - untyped (void*) pointer to the output data.
/// @param threadpool - pthreadpool_t handle (type comes from <pthreadpool.h>).
/// @returns A status code of type enum xnn_status.
4265enum xnn_status xnn_setup_depth_to_space_nhwc_x8(
4266 xnn_operator_t depth_to_space_op,
4267 size_t batch_size,
4268 size_t input_height,
4269 size_t input_width,
4270 const void* input,
4271 void* output,
4272 pthreadpool_t threadpool);
4273
/// Create entry point for the N-dimensional slice operator on 8-bit elements (x8), going by the function
/// name. Declared only when XNN_NO_X8_OPERATORS is not defined.
///
/// NOTE(review): only the prototype is visible here; parameter meanings are inferred from names and types --
/// confirm against the definition.
///
/// @param flags - 32-bit flag word; which XNN_FLAG_* bits are honoured is not visible from this prototype.
/// @param slice_op_out - pointer to an xnn_operator_t; by its name, receives the created operator handle.
/// @returns A status code of type enum xnn_status.
4274enum xnn_status xnn_create_slice_nd_x8(
4275 uint32_t flags,
4276 xnn_operator_t* slice_op_out);
4277
/// Setup entry point for the N-dimensional slice operator on 8-bit elements (x8), going by the function
/// name. Declared only when XNN_NO_X8_OPERATORS is not defined.
///
/// NOTE(review): only the prototype is visible here; parameter meanings are inferred from names and types --
/// confirm against the definition.
///
/// @param slice_op - operator handle (xnn_operator_t) to set up.
/// @param num_dims - presumably the number of tensor dimensions, i.e. the length of the three arrays below.
/// @param input_shape - pointer to const size_t; presumably the per-dimension input extents.
/// @param offsets - pointer to const size_t; presumably the per-dimension start index of the slice.
/// @param sizes - pointer to const size_t; presumably the per-dimension extent of the slice.
/// @param input - untyped (const void*) pointer to the input data; declared const.
/// @param output - untyped (void*) pointer to the output data.
/// @param threadpool - pthreadpool_t handle (type comes from <pthreadpool.h>).
/// @returns A status code of type enum xnn_status.
4278enum xnn_status xnn_setup_slice_nd_x8(
4279 xnn_operator_t slice_op,
4280 size_t num_dims,
4281 const size_t* input_shape,
4282 const size_t* offsets,
4283 const size_t* sizes,
4284 const void* input,
4285 void* output,
4286 pthreadpool_t threadpool);
4287
/// Create entry point for the NHWC space-to-depth operator on 8-bit elements (x8), going by the function
/// name. Declared only when XNN_NO_X8_OPERATORS is not defined.
///
/// NOTE(review): only the prototype is visible here; parameter meanings are inferred from names and types --
/// confirm against the definition.
///
/// @param input_channels - presumably the number of channels per input pixel.
/// @param input_channel_stride - presumably the distance, in elements, between consecutive input pixels.
/// @param output_channel_stride - presumably the distance, in elements, between consecutive output pixels.
/// @param block_size - presumably the edge length of the spatial block folded into the depth dimension.
/// @param flags - 32-bit flag word; which XNN_FLAG_* bits are honoured is not visible from this prototype.
/// @param space_to_depth_op_out - pointer to an xnn_operator_t; by its name, receives the created operator handle.
/// @returns A status code of type enum xnn_status.
4288enum xnn_status xnn_create_space_to_depth_nhwc_x8(
4289 size_t input_channels,
4290 size_t input_channel_stride,
4291 size_t output_channel_stride,
4292 uint32_t block_size,
4293 uint32_t flags,
4294 xnn_operator_t* space_to_depth_op_out);
4295
/// Setup entry point for the NHWC space-to-depth operator on 8-bit elements (x8), going by the function
/// name. Declared only when XNN_NO_X8_OPERATORS is not defined.
///
/// NOTE(review): only the prototype is visible here; parameter meanings are inferred from names and types --
/// confirm against the definition.
///
/// @param space_to_depth_op - operator handle (xnn_operator_t) to set up.
/// @param batch_size - presumably the number of images in the batch.
/// @param input_height - presumably the height of each input image.
/// @param input_width - presumably the width of each input image.
/// @param input - untyped (const void*) pointer to the input data; declared const.
/// @param output - untyped (void*) pointer to the output data.
/// @param threadpool - pthreadpool_t handle (type comes from <pthreadpool.h>).
/// @returns A status code of type enum xnn_status.
4296enum xnn_status xnn_setup_space_to_depth_nhwc_x8(
4297 xnn_operator_t space_to_depth_op,
4298 size_t batch_size,
4299 size_t input_height,
4300 size_t input_width,
4301 const void* input,
4302 void* output,
4303 pthreadpool_t threadpool);
4304
/// Create entry point for the N-dimensional transpose operator on 8-bit elements (x8), going by the
/// function name. Declared only when XNN_NO_X8_OPERATORS is not defined.
///
/// NOTE(review): only the prototype is visible here; parameter meanings are inferred from names and types --
/// confirm against the definition.
///
/// @param flags - 32-bit flag word; which XNN_FLAG_* bits are honoured is not visible from this prototype.
/// @param transpose_op_out - pointer to an xnn_operator_t; by its name, receives the created operator handle.
/// @returns A status code of type enum xnn_status.
4305enum xnn_status xnn_create_transpose_nd_x8(
4306 uint32_t flags,
4307 xnn_operator_t* transpose_op_out);
4308
/// Setup entry point for the N-dimensional transpose operator on 8-bit elements (x8), going by the
/// function name. Declared only when XNN_NO_X8_OPERATORS is not defined.
///
/// NOTE(review): only the prototype is visible here; parameter meanings are inferred from names and types --
/// confirm against the definition. The input/output pointers come before the shape arguments here, the
/// reverse of the other *_nd_x8 setup prototypes in this group. The top-level const on the by-value
/// num_dims has no effect on callers.
///
/// @param transpose_op - operator handle (xnn_operator_t) to set up.
/// @param input - untyped (const void*) pointer to the input data; declared const.
/// @param output - untyped (void*) pointer to the output data.
/// @param num_dims - presumably the number of tensor dimensions, i.e. the length of the two arrays below.
/// @param input_shape - pointer to const size_t; presumably the per-dimension input extents.
/// @param output_perm - pointer to const size_t; presumably the permutation mapping output dimensions to
///                      input dimensions.
/// @param threadpool - pthreadpool_t handle (type comes from <pthreadpool.h>).
/// @returns A status code of type enum xnn_status.
4309enum xnn_status xnn_setup_transpose_nd_x8(
4310 xnn_operator_t transpose_op,
4311 const void* input,
4312 void* output,
4313 const size_t num_dims,
4314 const size_t* input_shape,
4315 const size_t* output_perm,
4316 pthreadpool_t threadpool);
4317
/// "Run" entry point for the N-dimensional transpose operator on 8-bit elements (x8), going by the
/// function name. It takes no operator handle, so it presumably performs the whole operation in one call --
/// TODO confirm. Declared only when XNN_NO_X8_OPERATORS is not defined.
///
/// NOTE(review): only the prototype is visible here; parameter meanings are inferred from names and types --
/// confirm against the definition. The top-level const on the by-value num_dims has no effect on callers.
///
/// @param input - untyped (const void*) pointer to the input data; declared const.
/// @param output - untyped (void*) pointer to the output data.
/// @param num_dims - presumably the number of tensor dimensions, i.e. the length of the two arrays below.
/// @param input_shape - pointer to const size_t; presumably the per-dimension input extents.
/// @param output_perm - pointer to const size_t; presumably the permutation mapping output dimensions to
///                      input dimensions.
/// @param flags - 32-bit flag word; which XNN_FLAG_* bits are honoured is not visible from this prototype.
/// @param threadpool - pthreadpool_t handle (type comes from <pthreadpool.h>).
/// @returns A status code of type enum xnn_status.
4318enum xnn_status xnn_run_transpose_nd_x8(
4319 const void* input,
4320 void* output,
4321 const size_t num_dims,
4322 const size_t* input_shape,
4323 const size_t* output_perm,
4324 uint32_t flags,
4325 pthreadpool_t threadpool);
4326
4327#endif // XNN_NO_X8_OPERATORS
4328
4329#ifndef XNN_NO_CVT_OPERATORS
4330
/// Create entry point for the f16 -> f32 convert operator in NC layout, going by the function name.
/// Declared only when XNN_NO_CVT_OPERATORS is not defined.
///
/// NOTE(review): only the prototype is visible here; parameter meanings are inferred from names and types --
/// confirm against the definition.
///
/// @param channels - presumably the number of channels per batch element.
/// @param input_stride - presumably the distance, in elements, between consecutive input batch elements.
/// @param output_stride - presumably the distance, in elements, between consecutive output batch elements.
/// @param flags - 32-bit flag word; which XNN_FLAG_* bits are honoured is not visible from this prototype.
/// @param convert_op_out - pointer to an xnn_operator_t; by its name, receives the created operator handle.
/// @returns A status code of type enum xnn_status.
4331enum xnn_status xnn_create_convert_nc_f16_f32(
4332 size_t channels,
4333 size_t input_stride,
4334 size_t output_stride,
4335 uint32_t flags,
4336 xnn_operator_t* convert_op_out);
4337
/// Setup entry point for the f16 -> f32 convert operator in NC layout, going by the function name.
/// Declared only when XNN_NO_CVT_OPERATORS is not defined.
///
/// NOTE(review): only the prototype is visible here; parameter meanings are inferred from names and types --
/// confirm against the definition.
///
/// @param convert_op - operator handle (xnn_operator_t) to set up.
/// @param batch_size - presumably the number of batch elements to convert.
/// @param input - const void* pointer to the input; the f16 element type is not spelled in the signature.
/// @param output - pointer to float output data.
/// @param threadpool - pthreadpool_t handle (type comes from <pthreadpool.h>).
/// @returns A status code of type enum xnn_status.
4338enum xnn_status xnn_setup_convert_nc_f16_f32(
4339 xnn_operator_t convert_op,
4340 size_t batch_size,
4341 const void* input,
4342 float* output,
4343 pthreadpool_t threadpool);
4344
/// "Run" entry point for the f16 -> f32 conversion in NC layout, going by the function name. It takes no
/// operator handle and combines the create-time and setup-time arguments, so it presumably performs the
/// whole conversion in one call -- TODO confirm. Declared only when XNN_NO_CVT_OPERATORS is not defined.
///
/// NOTE(review): only the prototype is visible here; parameter meanings are inferred from names and types --
/// confirm against the definition.
///
/// @param channels - presumably the number of channels per batch element.
/// @param input_stride - presumably the distance, in elements, between consecutive input batch elements.
/// @param output_stride - presumably the distance, in elements, between consecutive output batch elements.
/// @param batch_size - presumably the number of batch elements to convert.
/// @param input - const void* pointer to the input; the f16 element type is not spelled in the signature.
/// @param output - pointer to float output data.
/// @param flags - 32-bit flag word; which XNN_FLAG_* bits are honoured is not visible from this prototype.
/// @param threadpool - pthreadpool_t handle (type comes from <pthreadpool.h>).
/// @returns A status code of type enum xnn_status.
4345enum xnn_status xnn_run_convert_nc_f16_f32(
4346 size_t channels,
4347 size_t input_stride,
4348 size_t output_stride,
4349 size_t batch_size,
4350 const void* input,
4351 float* output,
4352 uint32_t flags,
4353 pthreadpool_t threadpool);
4354
/// Create entry point for the f32 -> f16 convert operator in NC layout, going by the function name.
/// Declared only when XNN_NO_CVT_OPERATORS is not defined.
///
/// NOTE(review): only the prototype is visible here; parameter meanings are inferred from names and types --
/// confirm against the definition.
///
/// @param channels - presumably the number of channels per batch element.
/// @param input_stride - presumably the distance, in elements, between consecutive input batch elements.
/// @param output_stride - presumably the distance, in elements, between consecutive output batch elements.
/// @param flags - 32-bit flag word; which XNN_FLAG_* bits are honoured is not visible from this prototype.
/// @param convert_op_out - pointer to an xnn_operator_t; by its name, receives the created operator handle.
/// @returns A status code of type enum xnn_status.
4355enum xnn_status xnn_create_convert_nc_f32_f16(
4356 size_t channels,
4357 size_t input_stride,
4358 size_t output_stride,
4359 uint32_t flags,
4360 xnn_operator_t* convert_op_out);
4361
/// Setup entry point for the f32 -> f16 convert operator in NC layout, going by the function name.
/// Declared only when XNN_NO_CVT_OPERATORS is not defined.
///
/// NOTE(review): only the prototype is visible here; parameter meanings are inferred from names and types --
/// confirm against the definition.
///
/// @param convert_op - operator handle (xnn_operator_t) to set up.
/// @param batch_size - presumably the number of batch elements to convert.
/// @param input - pointer to float input data; declared const.
/// @param output - void* pointer to the output; the f16 element type is not spelled in the signature.
/// @param threadpool - pthreadpool_t handle (type comes from <pthreadpool.h>).
/// @returns A status code of type enum xnn_status.
4362enum xnn_status xnn_setup_convert_nc_f32_f16(
4363 xnn_operator_t convert_op,
4364 size_t batch_size,
4365 const float* input,
4366 void* output,
4367 pthreadpool_t threadpool);
4368
/// "Run" entry point for the f32 -> f16 conversion in NC layout, going by the function name. It takes no
/// operator handle and combines the create-time and setup-time arguments, so it presumably performs the
/// whole conversion in one call -- TODO confirm. Declared only when XNN_NO_CVT_OPERATORS is not defined.
///
/// NOTE(review): only the prototype is visible here; parameter meanings are inferred from names and types --
/// confirm against the definition.
///
/// @param channels - presumably the number of channels per batch element.
/// @param input_stride - presumably the distance, in elements, between consecutive input batch elements.
/// @param output_stride - presumably the distance, in elements, between consecutive output batch elements.
/// @param batch_size - presumably the number of batch elements to convert.
/// @param input - pointer to float input data; declared const.
/// @param output - void* pointer to the output; the f16 element type is not spelled in the signature.
/// @param flags - 32-bit flag word; which XNN_FLAG_* bits are honoured is not visible from this prototype.
/// @param threadpool - pthreadpool_t handle (type comes from <pthreadpool.h>).
/// @returns A status code of type enum xnn_status.
4369enum xnn_status xnn_run_convert_nc_f32_f16(
4370 size_t channels,
4371 size_t input_stride,
4372 size_t output_stride,
4373 size_t batch_size,
4374 const float* input,
4375 void* output,
4376 uint32_t flags,
4377 pthreadpool_t threadpool);
4378
/// Create entry point for the f32 -> qs8 (signed 8-bit quantized) convert operator in NC layout, going by
/// the function name. Declared only when XNN_NO_CVT_OPERATORS is not defined.
///
/// NOTE(review): only the prototype is visible here; parameter meanings are inferred from names and types --
/// confirm against the definition.
///
/// @param channels - presumably the number of channels per batch element.
/// @param input_stride - presumably the distance, in elements, between consecutive input batch elements.
/// @param output_stride - presumably the distance, in elements, between consecutive output batch elements.
/// @param output_scale - presumably the quantization scale of the output.
/// @param output_zero_point - presumably the quantization zero point of the output.
/// @param output_min - presumably the lower clamping bound for quantized outputs.
/// @param output_max - presumably the upper clamping bound for quantized outputs.
/// @param flags - 32-bit flag word; which XNN_FLAG_* bits are honoured is not visible from this prototype.
/// @param convert_op_out - pointer to an xnn_operator_t; by its name, receives the created operator handle.
/// @returns A status code of type enum xnn_status.
4379enum xnn_status xnn_create_convert_nc_f32_qs8(
4380 size_t channels,
4381 size_t input_stride,
4382 size_t output_stride,
4383 float output_scale,
4384 int8_t output_zero_point,
4385 int8_t output_min,
4386 int8_t output_max,
4387 uint32_t flags,
4388 xnn_operator_t* convert_op_out);
4389
/// Setup entry point for the f32 -> qs8 (signed 8-bit quantized) convert operator in NC layout, going by
/// the function name. Declared only when XNN_NO_CVT_OPERATORS is not defined.
///
/// NOTE(review): only the prototype is visible here; parameter meanings are inferred from names and types --
/// confirm against the definition.
///
/// @param convert_op - operator handle (xnn_operator_t) to set up.
/// @param batch_size - presumably the number of batch elements to convert.
/// @param input - pointer to float input data; declared const.
/// @param output - pointer to int8_t output data.
/// @param threadpool - pthreadpool_t handle (type comes from <pthreadpool.h>).
/// @returns A status code of type enum xnn_status.
4390enum xnn_status xnn_setup_convert_nc_f32_qs8(
4391 xnn_operator_t convert_op,
4392 size_t batch_size,
4393 const float* input,
4394 int8_t* output,
4395 pthreadpool_t threadpool);
4396
/// "Run" entry point for the f32 -> qs8 (signed 8-bit quantized) conversion in NC layout, going by the
/// function name. It takes no operator handle, so it presumably performs the whole conversion in one call --
/// TODO confirm. Declared only when XNN_NO_CVT_OPERATORS is not defined.
///
/// NOTE(review): only the prototype is visible here; parameter meanings are inferred from names and types --
/// confirm against the definition. Unlike xnn_create_convert_nc_f32_qs8, this prototype has no
/// output_min/output_max parameters; what clamping range applies is not visible here.
///
/// @param channels - presumably the number of channels per batch element.
/// @param input_stride - presumably the distance, in elements, between consecutive input batch elements.
/// @param output_stride - presumably the distance, in elements, between consecutive output batch elements.
/// @param batch_size - presumably the number of batch elements to convert.
/// @param input - pointer to float input data; declared const.
/// @param output - pointer to int8_t output data.
/// @param output_scale - presumably the quantization scale of the output.
/// @param output_zero_point - presumably the quantization zero point of the output.
/// @param flags - 32-bit flag word; which XNN_FLAG_* bits are honoured is not visible from this prototype.
/// @param threadpool - pthreadpool_t handle (type comes from <pthreadpool.h>).
/// @returns A status code of type enum xnn_status.
4397enum xnn_status xnn_run_convert_nc_f32_qs8(
4398 size_t channels,
4399 size_t input_stride,
4400 size_t output_stride,
4401 size_t batch_size,
4402 const float* input,
4403 int8_t* output,
4404 float output_scale,
4405 int8_t output_zero_point,
4406 uint32_t flags,
4407 pthreadpool_t threadpool);
4408
/// Create entry point for the f32 -> qu8 (unsigned 8-bit quantized) convert operator in NC layout, going by
/// the function name. Declared only when XNN_NO_CVT_OPERATORS is not defined.
///
/// NOTE(review): only the prototype is visible here; parameter meanings are inferred from names and types --
/// confirm against the definition.
///
/// @param channels - presumably the number of channels per batch element.
/// @param input_stride - presumably the distance, in elements, between consecutive input batch elements.
/// @param output_stride - presumably the distance, in elements, between consecutive output batch elements.
/// @param output_scale - presumably the quantization scale of the output.
/// @param output_zero_point - presumably the quantization zero point of the output.
/// @param output_min - presumably the lower clamping bound for quantized outputs.
/// @param output_max - presumably the upper clamping bound for quantized outputs.
/// @param flags - 32-bit flag word; which XNN_FLAG_* bits are honoured is not visible from this prototype.
/// @param convert_op_out - pointer to an xnn_operator_t; by its name, receives the created operator handle.
/// @returns A status code of type enum xnn_status.
4409enum xnn_status xnn_create_convert_nc_f32_qu8(
4410 size_t channels,
4411 size_t input_stride,
4412 size_t output_stride,
4413 float output_scale,
4414 uint8_t output_zero_point,
4415 uint8_t output_min,
4416 uint8_t output_max,
4417 uint32_t flags,
4418 xnn_operator_t* convert_op_out);
4419
/// Setup entry point for the f32 -> qu8 (unsigned 8-bit quantized) convert operator in NC layout, going by
/// the function name. Declared only when XNN_NO_CVT_OPERATORS is not defined.
///
/// NOTE(review): only the prototype is visible here; parameter meanings are inferred from names and types --
/// confirm against the definition.
///
/// @param convert_op - operator handle (xnn_operator_t) to set up.
/// @param batch_size - presumably the number of batch elements to convert.
/// @param input - pointer to float input data; declared const.
/// @param output - pointer to uint8_t output data.
/// @param threadpool - pthreadpool_t handle (type comes from <pthreadpool.h>).
/// @returns A status code of type enum xnn_status.
4420enum xnn_status xnn_setup_convert_nc_f32_qu8(
4421 xnn_operator_t convert_op,
4422 size_t batch_size,
4423 const float* input,
4424 uint8_t* output,
4425 pthreadpool_t threadpool);
4426
/// "Run" entry point for the f32 -> qu8 (unsigned 8-bit quantized) conversion in NC layout, going by the
/// function name. It takes no operator handle, so it presumably performs the whole conversion in one call --
/// TODO confirm. Declared only when XNN_NO_CVT_OPERATORS is not defined.
///
/// NOTE(review): only the prototype is visible here; parameter meanings are inferred from names and types --
/// confirm against the definition. Unlike xnn_create_convert_nc_f32_qu8, this prototype has no
/// output_min/output_max parameters; what clamping range applies is not visible here.
///
/// @param channels - presumably the number of channels per batch element.
/// @param input_stride - presumably the distance, in elements, between consecutive input batch elements.
/// @param output_stride - presumably the distance, in elements, between consecutive output batch elements.
/// @param batch_size - presumably the number of batch elements to convert.
/// @param input - pointer to float input data; declared const.
/// @param output - pointer to uint8_t output data.
/// @param output_scale - presumably the quantization scale of the output.
/// @param output_zero_point - presumably the quantization zero point of the output.
/// @param flags - 32-bit flag word; which XNN_FLAG_* bits are honoured is not visible from this prototype.
/// @param threadpool - pthreadpool_t handle (type comes from <pthreadpool.h>).
/// @returns A status code of type enum xnn_status.
4427enum xnn_status xnn_run_convert_nc_f32_qu8(
4428 size_t channels,
4429 size_t input_stride,
4430 size_t output_stride,
4431 size_t batch_size,
4432 const float* input,
4433 uint8_t* output,
4434 float output_scale,
4435 uint8_t output_zero_point,
4436 uint32_t flags,
4437 pthreadpool_t threadpool);
4438
/// Create entry point for the qs8 -> qs8 convert operator in NC layout, going by the function name; the
/// separate input and output scale/zero-point parameters suggest it converts between two quantization
/// settings -- TODO confirm. Declared only when XNN_NO_CVT_OPERATORS is not defined.
///
/// NOTE(review): only the prototype is visible here; parameter meanings are inferred from names and types --
/// confirm against the definition.
///
/// @param channels - presumably the number of channels per batch element.
/// @param input_stride - presumably the distance, in elements, between consecutive input batch elements.
/// @param output_stride - presumably the distance, in elements, between consecutive output batch elements.
/// @param input_scale - presumably the quantization scale of the input.
/// @param input_zero_point - presumably the quantization zero point of the input.
/// @param output_scale - presumably the quantization scale of the output.
/// @param output_zero_point - presumably the quantization zero point of the output.
/// @param flags - 32-bit flag word; which XNN_FLAG_* bits are honoured is not visible from this prototype.
/// @param convert_op_out - pointer to an xnn_operator_t; by its name, receives the created operator handle.
/// @returns A status code of type enum xnn_status.
4439enum xnn_status xnn_create_convert_nc_qs8(
4440 size_t channels,
4441 size_t input_stride,
4442 size_t output_stride,
4443 float input_scale,
4444 int8_t input_zero_point,
4445 float output_scale,
4446 int8_t output_zero_point,
4447 uint32_t flags,
4448 xnn_operator_t* convert_op_out);
4449
/// Setup entry point for the qs8 -> qs8 convert operator in NC layout, going by the function name.
/// Declared only when XNN_NO_CVT_OPERATORS is not defined.
///
/// NOTE(review): only the prototype is visible here; parameter meanings are inferred from names and types --
/// confirm against the definition.
///
/// @param convert_op - operator handle (xnn_operator_t) to set up.
/// @param batch_size - presumably the number of batch elements to convert.
/// @param input - pointer to int8_t input data; declared const.
/// @param output - pointer to int8_t output data.
/// @param threadpool - pthreadpool_t handle (type comes from <pthreadpool.h>).
/// @returns A status code of type enum xnn_status.
4450enum xnn_status xnn_setup_convert_nc_qs8(
4451 xnn_operator_t convert_op,
4452 size_t batch_size,
4453 const int8_t* input,
4454 int8_t* output,
4455 pthreadpool_t threadpool);
4456
/// Create entry point for the qs8 (signed 8-bit quantized) -> f32 convert operator in NC layout, going by
/// the function name. Declared only when XNN_NO_CVT_OPERATORS is not defined.
///
/// NOTE(review): only the prototype is visible here; parameter meanings are inferred from names and types --
/// confirm against the definition.
///
/// @param channels - presumably the number of channels per batch element.
/// @param input_stride - presumably the distance, in elements, between consecutive input batch elements.
/// @param output_stride - presumably the distance, in elements, between consecutive output batch elements.
/// @param input_scale - presumably the quantization scale of the input.
/// @param input_zero_point - presumably the quantization zero point of the input.
/// @param flags - 32-bit flag word; which XNN_FLAG_* bits are honoured is not visible from this prototype.
/// @param convert_op_out - pointer to an xnn_operator_t; by its name, receives the created operator handle.
/// @returns A status code of type enum xnn_status.
4457enum xnn_status xnn_create_convert_nc_qs8_f32(
4458 size_t channels,
4459 size_t input_stride,
4460 size_t output_stride,
4461 float input_scale,
4462 int8_t input_zero_point,
4463 uint32_t flags,
4464 xnn_operator_t* convert_op_out);
4465
/// Setup entry point for the qs8 (signed 8-bit quantized) -> f32 convert operator in NC layout, going by
/// the function name. Declared only when XNN_NO_CVT_OPERATORS is not defined.
///
/// NOTE(review): only the prototype is visible here; parameter meanings are inferred from names and types --
/// confirm against the definition.
///
/// @param convert_op - operator handle (xnn_operator_t) to set up.
/// @param batch_size - presumably the number of batch elements to convert.
/// @param input - pointer to int8_t input data; declared const.
/// @param output - pointer to float output data.
/// @param threadpool - pthreadpool_t handle (type comes from <pthreadpool.h>).
/// @returns A status code of type enum xnn_status.
4466enum xnn_status xnn_setup_convert_nc_qs8_f32(
4467 xnn_operator_t convert_op,
4468 size_t batch_size,
4469 const int8_t* input,
4470 float* output,
4471 pthreadpool_t threadpool);
4472
/// "Run" entry point for the qs8 (signed 8-bit quantized) -> f32 conversion in NC layout, going by the
/// function name. It takes no operator handle and combines the create-time and setup-time arguments, so it
/// presumably performs the whole conversion in one call -- TODO confirm.
/// Declared only when XNN_NO_CVT_OPERATORS is not defined.
///
/// NOTE(review): only the prototype is visible here; parameter meanings are inferred from names and types --
/// confirm against the definition.
///
/// @param channels - presumably the number of channels per batch element.
/// @param input_stride - presumably the distance, in elements, between consecutive input batch elements.
/// @param output_stride - presumably the distance, in elements, between consecutive output batch elements.
/// @param batch_size - presumably the number of batch elements to convert.
/// @param input - pointer to int8_t input data; declared const.
/// @param output - pointer to float output data.
/// @param input_scale - presumably the quantization scale of the input.
/// @param input_zero_point - presumably the quantization zero point of the input.
/// @param flags - 32-bit flag word; which XNN_FLAG_* bits are honoured is not visible from this prototype.
/// @param threadpool - pthreadpool_t handle (type comes from <pthreadpool.h>).
/// @returns A status code of type enum xnn_status.
4473enum xnn_status xnn_run_convert_nc_qs8_f32(
4474 size_t channels,
4475 size_t input_stride,
4476 size_t output_stride,
4477 size_t batch_size,
4478 const int8_t* input,
4479 float* output,
4480 float input_scale,
4481 int8_t input_zero_point,
4482 uint32_t flags,
4483 pthreadpool_t threadpool);
4484
/// Create entry point for the qu8 -> qu8 convert operator in NC layout, going by the function name; the
/// separate input and output scale/zero-point parameters suggest it converts between two quantization
/// settings -- TODO confirm. Declared only when XNN_NO_CVT_OPERATORS is not defined.
///
/// NOTE(review): only the prototype is visible here; parameter meanings are inferred from names and types --
/// confirm against the definition.
///
/// @param channels - presumably the number of channels per batch element.
/// @param input_stride - presumably the distance, in elements, between consecutive input batch elements.
/// @param output_stride - presumably the distance, in elements, between consecutive output batch elements.
/// @param input_scale - presumably the quantization scale of the input.
/// @param input_zero_point - presumably the quantization zero point of the input.
/// @param output_scale - presumably the quantization scale of the output.
/// @param output_zero_point - presumably the quantization zero point of the output.
/// @param flags - 32-bit flag word; which XNN_FLAG_* bits are honoured is not visible from this prototype.
/// @param convert_op_out - pointer to an xnn_operator_t; by its name, receives the created operator handle.
/// @returns A status code of type enum xnn_status.
4485enum xnn_status xnn_create_convert_nc_qu8(
4486 size_t channels,
4487 size_t input_stride,
4488 size_t output_stride,
4489 float input_scale,
4490 uint8_t input_zero_point,
4491 float output_scale,
4492 uint8_t output_zero_point,
4493 uint32_t flags,
4494 xnn_operator_t* convert_op_out);
4495
/// Setup entry point for the qu8 -> qu8 convert operator in NC layout, going by the function name.
/// Declared only when XNN_NO_CVT_OPERATORS is not defined.
///
/// NOTE(review): only the prototype is visible here; parameter meanings are inferred from names and types --
/// confirm against the definition.
///
/// @param convert_op - operator handle (xnn_operator_t) to set up.
/// @param batch_size - presumably the number of batch elements to convert.
/// @param input - pointer to uint8_t input data; declared const.
/// @param output - pointer to uint8_t output data.
/// @param threadpool - pthreadpool_t handle (type comes from <pthreadpool.h>).
/// @returns A status code of type enum xnn_status.
4496enum xnn_status xnn_setup_convert_nc_qu8(
4497 xnn_operator_t convert_op,
4498 size_t batch_size,
4499 const uint8_t* input,
4500 uint8_t* output,
4501 pthreadpool_t threadpool);
4502
/// Create entry point for the qu8 (unsigned 8-bit quantized) -> f32 convert operator in NC layout, going by
/// the function name. Declared only when XNN_NO_CVT_OPERATORS is not defined.
///
/// NOTE(review): only the prototype is visible here; parameter meanings are inferred from names and types --
/// confirm against the definition.
///
/// @param channels - presumably the number of channels per batch element.
/// @param input_stride - presumably the distance, in elements, between consecutive input batch elements.
/// @param output_stride - presumably the distance, in elements, between consecutive output batch elements.
/// @param input_scale - presumably the quantization scale of the input.
/// @param input_zero_point - presumably the quantization zero point of the input.
/// @param flags - 32-bit flag word; which XNN_FLAG_* bits are honoured is not visible from this prototype.
/// @param convert_op_out - pointer to an xnn_operator_t; by its name, receives the created operator handle.
/// @returns A status code of type enum xnn_status.
4503enum xnn_status xnn_create_convert_nc_qu8_f32(
4504 size_t channels,
4505 size_t input_stride,
4506 size_t output_stride,
4507 float input_scale,
4508 uint8_t input_zero_point,
4509 uint32_t flags,
4510 xnn_operator_t* convert_op_out);
4511
/// Setup entry point for the qu8 (unsigned 8-bit quantized) -> f32 convert operator in NC layout, going by
/// the function name. Declared only when XNN_NO_CVT_OPERATORS is not defined.
///
/// NOTE(review): only the prototype is visible here; parameter meanings are inferred from names and types --
/// confirm against the definition.
///
/// @param convert_op - operator handle (xnn_operator_t) to set up.
/// @param batch_size - presumably the number of batch elements to convert.
/// @param input - pointer to uint8_t input data; declared const.
/// @param output - pointer to float output data.
/// @param threadpool - pthreadpool_t handle (type comes from <pthreadpool.h>).
/// @returns A status code of type enum xnn_status.
4512enum xnn_status xnn_setup_convert_nc_qu8_f32(
4513 xnn_operator_t convert_op,
4514 size_t batch_size,
4515 const uint8_t* input,
4516 float* output,
4517 pthreadpool_t threadpool);
4518
/// "Run" entry point for the qu8 (unsigned 8-bit quantized) -> f32 conversion in NC layout, going by the
/// function name. It takes no operator handle and combines the create-time and setup-time arguments, so it
/// presumably performs the whole conversion in one call -- TODO confirm.
/// Declared only when XNN_NO_CVT_OPERATORS is not defined.
///
/// NOTE(review): only the prototype is visible here; parameter meanings are inferred from names and types --
/// confirm against the definition.
///
/// @param channels - presumably the number of channels per batch element.
/// @param input_stride - presumably the distance, in elements, between consecutive input batch elements.
/// @param output_stride - presumably the distance, in elements, between consecutive output batch elements.
/// @param batch_size - presumably the number of batch elements to convert.
/// @param input - pointer to uint8_t input data; declared const.
/// @param output - pointer to float output data.
/// @param input_scale - presumably the quantization scale of the input.
/// @param input_zero_point - presumably the quantization zero point of the input.
/// @param flags - 32-bit flag word; which XNN_FLAG_* bits are honoured is not visible from this prototype.
/// @param threadpool - pthreadpool_t handle (type comes from <pthreadpool.h>).
/// @returns A status code of type enum xnn_status.
4519enum xnn_status xnn_run_convert_nc_qu8_f32(
4520 size_t channels,
4521 size_t input_stride,
4522 size_t output_stride,
4523 size_t batch_size,
4524 const uint8_t* input,
4525 float* output,
4526 float input_scale,
4527 uint8_t input_zero_point,
4528 uint32_t flags,
4529 pthreadpool_t threadpool);
4530
4531#endif // XNN_NO_CVT_OPERATORS
4532
4533#ifdef __cplusplus
4534} // extern "C"
4535#endif
4536