1 | /* Copyright 2020 The TensorFlow Authors. All Rights Reserved. |
2 | |
3 | Licensed under the Apache License, Version 2.0 (the "License"); |
4 | you may not use this file except in compliance with the License. |
5 | You may obtain a copy of the License at |
6 | |
7 | http://www.apache.org/licenses/LICENSE-2.0 |
8 | |
9 | Unless required by applicable law or agreed to in writing, software |
10 | distributed under the License is distributed on an "AS IS" BASIS, |
11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
12 | See the License for the specific language governing permissions and |
13 | limitations under the License. |
14 | ==============================================================================*/ |
15 | |
16 | #ifndef TENSORFLOW_LITE_DELEGATES_XNNPACK_XNNPACK_DELEGATE_H_ |
17 | #define TENSORFLOW_LITE_DELEGATES_XNNPACK_XNNPACK_DELEGATE_H_ |
18 | |
#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>

#include "tensorflow/lite/c/common.h"
20 | |
21 | #ifdef __cplusplus |
22 | extern "C" { |
23 | #endif // __cplusplus |
24 | |
// Bit flags for TfLiteXNNPackDelegateOptions::flags. Combine any of the
// following with bitwise OR.
//
// Enable XNNPACK acceleration for signed quantized 8-bit inference.
// This includes operators with channel-wise quantized weights.
#define TFLITE_XNNPACK_DELEGATE_FLAG_QS8 0x00000001
// Enable XNNPACK acceleration for unsigned quantized 8-bit inference.
#define TFLITE_XNNPACK_DELEGATE_FLAG_QU8 0x00000002
// Force FP16 inference for FP32 operators.
// NOTE(review): running FP32 graphs at FP16 trades accuracy for speed/memory —
// confirm the precision impact is acceptable for your model before enabling.
#define TFLITE_XNNPACK_DELEGATE_FLAG_FORCE_FP16 0x00000004
32 | |
// Opaque handle to a cache of packed weights that can be shared between
// multiple delegate instances; see the
// `TfLiteXNNPackDelegateWeightsCache*` functions below.
struct TfLiteXNNPackDelegateWeightsCache;

// Options for `TfLiteXNNPackDelegateCreate`. Obtain defaults from
// `TfLiteXNNPackDelegateOptionsDefault` and override fields as needed.
typedef struct {
  // Number of threads to use in the thread pool.
  // 0 or negative value means no thread pool used.
  int32_t num_threads;
  // Bitfield with any combination of the following binary options:
  // - TFLITE_XNNPACK_DELEGATE_FLAG_QS8
  // - TFLITE_XNNPACK_DELEGATE_FLAG_QU8
  // - TFLITE_XNNPACK_DELEGATE_FLAG_FORCE_FP16
  uint32_t flags;
  // Cache for packed weights, can be shared between multiple instances of
  // delegates. May be NULL (no shared cache).
  // NOTE(review): the cache is created and destroyed by the caller via the
  // `TfLiteXNNPackDelegateWeightsCache*` API below — presumably it must
  // outlive every delegate that references it; confirm in the .cc file.
  struct TfLiteXNNPackDelegateWeightsCache* weights_cache;
} TfLiteXNNPackDelegateOptions;
48 | |
49 | // Returns a structure with the default XNNPack delegate options. |
50 | TFL_CAPI_EXPORT TfLiteXNNPackDelegateOptions |
51 | TfLiteXNNPackDelegateOptionsDefault(); |
52 | |
// Creates a new delegate instance that must be destroyed with
// `TfLiteXNNPackDelegateDelete` when the delegate is no longer used by TFLite.
// When `options` is set to `nullptr`, default values are used (see
// implementation of TfLiteXNNPackDelegateOptionsDefault in the .cc file for
// details).
TFL_CAPI_EXPORT TfLiteDelegate* TfLiteXNNPackDelegateCreate(
    const TfLiteXNNPackDelegateOptions* options);
60 | |
// Returns the pthreadpool_t object used for parallelization in XNNPACK,
// returned as `void*` so this header need not include the pthreadpool header.
// Can return NULL if the XNNPack delegate is single-threaded.
// NOTE(review): the pool is presumably owned by the delegate — callers should
// not destroy it; confirm lifetime against the implementation.
//
// WARNING: This API is experimental and subject to change.
TFL_CAPI_EXPORT void* TfLiteXNNPackDelegateGetThreadPool(
    TfLiteDelegate* delegate);
67 | |
// Destroys a delegate created with `TfLiteXNNPackDelegateCreate` call.
// NOTE(review): behavior when `delegate` is NULL is not specified here —
// confirm in the implementation before relying on it.
TFL_CAPI_EXPORT void TfLiteXNNPackDelegateDelete(TfLiteDelegate* delegate);
70 | |
71 | // Creates a new weights cache that can be shared with multiple delegate |
72 | // instances. Prefer TfLiteXNNPackDelegateWeightsCacheCreateWithSize which can |
73 | // reduce memory bandwidth. |
74 | TFL_CAPI_EXPORT struct TfLiteXNNPackDelegateWeightsCache* |
75 | TfLiteXNNPackDelegateWeightsCacheCreate(); |
// Creates a new weights cache with a specified initial size that can be shared
// with multiple delegate instances. The weights cache can hold up to size bytes
// without growing.
// `size` is the initial capacity in bytes. As with
// `TfLiteXNNPackDelegateWeightsCacheCreate`, the result must be released with
// `TfLiteXNNPackDelegateWeightsCacheDelete`.
TFL_CAPI_EXPORT struct TfLiteXNNPackDelegateWeightsCache*
TfLiteXNNPackDelegateWeightsCacheCreateWithSize(size_t size);
// Soft-finalize a weights cache. Extra space will be left in the weights cache
// to allow for cache "insertion" only if it is a cache hit. This has memory
// overhead compared to TfLiteXNNPackDelegateWeightsCacheFinalizeHard. Use this
// if the number of interpreter instances using XNNPACK delegate is not fixed
// (e.g. created based on workload in a server daemon).
// Returns true on success, false on error.
// NOTE(review): presumably should be called only after the initial set of
// delegates has populated the cache — confirm the required call order.
TFL_CAPI_EXPORT bool TfLiteXNNPackDelegateWeightsCacheFinalizeSoft(
    struct TfLiteXNNPackDelegateWeightsCache* cache);
// Hard-finalize a weights cache, cache is effectively frozen and no more cache
// operations are allowed. Memory is resized to smallest possible. Use this if
// the number of interpreter instances using XNNPACK delegate can be fixed and
// all creation of instances can happen up front. This has the lowest memory
// usage.
// Returns true on success, false on error.
// NOTE(review): after hard finalization no further delegates can presumably
// be attached to this cache — verify before reusing it for new interpreters.
TFL_CAPI_EXPORT bool TfLiteXNNPackDelegateWeightsCacheFinalizeHard(
    struct TfLiteXNNPackDelegateWeightsCache* cache);
// Destroys a weights cache created with
// `TfLiteXNNPackDelegateWeightsCacheCreate` (or
// `TfLiteXNNPackDelegateWeightsCacheCreateWithSize`) call.
TFL_CAPI_EXPORT void TfLiteXNNPackDelegateWeightsCacheDelete(
    struct TfLiteXNNPackDelegateWeightsCache* cache);
101 | |
102 | #ifdef __cplusplus |
103 | } |
104 | #endif // __cplusplus |
105 | |
106 | #endif // TENSORFLOW_LITE_DELEGATES_XNNPACK_XNNPACK_DELEGATE_H_ |
107 | |