1/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
2
3Licensed under the Apache License, Version 2.0 (the "License");
4you may not use this file except in compliance with the License.
5You may obtain a copy of the License at
6
7 http://www.apache.org/licenses/LICENSE-2.0
8
9Unless required by applicable law or agreed to in writing, software
10distributed under the License is distributed on an "AS IS" BASIS,
11WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12See the License for the specific language governing permissions and
13limitations under the License.
14==============================================================================*/
15
16#ifndef TENSORFLOW_LITE_DELEGATES_XNNPACK_XNNPACK_DELEGATE_H_
17#define TENSORFLOW_LITE_DELEGATES_XNNPACK_XNNPACK_DELEGATE_H_
18
#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>

#include "tensorflow/lite/c/common.h"
20
21#ifdef __cplusplus
22extern "C" {
23#endif // __cplusplus
24
// Enable XNNPACK acceleration for signed quantized 8-bit inference.
// This includes operators with channel-wise quantized weights.
#define TFLITE_XNNPACK_DELEGATE_FLAG_QS8 0x00000001
// Enable XNNPACK acceleration for unsigned quantized 8-bit inference.
#define TFLITE_XNNPACK_DELEGATE_FLAG_QU8 0x00000002
// Force FP16 inference for FP32 operators.
// NOTE(review): presumably trades numerical precision for speed/memory —
// confirm against the delegate implementation before relying on exact FP32
// results with this flag set.
#define TFLITE_XNNPACK_DELEGATE_FLAG_FORCE_FP16 0x00000004

// Opaque handle to a cache of packed weights. Created with
// `TfLiteXNNPackDelegateWeightsCacheCreate*` and shared between delegate
// instances via `TfLiteXNNPackDelegateOptions::weights_cache`.
struct TfLiteXNNPackDelegateWeightsCache;
34
// Options used when creating an XNNPACK delegate with
// `TfLiteXNNPackDelegateCreate`. Obtain defaults from
// `TfLiteXNNPackDelegateOptionsDefault` and override fields as needed.
typedef struct {
  // Number of threads to use in the thread pool.
  // 0 or negative value means no thread pool used.
  int32_t num_threads;
  // Bitfield with any combination of the following binary options:
  // - TFLITE_XNNPACK_DELEGATE_FLAG_QS8
  // - TFLITE_XNNPACK_DELEGATE_FLAG_QU8
  // - TFLITE_XNNPACK_DELEGATE_FLAG_FORCE_FP16
  uint32_t flags;
  // Cache for packed weights, can be shared between multiple instances of
  // delegates.
  // NOTE(review): presumably NULL is allowed when no sharing is needed —
  // confirm in the delegate implementation.
  struct TfLiteXNNPackDelegateWeightsCache* weights_cache;
} TfLiteXNNPackDelegateOptions;
48
49// Returns a structure with the default XNNPack delegate options.
50TFL_CAPI_EXPORT TfLiteXNNPackDelegateOptions
51TfLiteXNNPackDelegateOptionsDefault();
52
53// Creates a new delegate instance that need to be destroyed with
54// `TfLiteXNNPackDelegateDelete` when delegate is no longer used by TFLite.
55// When `options` is set to `nullptr`, default values are used (see
56// implementation of TfLiteXNNPackDelegateOptionsDefault in the .cc file for
57// details).
58TFL_CAPI_EXPORT TfLiteDelegate* TfLiteXNNPackDelegateCreate(
59 const TfLiteXNNPackDelegateOptions* options);
60
// Returns the pthreadpool_t object used for parallelization in XNNPACK.
// Can return NULL if the XNNPack delegate is single-threaded.
// The return type is `void*` so this header need not include pthreadpool
// headers; callers cast the result to `pthreadpool_t` themselves.
//
// WARNING: This API is experimental and subject to change.
TFL_CAPI_EXPORT void* TfLiteXNNPackDelegateGetThreadPool(
    TfLiteDelegate* delegate);

// Destroys a delegate created with `TfLiteXNNPackDelegateCreate` call.
// NOTE(review): presumably safe to call with NULL as a no-op, following the
// usual TFLite convention — confirm in the implementation before relying on
// it.
TFL_CAPI_EXPORT void TfLiteXNNPackDelegateDelete(TfLiteDelegate* delegate);
70
71// Creates a new weights cache that can be shared with multiple delegate
72// instances. Prefer TfLiteXNNPackDelegateWeightsCacheCreateWithSize which can
73// reduce memory bandwidth.
74TFL_CAPI_EXPORT struct TfLiteXNNPackDelegateWeightsCache*
75TfLiteXNNPackDelegateWeightsCacheCreate();
76// Creates a new weights cache with a specified initial size that can be shared
77// with multiple delegate instances. The weights cache can hold up to size bytes
78// without growing.
79TFL_CAPI_EXPORT struct TfLiteXNNPackDelegateWeightsCache*
80TfLiteXNNPackDelegateWeightsCacheCreateWithSize(size_t size);
81// Soft-finalize a weights cache. Extra space will be left in the weights cache
82// to allow for cache "insertion" only if it is a cache hit. This has memory
83// overhead compared to TfLiteXNNPackDelegateWeightsCacheFinalizeHard. Use this
84// if the number of interpreter instances using XNNPACK delegate is not fixed
85// (e.g. created based on workload in a server daemon).
86// Returns true on success, false on error.
87TFL_CAPI_EXPORT bool TfLiteXNNPackDelegateWeightsCacheFinalizeSoft(
88 struct TfLiteXNNPackDelegateWeightsCache* cache);
89// Hard-finalize a weights cache, cache is effectively frozen and no more cache
90// operations are allowed. Memory is resized to smallest possible. Use this if
91// the number of interpreter instances using XNNPACK delegate can be fixed and
92// all creation of instances can happen up front. This has the lowest memory
93// usage.
94// Returns true on success, false on error.
95TFL_CAPI_EXPORT bool TfLiteXNNPackDelegateWeightsCacheFinalizeHard(
96 struct TfLiteXNNPackDelegateWeightsCache* cache);
97// Destroys a weights cache created with
98// `TfLiteXNNPackDelegateWeightsCacheCreate` call.
99TFL_CAPI_EXPORT void TfLiteXNNPackDelegateWeightsCacheDelete(
100 struct TfLiteXNNPackDelegateWeightsCache* cache);
101
102#ifdef __cplusplus
103}
104#endif // __cplusplus
105
106#endif // TENSORFLOW_LITE_DELEGATES_XNNPACK_XNNPACK_DELEGATE_H_
107