1 | /* Copyright 2019 The TensorFlow Authors. All Rights Reserved. |
2 | |
3 | Licensed under the Apache License, Version 2.0 (the "License"); |
4 | you may not use this file except in compliance with the License. |
5 | You may obtain a copy of the License at |
6 | |
7 | http://www.apache.org/licenses/LICENSE-2.0 |
8 | |
9 | Unless required by applicable law or agreed to in writing, software |
10 | distributed under the License is distributed on an "AS IS" BASIS, |
11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
12 | See the License for the specific language governing permissions and |
13 | limitations under the License. |
14 | ==============================================================================*/ |
15 | #ifndef TENSORFLOW_LITE_EXTERNAL_CPU_BACKEND_CONTEXT_H_ |
16 | #define TENSORFLOW_LITE_EXTERNAL_CPU_BACKEND_CONTEXT_H_ |
17 | |
18 | #include <memory> |
19 | #include <utility> |
20 | |
21 | #include "tensorflow/lite/c/common.h" |
22 | |
23 | namespace tflite { |
24 | |
25 | // This is the base class for TF Lite internal backend contexts (like a |
26 | // RUY-based cpu backend context class). A derived internal backend context is |
27 | // generally a collection of utilities (i.e. a thread pool etc.) for TF Lite to |
28 | // use certain kernel libraries, such as Gemmlowp, RUY, etc., to implement TF |
29 | // Lite operators. |
class TfLiteInternalBackendContext {
 public:
  // Defaulted virtual destructor: this is a polymorphic interface that is
  // owned and deleted through base-class pointers (see
  // ExternalCpuBackendContext below).
  virtual ~TfLiteInternalBackendContext() = default;

  // Sets the maximum number of threads that could be used for parallelizing
  // TfLite computation.
  virtual void SetMaxNumThreads(int max_num_threads) = 0;

  // A context may internally cache prepacked versions of constant tensors for
  // faster computation. This function will clear any caches on the context.
  virtual void ClearCaches() = 0;
};
42 | |
43 | // This TfLiteExternalContext-derived class is the default |
44 | // 'kTfLiteCpuBackendContext'-typed context that's used internally in TF Lite |
45 | // framework. The primary purpose of having this class is to allow the same cpu |
46 | // backend context to be sharable among a set of TF Lite interpreters so that |
47 | // certain system costs are saved, like saving the cost of having multiple |
// thread pools in each separate cpu backend context, etc.
49 | // |
50 | // Note: as of 2019/07/19, such context sharing among a set of interpreters will |
51 | // break the execution if these interpreters are invoked simultaneously. It |
52 | // works only when these context-sharing interpreters are invoked in a |
53 | // serialized way. Here's an example to illustrate the context sharing among 2 |
54 | // TF Lite interpreters: |
55 | // |
56 | // TfLiteExternalContext* global_ctxt = new ExternalCpuBackendContext(); |
57 | // interpreter1 = /*...*/; |
58 | // interpreter1->SetExternalContext(kTfLiteCpuBackendContext, global_ctxt); |
59 | // interpreter2 = /*...*/; |
60 | // interpreter2->SetExternalContext(kTfLiteCpuBackendContext, global_ctxt); |
61 | // |
62 | // interpreter1->SetNumThreads(2); |
63 | // interpreter1->Invoke(); |
64 | // |
65 | // interpreter2->SetNumThreads(4); |
66 | // interpreter2->Invoke(); |
67 | // |
68 | // After sharing the context, calling 'SetNumThreads' on any of the |
69 | // context-sharing interpreters will have the global impact as it also refreshes |
70 | // the #thread info in the global cpu backend context (i.e. 'global_ctxt' above) |
71 | // that affects how much parallelism an interpreter invocation will use. |
72 | // Therefore, if different number of threads are used among different |
73 | // interpreters, don't call 'SetNumThreads' consecutively but call it |
74 | // separately between each interpreter's invocation as illustrated above. |
75 | // |
// Note: it is the responsibility of the user of this context (i.e. a
// TfLiteInterpreter) to clear any state from the internal backend
// context if/when the interpreter no longer needs the shared context.
// See, e.g., how the TfLiteInterpreter destructor clears caches in the case
// of a shared ExternalCpuBackendContext.
81 | class ExternalCpuBackendContext : public TfLiteExternalContext { |
82 | public: |
83 | ExternalCpuBackendContext(); |
84 | ~ExternalCpuBackendContext() {} |
85 | |
86 | void set_internal_backend_context( |
87 | std::unique_ptr<TfLiteInternalBackendContext> internal_backend_context) { |
88 | internal_backend_context_ = std::move(internal_backend_context); |
89 | } |
90 | |
91 | TfLiteInternalBackendContext* internal_backend_context() const { |
92 | return internal_backend_context_.get(); |
93 | } |
94 | |
95 | private: |
96 | // Note the actual internal backend context object is lazily initialized. |
97 | std::unique_ptr<TfLiteInternalBackendContext> internal_backend_context_; |
98 | |
99 | ExternalCpuBackendContext(const ExternalCpuBackendContext&) = delete; |
100 | ExternalCpuBackendContext& operator=(const ExternalCpuBackendContext&) = |
101 | delete; |
102 | }; |
103 | |
104 | } // namespace tflite |
105 | |
106 | #endif // TENSORFLOW_LITE_EXTERNAL_CPU_BACKEND_CONTEXT_H_ |
107 | |