1 | /* Copyright 2017 The TensorFlow Authors. All Rights Reserved. |
2 | |
3 | Licensed under the Apache License, Version 2.0 (the "License"); |
4 | you may not use this file except in compliance with the License. |
5 | You may obtain a copy of the License at |
6 | |
7 | http://www.apache.org/licenses/LICENSE-2.0 |
8 | |
9 | Unless required by applicable law or agreed to in writing, software |
10 | distributed under the License is distributed on an "AS IS" BASIS, |
11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
12 | See the License for the specific language governing permissions and |
13 | limitations under the License. |
14 | ==============================================================================*/ |
15 | |
16 | #include "tensorflow/lite/interpreter.h" |
17 | |
18 | #include <stddef.h> |
19 | #include <stdlib.h> |
20 | |
21 | #include <cstdint> |
22 | #include <functional> |
23 | #include <memory> |
24 | #include <string> |
25 | #include <utility> |
26 | #include <vector> |
27 | |
28 | #include "ruy/denormal.h" // from @ruy |
29 | #include "tensorflow/lite/allocation.h" |
30 | #include "tensorflow/lite/core/api/error_reporter.h" |
31 | #include "tensorflow/lite/core/api/profiler.h" |
32 | #include "tensorflow/lite/external_cpu_backend_context.h" |
33 | #include "tensorflow/lite/interpreter_options.h" |
34 | #include "tensorflow/lite/minimal_logging.h" |
35 | #include "tensorflow/lite/stderr_reporter.h" |
36 | #include "tensorflow/lite/util.h" |
37 | |
38 | // TODO(b/139446230): Move to portable platform header. |
39 | #if defined(__ANDROID__) |
40 | #define TFLITE_IS_MOBILE_PLATFORM |
41 | #endif // defined(__ANDROID__) |
42 | |
43 | #if defined(__APPLE__) |
44 | #include "TargetConditionals.h" |
45 | #if TARGET_IPHONE_SIMULATOR |
46 | #define TFLITE_IS_MOBILE_PLATFORM |
47 | #elif TARGET_OS_IPHONE |
48 | #define TFLITE_IS_MOBILE_PLATFORM |
49 | #endif |
50 | #endif // defined(__APPLE__) |
51 | |
// TODO(b/132087118): move static_assert to c_api_internal when compiled with
// C++.
// Sanity check: TfLiteFloat16 must occupy exactly 16 bits of storage.
static_assert(sizeof(TfLiteFloat16) == sizeof(uint16_t),
              "Float 16 type must be 16 bits.");
56 | |
57 | namespace tflite { |
58 | |
namespace {

// Converts the legacy, per-tensor TfLiteQuantizationParams into the current
// TfLiteQuantization representation: a kTfLiteAffineQuantization carrying a
// single scale / zero-point pair.
//
// Ownership note (review): the returned quantization's `params` is malloc'd
// here (with TfLite*ArrayCreate'd scale/zero_point arrays inside) —
// presumably released through the C-style free path by whichever tensor
// eventually takes ownership; confirm at the call sites.
TfLiteQuantization GetQuantizationFromLegacy(
    const TfLiteQuantizationParams& legacy_quantization) {
  TfLiteQuantization quantization;
  quantization.type = kTfLiteAffineQuantization;
  auto* affine_quantization = reinterpret_cast<TfLiteAffineQuantization*>(
      malloc(sizeof(TfLiteAffineQuantization)));
  affine_quantization->scale = TfLiteFloatArrayCreate(1);
  affine_quantization->zero_point = TfLiteIntArrayCreate(1);
  affine_quantization->scale->data[0] = legacy_quantization.scale;
  affine_quantization->zero_point->data[0] = legacy_quantization.zero_point;
  quantization.params = affine_quantization;

  return quantization;
}

// Evaluates `a`, records its status (and a placeholder delegate status) on
// `runtime_event`, and returns from the enclosing function on failure.
//
// TODO(b/153131797): 'delegate_status' is hard-coded to 0 in the following
// macro temporarily because delegate-specific error codes are not retrievable
// at the moment; they will be added later.
#define TF_LITE_ENSURE_STATUS_WITH_SCOPED_INSTRUMENTATION(runtime_event, a) \
  do {                                                                      \
    TfLiteStatus status = (a);                                              \
    runtime_event.set_runtime_status(/*delegate_status=*/0,                 \
                                     static_cast<int64_t>(status));         \
    TF_LITE_ENSURE_STATUS(status);                                          \
  } while (0)

}  // namespace
89 | |
// Builds an interpreter with one (primary) subgraph and an internally owned
// external CPU backend context. A null `error_reporter` falls back to the
// process-wide default reporter.
Interpreter::Interpreter(ErrorReporter* error_reporter)
    : error_reporter_(error_reporter ? error_reporter
                                     : DefaultErrorReporter()) {
  // TODO(b/128420794): Include the TFLite runtime version in the log.
  // Prod logging is useful for mobile platforms where scraping console logs is
  // critical for debugging.
#if defined(TFLITE_IS_MOBILE_PLATFORM)
  TFLITE_LOG_PROD_ONCE(TFLITE_LOG_INFO, "Initialized TensorFlow Lite runtime.");
#else
  TFLITE_LOG_ONCE(TFLITE_LOG_INFO, "Initialized TensorFlow Lite runtime.");
#endif

  // There's always at least 1 subgraph which is the primary subgraph.
  AddSubgraphs(1);
  context_ = primary_subgraph().context();

  // Start with no external contexts registered.
  for (int i = 0; i < kTfLiteMaxExternalContexts; ++i) {
    external_contexts_[i] = nullptr;
  }

  // This operation is cheap because we allocate the CPU context resources (i.e.
  // threads) lazily.
  own_external_cpu_backend_context_ =
      std::make_unique<ExternalCpuBackendContext>();
  external_contexts_[kTfLiteCpuBackendContext] =
      own_external_cpu_backend_context_.get();
}
118 | |
// If the CPU backend context in use is NOT the internally owned one, other
// interpreters may still be sharing it, so its internal caches are cleared
// before this interpreter goes away. The owned context is destroyed together
// with the interpreter and needs no explicit cleanup here.
Interpreter::~Interpreter() {
  // The owned external Cpu Backend Context will go out of scope with this
  // interpreter. If we have an external backend context that is not
  // owned, we need to clear the cache for other interpreters that may
  // use the context.
  if (external_contexts_[kTfLiteCpuBackendContext] &&
      (external_contexts_[kTfLiteCpuBackendContext] !=
       own_external_cpu_backend_context_.get())) {
    ExternalCpuBackendContext* external_context =
        static_cast<ExternalCpuBackendContext*>(
            external_contexts_[kTfLiteCpuBackendContext]);
    TfLiteInternalBackendContext* internal_context =
        external_context->internal_backend_context();
    if (internal_context) {
      // This call may have negative performance impacts on the next inference
      // for any interpreter using this context. The cache will be refreshed
      // by the next inference.
      internal_context->ClearCaches();
    }
  }
}
140 | |
// Registers `ctx` as the external context of the given `type`, replacing any
// previously registered one. Passing the internally owned CPU backend context
// back in is rejected with a warning (it is already registered).
void Interpreter::SetExternalContext(TfLiteExternalContextType type,
                                     TfLiteExternalContext* ctx) {
  if (ctx == own_external_cpu_backend_context_.get()) {
    error_reporter_->Report(
        "WARNING: The passed external context is identical to the internally "
        "owned one.");
    return;
  }

  // We have an internally owned external context of kTfLiteCpuBackendContext.
  // If it's overwritten here, we will release the resource of the internally
  // owned external context.
  // Note: the 'max thread count' info associated with the overwritten context
  // will be lost here, and such info is now determined by the new context, thus
  // affecting how much parallelism a TFLite op would have.
  if (kTfLiteCpuBackendContext == type &&
      external_contexts_[kTfLiteCpuBackendContext] ==
          own_external_cpu_backend_context_.get()) {
    own_external_cpu_backend_context_.reset();
  }

  // This essentially changes the "external_contexts_[type]".
  primary_subgraph().SetExternalContext(type, ctx);
}
165 | |
// Sets the tensor indices that act as the model's inputs. Forwards to the
// primary subgraph.
TfLiteStatus Interpreter::SetInputs(std::vector<int> inputs) {
  return primary_subgraph().SetInputs(std::move(inputs));
}
169 | |
// Sets the tensor indices that act as the model's outputs. Forwards to the
// primary subgraph.
TfLiteStatus Interpreter::SetOutputs(std::vector<int> outputs) {
  return primary_subgraph().SetOutputs(std::move(outputs));
}
173 | |
// Sets the tensor indices that act as the model's variable tensors. Forwards
// to the primary subgraph.
TfLiteStatus Interpreter::SetVariables(std::vector<int> variables) {
  return primary_subgraph().SetVariables(std::move(variables));
}
177 | |
// Allocates tensor memory for the primary subgraph, first giving default
// (lazily applied) delegates a chance to modify the graph.
TfLiteStatus Interpreter::AllocateTensors() {
  // Apply the default delegate that TFLite will enable at this point to allow
  // other user-level delegates to be applied first. Only returns error when
  // the status is kTfLiteError; for other (delegate-recoverable) statuses it
  // falls through to the default, undelegated implementation.
  if (ApplyLazyDelegateProviders() == kTfLiteError) return kTfLiteError;

  return primary_subgraph().AllocateTensors();
}
187 | |
188 | void Interpreter::AddSubgraphs(int subgraphs_to_add, |
189 | int* first_new_subgraph_index) { |
190 | const size_t base_index = subgraphs_.size(); |
191 | if (first_new_subgraph_index) *first_new_subgraph_index = base_index; |
192 | |
193 | subgraphs_.reserve(base_index + subgraphs_to_add); |
194 | for (int i = 0; i < subgraphs_to_add; ++i) { |
195 | Subgraph* subgraph = new Subgraph( |
196 | error_reporter_, external_contexts_, &subgraphs_, &resources_, |
197 | &resource_ids_, &initialization_status_map_, subgraphs_.size()); |
198 | subgraphs_.emplace_back(subgraph); |
199 | } |
200 | } |
201 | |
// Adds a node to the primary subgraph. The empty brace argument fills a
// parameter of Subgraph::AddNodeWithParameters that this public overload does
// not expose.
TfLiteStatus Interpreter::AddNodeWithParameters(
    const std::vector<int>& inputs, const std::vector<int>& outputs,
    const char* init_data, size_t init_data_size, void* builtin_data,
    const TfLiteRegistration* registration, int* node_index) {
  return primary_subgraph().AddNodeWithParameters(
      inputs, outputs, {}, init_data, init_data_size, builtin_data,
      registration, node_index);
}
210 | |
// Resizes the input tensor at `tensor_index` to `dims`. Forwards to the
// primary subgraph.
TfLiteStatus Interpreter::ResizeInputTensor(int tensor_index,
                                            const std::vector<int>& dims) {
  return primary_subgraph().ResizeInputTensor(tensor_index, dims);
}
215 | |
// Strict variant of ResizeInputTensor. Forwards to the primary subgraph,
// which enforces the stricter resizing rules.
TfLiteStatus Interpreter::ResizeInputTensorStrict(
    int tensor_index, const std::vector<int>& dims) {
  return primary_subgraph().ResizeInputTensorStrict(tensor_index, dims);
}
220 | |
// Runs inference on the primary subgraph. Each step's status is recorded on
// the scoped runtime event (for instrumentation) before being propagated via
// TF_LITE_ENSURE_STATUS inside the macro.
TfLiteStatus Interpreter::Invoke() {
  ScopedRuntimeInstrumentationProfile scoped_runtime_event(root_profiler_.get(),
                                                           "invoke");

  // Denormal floating point numbers could cause significant slowdown on
  // platforms like x86, therefore, we suppress denormals here to prevent this
  // from happening.
  ruy::ScopedSuppressDenormals suppress_denormals;

  TF_LITE_ENSURE_STATUS_WITH_SCOPED_INSTRUMENTATION(
      scoped_runtime_event, primary_subgraph().Invoke());

  // Unless the client opted in to buffer-handle outputs, make sure every
  // output tensor's data is readable from CPU memory before returning.
  if (!allow_buffer_handle_output_) {
    for (int tensor_index : outputs()) {
      TF_LITE_ENSURE_STATUS_WITH_SCOPED_INSTRUMENTATION(
          scoped_runtime_event,
          primary_subgraph().EnsureTensorDataIsReadable(tensor_index));
    }
  }

  return kTfLiteOk;
}
243 | |
// Adds `tensors_to_add` tensors to the primary subgraph; the index of the
// first new tensor is written to `first_new_tensor_index` by the subgraph.
TfLiteStatus Interpreter::AddTensors(int tensors_to_add,
                                     int* first_new_tensor_index) {
  return primary_subgraph().AddTensors(tensors_to_add, first_new_tensor_index);
}
248 | |
// Configures a read-only tensor (backed by `buffer`) in the primary subgraph.
// Convenience overload taking `dims` as a vector; it is unpacked into
// rank + pointer for the subgraph call.
TfLiteStatus Interpreter::SetTensorParametersReadOnly(
    int tensor_index, TfLiteType type, const char* name,
    const std::vector<int>& dims, TfLiteQuantization quantization,
    const char* buffer, size_t bytes, const Allocation* allocation) {
  return primary_subgraph().SetTensorParametersReadOnly(
      tensor_index, type, name, dims.size(), dims.data(), quantization, buffer,
      bytes, allocation);
}
257 | |
// Configures a read-write tensor in the primary subgraph. Convenience
// overload taking `dims` as a vector; it is unpacked into rank + pointer for
// the subgraph call.
TfLiteStatus Interpreter::SetTensorParametersReadWrite(
    int tensor_index, TfLiteType type, const char* name,
    const std::vector<int>& dims, TfLiteQuantization quantization,
    bool is_variable) {
  return primary_subgraph().SetTensorParametersReadWrite(
      tensor_index, type, name, dims.size(), dims.data(), quantization,
      is_variable);
}
266 | |
// Legacy-quantization overload: converts TfLiteQuantizationParams to the
// current TfLiteQuantization representation before delegating to the primary
// subgraph. The converted quantization holds malloc'd params — ownership is
// presumably transferred to the subgraph/tensor; confirm if modifying.
TfLiteStatus Interpreter::SetTensorParametersReadOnly(
    int tensor_index, TfLiteType type, const char* name, const size_t rank,
    const int* dims, TfLiteQuantizationParams quantization, const char* buffer,
    size_t bytes, const Allocation* allocation) {
  TfLiteQuantization new_quantization = GetQuantizationFromLegacy(quantization);
  return primary_subgraph().SetTensorParametersReadOnly(
      tensor_index, type, name, rank, dims, new_quantization, buffer, bytes,
      allocation);
}
276 | |
// Legacy-quantization overload: converts TfLiteQuantizationParams to the
// current TfLiteQuantization representation before delegating to the primary
// subgraph. Also forwards the optional dims signature (for dynamic shapes).
TfLiteStatus Interpreter::SetTensorParametersReadWrite(
    int tensor_index, TfLiteType type, const char* name, const size_t rank,
    const int* dims, TfLiteQuantizationParams quantization, bool is_variable,
    const size_t rank_dims_signature, const int* dims_signature) {
  TfLiteQuantization new_quantization = GetQuantizationFromLegacy(quantization);
  return primary_subgraph().SetTensorParametersReadWrite(
      tensor_index, type, name, rank, dims, new_quantization, is_variable,
      rank_dims_signature, dims_signature);
}
286 | |
// Overrides the primary subgraph's node execution order with `new_plan`.
TfLiteStatus Interpreter::SetExecutionPlan(const std::vector<int>& new_plan) {
  return primary_subgraph().SetExecutionPlan(new_plan);
}
290 | |
291 | TfLiteStatus Interpreter::SetNumThreads(int num_threads) { |
292 | if (num_threads < -1) { |
293 | context_->ReportError(context_, |
294 | "num_threads should be >=0 or just -1 to let TFLite " |
295 | "runtime set the value." ); |
296 | return kTfLiteError; |
297 | } |
298 | |
299 | // num_threads == 0 has the same effect as num_threads == 1. |
300 | num_threads = num_threads == 0 ? 1 : num_threads; |
301 | for (auto& subgraph : subgraphs_) { |
302 | subgraph->context()->recommended_num_threads = num_threads; |
303 | } |
304 | |
305 | for (int i = 0; i < kTfLiteMaxExternalContexts; ++i) { |
306 | auto* c = external_contexts_[i]; |
307 | if (c && c->Refresh) { |
308 | c->Refresh(context_); |
309 | } |
310 | } |
311 | return kTfLiteOk; |
312 | } |
313 | |
// Applies the delegates registered in `lazy_delegate_providers_`, at most
// once (the provider list is swapped out before use). Returns kTfLiteOk when
// all providers were applied or skipped. Non-kTfLiteError failure statuses
// are returned to the caller, which (see AllocateTensors) treats them as
// recoverable and continues without the delegate.
TfLiteStatus Interpreter::ApplyLazyDelegateProviders() {
  if (lazy_delegate_providers_.empty() || IsFullyDelegated()) return kTfLiteOk;

  // We only apply lazy delegate providers once.
  TfLiteDelegateCreators delegate_providers;
  delegate_providers.swap(lazy_delegate_providers_);

  TFLITE_LOG(TFLITE_LOG_INFO,
             "Applying %zu TensorFlow Lite delegate(s) lazily.",
             delegate_providers.size());
  // At the moment, XNNPACK delegate is the only one that might be applied
  // by default, in which case, the execution will fall back to default
  // implementation if the XNNPACK delegate fails to be applied.
  for (size_t i = 0; i < delegate_providers.size(); ++i) {
    auto delegate_ptr =
        delegate_providers[i](context_->recommended_num_threads);
    // Note when XNNPACK-by-default is disabled, the corresponding creator (i.e.
    // tflite::MaybeCreateXNNPACKDelegate(...)) will return a nullptr.
    // Therefore, we simply continue with the next one.
    if (delegate_ptr == nullptr) continue;
    auto status = ModifyGraphWithDelegateImpl(std::move(delegate_ptr));
    switch (status) {
      case kTfLiteOk:
        TFLITE_LOG(
            TFLITE_LOG_INFO,
            "Successfully applied the default TensorFlow Lite "
            "delegate indexed at %zu.\n *NOTE*: because a delegate has been "
            "applied, the precision of computations should be unchanged, but "
            "the exact output tensor values may have changed. If such output "
            "values are checked in your code, like in your tests etc., please "
            "consider increasing error tolerance for the check.",
            i);
        break;
      case kTfLiteError:
        // Hard failure: abort delegate application entirely.
        TF_LITE_REPORT_ERROR(error_reporter_,
                             "Failed to apply the default TensorFlow Lite "
                             "delegate indexed at %zu.",
                             i);
        return kTfLiteError;
      case kTfLiteDelegateError:
        // ModifyGraphWithDelegateImpl has already reverted all previously
        // applied delegates in this case.
        TFLITE_LOG(
            TFLITE_LOG_INFO,
            "Error in applying the default TensorFlow Lite delegate indexed "
            "at %zu, and all previously applied delegates are reverted.",
            i);
        return kTfLiteDelegateError;
      case kTfLiteApplicationError:
        // Recoverable: the caller is expected to ignore this status and
        // continue without the delegate.
        TFLITE_LOG(
            TFLITE_LOG_INFO,
            "Failed to apply the default TensorFlow Lite delegate indexed at "
            "%zu because of incompatibility between runtime and delegate. "
            "Ignoring the error, and continuing anyway.",
            i);
        return kTfLiteApplicationError;
      case kTfLiteUnresolvedOps:
        // Recoverable: another delegate (or the default kernels) may handle
        // the unresolved ops.
        TFLITE_LOG(
            TFLITE_LOG_INFO,
            "Failed to apply the default TensorFlow Lite delegate indexed at "
            "%zu because of unresolved ops (which could be resolved by "
            "another delegate). Ignoring the error, and continuing anyway.",
            i);
        return kTfLiteUnresolvedOps;
      default:
        TF_LITE_REPORT_ERROR(error_reporter_,
                             "Unknown status (%d) after applying the default "
                             "TensorFlow Lite delegate indexed at %zu.",
                             status, i);
        return kTfLiteError;
    }
  }
  return kTfLiteOk;
}
386 | |
// Applies `delegate` to every subgraph, skipping validation subgraphs.
// Stops at the first failure; on kTfLiteDelegateError all delegates
// (including previously applied ones) are removed so the interpreter is
// restored to its pre-delegation state, and the error is still returned.
TfLiteStatus Interpreter::ModifyGraphWithDelegateImpl(
    TfLiteDelegate* delegate) {
  TfLiteStatus status = kTfLiteOk;
  for (auto& subgraph : subgraphs_) {
    if (IsValidationSubgraph(subgraph->GetName().c_str())) {
      continue;
    }
    status = subgraph->ModifyGraphWithDelegate(delegate);
    if (status != kTfLiteOk) {
      break;
    }
  }
  // Delegate-specific errors can be recovered from by restoring Interpreter to
  // its original state.
  if (status == kTfLiteDelegateError) {
    TF_LITE_ENSURE_STATUS(RemoveAllDelegates());
  }
  return status;
}
406 | |
// Removes all applied delegates from every subgraph, restoring the
// undelegated execution plan. Fails fast on the first subgraph error.
TfLiteStatus Interpreter::RemoveAllDelegates() {
  for (auto& subgraph : subgraphs_) {
    TF_LITE_ENSURE_STATUS(subgraph->RemoveAllDelegates());
  }
  return kTfLiteOk;
}
413 | |
414 | TfLiteStatus Interpreter::SetMetadata( |
415 | const std::map<std::string, std::string>& metadata) { |
416 | metadata_ = metadata; |
417 | for (int subgraph_index = 0; subgraph_index < subgraphs_.size(); |
418 | ++subgraph_index) { |
419 | TF_LITE_ENSURE_STATUS(subgraphs_[subgraph_index]->SetMetadata(&metadata_)); |
420 | } |
421 | return kTfLiteOk; |
422 | } |
423 | |
// Returns true if every node of the primary subgraph is handled by a
// delegate.
bool Interpreter::IsFullyDelegated() const {
  return primary_subgraph().IsFullyDelegated();
}
427 | |
// Installs `profiler` as the sole child of the interpreter's root profiler
// and propagates the root profiler to every subgraph. A null `profiler`
// removes profiling entirely (subgraphs then receive a null profiler).
void Interpreter::SetProfilerImpl(std::unique_ptr<Profiler> profiler) {
  if (profiler == nullptr) {
    root_profiler_ = nullptr;
    return;
  }
  if (root_profiler_ == nullptr) {
    root_profiler_ = std::make_unique<profiling::RootProfiler>();
  } else {
    // Removes all previously registered profilers.
    root_profiler_->RemoveChildProfilers();
  }
  root_profiler_->AddProfiler(std::move(profiler));
  SetSubgraphProfiler();
}
442 | |
// Hands the root profiler to every subgraph, tagging each with its index so
// profiling events can be attributed to a specific subgraph. (An index loop
// is required here because the index itself is passed to SetProfiler.)
void Interpreter::SetSubgraphProfiler() {
  for (int subgraph_index = 0; subgraph_index < subgraphs_.size();
       ++subgraph_index) {
    subgraphs_[subgraph_index]->SetProfiler(root_profiler_.get(),
                                            subgraph_index);
  }
}
450 | |
451 | TfLiteStatus Interpreter::ApplyOptionsImpl(InterpreterOptions* options) { |
452 | if (options == nullptr) { |
453 | return kTfLiteOk; |
454 | } |
455 | options_ = std::make_unique<InterpreterOptions>(*options); |
456 | |
457 | // Set InterpreterOptions object to SubGraph. |
458 | for (auto& subgraph : subgraphs_) { |
459 | subgraph->SetOptions(options_.get()); |
460 | } |
461 | |
462 | // Handle `experimental_dynamic_allocation_for_large_tensors_`. |
463 | if (options->GetDynamicAllocationForLargeTensors() > 0) { |
464 | for (auto& subgraph : subgraphs_) { |
465 | subgraph->OptimizeMemoryForLargeTensors( |
466 | options->GetDynamicAllocationForLargeTensors()); |
467 | } |
468 | } |
469 | return kTfLiteOk; |
470 | } |
471 | |
472 | } // namespace tflite |
473 | |