/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#include "tensorflow/lite/interpreter.h"

#include <stddef.h>
#include <stdlib.h>

#include <cstdint>
#include <functional>
#include <memory>
#include <string>
#include <utility>
#include <vector>

#include "ruy/denormal.h"  // from @ruy
#include "tensorflow/lite/allocation.h"
#include "tensorflow/lite/core/api/error_reporter.h"
#include "tensorflow/lite/core/api/profiler.h"
#include "tensorflow/lite/external_cpu_backend_context.h"
#include "tensorflow/lite/interpreter_options.h"
#include "tensorflow/lite/minimal_logging.h"
#include "tensorflow/lite/stderr_reporter.h"
#include "tensorflow/lite/util.h"

// TODO(b/139446230): Move to portable platform header.
#if defined(__ANDROID__)
#define TFLITE_IS_MOBILE_PLATFORM
#endif  // defined(__ANDROID__)

#if defined(__APPLE__)
#include "TargetConditionals.h"
#if TARGET_IPHONE_SIMULATOR
#define TFLITE_IS_MOBILE_PLATFORM
#elif TARGET_OS_IPHONE
#define TFLITE_IS_MOBILE_PLATFORM
#endif
#endif  // defined(__APPLE__)

// TODO(b/132087118): move static_assert to c_api_internal when compiled with
// C++.
static_assert(sizeof(TfLiteFloat16) == sizeof(uint16_t),
              "Float 16 type must be 16 bits.");

namespace tflite {

namespace {

// Gets the current TfLiteQuantization from the legacy
// TfLiteQuantizationParams.
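// The returned struct owns heap-allocated affine params (scale and zero_point
// arrays); callers hand the result to Subgraph::SetTensorParameters*, which is
// expected to take ownership.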
TfLiteQuantization GetQuantizationFromLegacy(
    const TfLiteQuantizationParams& legacy_quantization) {
  TfLiteQuantization quantization;
  quantization.type = kTfLiteAffineQuantization;
  auto* affine_quantization = reinterpret_cast<TfLiteAffineQuantization*>(
      malloc(sizeof(TfLiteAffineQuantization)));
  affine_quantization->scale = TfLiteFloatArrayCreate(1);
  affine_quantization->zero_point = TfLiteIntArrayCreate(1);
  affine_quantization->scale->data[0] = legacy_quantization.scale;
  affine_quantization->zero_point->data[0] = legacy_quantization.zero_point;
  quantization.params = affine_quantization;

  return quantization;
}

// TODO(b/153131797): We have temporarily set 'delegate_status' to 0 in the
// following macro because delegate-specific error codes are not retrievable
// at the moment; we will add them later.
#define TF_LITE_ENSURE_STATUS_WITH_SCOPED_INSTRUMENTATION(runtime_event, a) \
  do {                                                                      \
    TfLiteStatus status = (a);                                              \
    runtime_event.set_runtime_status(/*delegate_status=*/0,                 \
                                     static_cast<int64_t>(status));         \
    TF_LITE_ENSURE_STATUS(status);                                          \
  } while (0)

}  // namespace

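// Typical usage (sketch): an Interpreter is normally created through
// tflite::InterpreterBuilder rather than constructed directly, e.g.
//   std::unique_ptr<tflite::Interpreter> interpreter;
//   tflite::InterpreterBuilder(*model, op_resolver)(&interpreter);
//   interpreter->AllocateTensors();
//   interpreter->Invoke();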
Interpreter::Interpreter(ErrorReporter* error_reporter)
    : error_reporter_(error_reporter ? error_reporter
                                     : DefaultErrorReporter()) {
  // TODO(b/128420794): Include the TFLite runtime version in the log.
  // Prod logging is useful for mobile platforms where scraping console logs is
  // critical for debugging.
#if defined(TFLITE_IS_MOBILE_PLATFORM)
  TFLITE_LOG_PROD_ONCE(TFLITE_LOG_INFO,
                       "Initialized TensorFlow Lite runtime.");
#else
  TFLITE_LOG_ONCE(TFLITE_LOG_INFO, "Initialized TensorFlow Lite runtime.");
#endif

  // There's always at least one subgraph: the primary subgraph.
  AddSubgraphs(1);
  context_ = primary_subgraph().context();

  // Start with all external context slots empty.
  for (int i = 0; i < kTfLiteMaxExternalContexts; ++i) {
    external_contexts_[i] = nullptr;
  }

  // This operation is cheap because we allocate the CPU context resources
  // (i.e. threads) lazily.
  own_external_cpu_backend_context_ =
      std::make_unique<ExternalCpuBackendContext>();
  external_contexts_[kTfLiteCpuBackendContext] =
      own_external_cpu_backend_context_.get();
}

Interpreter::~Interpreter() {
  // The owned external CPU backend context will go out of scope with this
  // interpreter. If we have an external backend context that is not
  // owned, we need to clear the cache for other interpreters that may
  // use the context.
  if (external_contexts_[kTfLiteCpuBackendContext] &&
      (external_contexts_[kTfLiteCpuBackendContext] !=
       own_external_cpu_backend_context_.get())) {
    ExternalCpuBackendContext* external_context =
        static_cast<ExternalCpuBackendContext*>(
            external_contexts_[kTfLiteCpuBackendContext]);
    TfLiteInternalBackendContext* internal_context =
        external_context->internal_backend_context();
    if (internal_context) {
      // This call may have negative performance impacts on the next inference
      // for any interpreter using this context. The cache will be refreshed
      // by the next inference.
      internal_context->ClearCaches();
    }
  }
}

void Interpreter::SetExternalContext(TfLiteExternalContextType type,
                                     TfLiteExternalContext* ctx) {
  if (ctx == own_external_cpu_backend_context_.get()) {
    error_reporter_->Report(
        "WARNING: The passed external context is identical to the internally "
        "owned one.");
    return;
  }

  // We have an internally owned external context of kTfLiteCpuBackendContext.
  // If it's overwritten here, we release the resources of the internally
  // owned external context.
  // Note: the 'max thread count' info associated with the overwritten context
  // is lost here; it is now determined by the new context, which in turn
  // affects how much parallelism a TFLite op would have.
  if (kTfLiteCpuBackendContext == type &&
      external_contexts_[kTfLiteCpuBackendContext] ==
          own_external_cpu_backend_context_.get()) {
    own_external_cpu_backend_context_.reset();
  }

  // This essentially changes the "external_contexts_[type]".
  primary_subgraph().SetExternalContext(type, ctx);
}

TfLiteStatus Interpreter::SetInputs(std::vector<int> inputs) {
  return primary_subgraph().SetInputs(std::move(inputs));
}

TfLiteStatus Interpreter::SetOutputs(std::vector<int> outputs) {
  return primary_subgraph().SetOutputs(std::move(outputs));
}

TfLiteStatus Interpreter::SetVariables(std::vector<int> variables) {
  return primary_subgraph().SetVariables(std::move(variables));
}

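// Allocates buffers for the primary subgraph's tensors, first applying any
// pending default delegates (see ApplyLazyDelegateProviders below).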
TfLiteStatus Interpreter::AllocateTensors() {
  // Apply the default delegates that TFLite enables at this point, so that
  // user-level delegates (applied before this call) take precedence. Only
  // return an error when the status is kTfLiteError; for other statuses,
  // execution falls back to the default implementation.
  if (ApplyLazyDelegateProviders() == kTfLiteError) return kTfLiteError;

  return primary_subgraph().AllocateTensors();
}

void Interpreter::AddSubgraphs(int subgraphs_to_add,
                               int* first_new_subgraph_index) {
  const size_t base_index = subgraphs_.size();
  if (first_new_subgraph_index) *first_new_subgraph_index = base_index;

  subgraphs_.reserve(base_index + subgraphs_to_add);
  for (int i = 0; i < subgraphs_to_add; ++i) {
    Subgraph* subgraph = new Subgraph(
        error_reporter_, external_contexts_, &subgraphs_, &resources_,
        &resource_ids_, &initialization_status_map_, subgraphs_.size());
    subgraphs_.emplace_back(subgraph);
  }
}

TfLiteStatus Interpreter::AddNodeWithParameters(
    const std::vector<int>& inputs, const std::vector<int>& outputs,
    const char* init_data, size_t init_data_size, void* builtin_data,
    const TfLiteRegistration* registration, int* node_index) {
  return primary_subgraph().AddNodeWithParameters(
      inputs, outputs, {}, init_data, init_data_size, builtin_data,
      registration, node_index);
}

TfLiteStatus Interpreter::ResizeInputTensor(int tensor_index,
                                            const std::vector<int>& dims) {
  return primary_subgraph().ResizeInputTensor(tensor_index, dims);
}

TfLiteStatus Interpreter::ResizeInputTensorStrict(
    int tensor_index, const std::vector<int>& dims) {
  return primary_subgraph().ResizeInputTensorStrict(tensor_index, dims);
}

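// Typical call pattern (sketch): after AllocateTensors(), fill inputs via
// typed_input_tensor<T>(i), call Invoke(), then read results via
// typed_output_tensor<T>(i).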
TfLiteStatus Interpreter::Invoke() {
  ScopedRuntimeInstrumentationProfile scoped_runtime_event(root_profiler_.get(),
                                                           "invoke");

  // Denormal floating point numbers could cause significant slowdown on
  // platforms like x86, so we suppress them here to prevent this from
  // happening.
  ruy::ScopedSuppressDenormals suppress_denormals;

  TF_LITE_ENSURE_STATUS_WITH_SCOPED_INSTRUMENTATION(
      scoped_runtime_event, primary_subgraph().Invoke());

  if (!allow_buffer_handle_output_) {
    for (int tensor_index : outputs()) {
      TF_LITE_ENSURE_STATUS_WITH_SCOPED_INSTRUMENTATION(
          scoped_runtime_event,
          primary_subgraph().EnsureTensorDataIsReadable(tensor_index));
    }
  }

  return kTfLiteOk;
}

TfLiteStatus Interpreter::AddTensors(int tensors_to_add,
                                     int* first_new_tensor_index) {
  return primary_subgraph().AddTensors(tensors_to_add, first_new_tensor_index);
}

TfLiteStatus Interpreter::SetTensorParametersReadOnly(
    int tensor_index, TfLiteType type, const char* name,
    const std::vector<int>& dims, TfLiteQuantization quantization,
    const char* buffer, size_t bytes, const Allocation* allocation) {
  return primary_subgraph().SetTensorParametersReadOnly(
      tensor_index, type, name, dims.size(), dims.data(), quantization, buffer,
      bytes, allocation);
}

TfLiteStatus Interpreter::SetTensorParametersReadWrite(
    int tensor_index, TfLiteType type, const char* name,
    const std::vector<int>& dims, TfLiteQuantization quantization,
    bool is_variable) {
  return primary_subgraph().SetTensorParametersReadWrite(
      tensor_index, type, name, dims.size(), dims.data(), quantization,
      is_variable);
}

TfLiteStatus Interpreter::SetTensorParametersReadOnly(
    int tensor_index, TfLiteType type, const char* name, const size_t rank,
    const int* dims, TfLiteQuantizationParams quantization, const char* buffer,
    size_t bytes, const Allocation* allocation) {
  TfLiteQuantization new_quantization = GetQuantizationFromLegacy(quantization);
  return primary_subgraph().SetTensorParametersReadOnly(
      tensor_index, type, name, rank, dims, new_quantization, buffer, bytes,
      allocation);
}

TfLiteStatus Interpreter::SetTensorParametersReadWrite(
    int tensor_index, TfLiteType type, const char* name, const size_t rank,
    const int* dims, TfLiteQuantizationParams quantization, bool is_variable,
    const size_t rank_dims_signature, const int* dims_signature) {
  TfLiteQuantization new_quantization = GetQuantizationFromLegacy(quantization);
  return primary_subgraph().SetTensorParametersReadWrite(
      tensor_index, type, name, rank, dims, new_quantization, is_variable,
      rank_dims_signature, dims_signature);
}

TfLiteStatus Interpreter::SetExecutionPlan(const std::vector<int>& new_plan) {
  return primary_subgraph().SetExecutionPlan(new_plan);
}

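// Propagates the recommended thread count to every subgraph and refreshes any
// registered external contexts (e.g. so backends can pick up the new value).
// Passing -1 lets the TFLite runtime choose; 0 is treated as 1.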
TfLiteStatus Interpreter::SetNumThreads(int num_threads) {
  if (num_threads < -1) {
    context_->ReportError(context_,
                          "num_threads should be >= 0, or -1 to let the TFLite "
                          "runtime set the value.");
    return kTfLiteError;
  }

  // num_threads == 0 has the same effect as num_threads == 1.
  num_threads = num_threads == 0 ? 1 : num_threads;
  for (auto& subgraph : subgraphs_) {
    subgraph->context()->recommended_num_threads = num_threads;
  }

  for (int i = 0; i < kTfLiteMaxExternalContexts; ++i) {
    auto* c = external_contexts_[i];
    if (c && c->Refresh) {
      c->Refresh(context_);
    }
  }
  return kTfLiteOk;
}

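// Applies the delegates queued in lazy_delegate_providers_ (typically the
// default delegates registered by the InterpreterBuilder). The providers are
// consumed on the first call, so subsequent calls are no-ops.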
TfLiteStatus Interpreter::ApplyLazyDelegateProviders() {
  if (lazy_delegate_providers_.empty() || IsFullyDelegated()) return kTfLiteOk;

  // We only apply lazy delegate providers once.
  TfLiteDelegateCreators delegate_providers;
  delegate_providers.swap(lazy_delegate_providers_);

  TFLITE_LOG(TFLITE_LOG_INFO,
             "Applying %zu TensorFlow Lite delegate(s) lazily.",
             delegate_providers.size());
  // At the moment, the XNNPACK delegate is the only one that might be applied
  // by default, in which case execution falls back to the default
  // implementation if the XNNPACK delegate fails to be applied.
  for (size_t i = 0; i < delegate_providers.size(); ++i) {
    auto delegate_ptr =
        delegate_providers[i](context_->recommended_num_threads);
    // Note that when XNNPACK-by-default is disabled, the corresponding creator
    // (i.e. tflite::MaybeCreateXNNPACKDelegate(...)) returns a nullptr.
    // Therefore, we simply continue with the next one.
    if (delegate_ptr == nullptr) continue;
    auto status = ModifyGraphWithDelegateImpl(std::move(delegate_ptr));
    switch (status) {
      case kTfLiteOk:
        TFLITE_LOG(
            TFLITE_LOG_INFO,
            "Successfully applied the default TensorFlow Lite "
            "delegate indexed at %zu.\n *NOTE*: because a delegate has been "
            "applied, the precision of computations should be unchanged, but "
            "the exact output tensor values may have changed. If such output "
            "values are checked in your code, like in your tests etc., please "
            "consider increasing error tolerance for the check.",
            i);
        break;
      case kTfLiteError:
        TF_LITE_REPORT_ERROR(error_reporter_,
                             "Failed to apply the default TensorFlow Lite "
                             "delegate indexed at %zu.",
                             i);
        return kTfLiteError;
      case kTfLiteDelegateError:
        TFLITE_LOG(
            TFLITE_LOG_INFO,
            "Error in applying the default TensorFlow Lite delegate indexed "
            "at %zu, and all previously applied delegates are reverted.",
            i);
        return kTfLiteDelegateError;
      case kTfLiteApplicationError:
        TFLITE_LOG(
            TFLITE_LOG_INFO,
            "Failed to apply the default TensorFlow Lite delegate indexed at "
            "%zu because of incompatibility between runtime and delegate. "
            "Ignoring the error, and continuing anyway.",
            i);
        return kTfLiteApplicationError;
      case kTfLiteUnresolvedOps:
        TFLITE_LOG(
            TFLITE_LOG_INFO,
            "Failed to apply the default TensorFlow Lite delegate indexed at "
            "%zu because of unresolved ops (which could be resolved by "
            "another delegate). Ignoring the error, and continuing anyway.",
            i);
        return kTfLiteUnresolvedOps;
      default:
        TF_LITE_REPORT_ERROR(error_reporter_,
                             "Unknown status (%d) after applying the default "
                             "TensorFlow Lite delegate indexed at %zu.",
                             status, i);
        return kTfLiteError;
    }
  }
  return kTfLiteOk;
}

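// Applies `delegate` to every subgraph except validation subgraphs. If any
// subgraph reports kTfLiteDelegateError, all delegates are removed so the
// interpreter is restored to its original state.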
TfLiteStatus Interpreter::ModifyGraphWithDelegateImpl(
    TfLiteDelegate* delegate) {
  TfLiteStatus status = kTfLiteOk;
  for (auto& subgraph : subgraphs_) {
    if (IsValidationSubgraph(subgraph->GetName().c_str())) {
      continue;
    }
    status = subgraph->ModifyGraphWithDelegate(delegate);
    if (status != kTfLiteOk) {
      break;
    }
  }
  // Delegate-specific errors can be recovered from by restoring the
  // Interpreter to its original state.
  if (status == kTfLiteDelegateError) {
    TF_LITE_ENSURE_STATUS(RemoveAllDelegates());
  }
  return status;
}

TfLiteStatus Interpreter::RemoveAllDelegates() {
  for (auto& subgraph : subgraphs_) {
    TF_LITE_ENSURE_STATUS(subgraph->RemoveAllDelegates());
  }
  return kTfLiteOk;
}

TfLiteStatus Interpreter::SetMetadata(
    const std::map<std::string, std::string>& metadata) {
  metadata_ = metadata;
  for (int subgraph_index = 0; subgraph_index < subgraphs_.size();
       ++subgraph_index) {
    TF_LITE_ENSURE_STATUS(subgraphs_[subgraph_index]->SetMetadata(&metadata_));
  }
  return kTfLiteOk;
}

bool Interpreter::IsFullyDelegated() const {
  return primary_subgraph().IsFullyDelegated();
}

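// Installs `profiler` as the sole child of the root profiler (created lazily)
// and propagates the root profiler to every subgraph. Passing nullptr clears
// the root profiler.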
void Interpreter::SetProfilerImpl(std::unique_ptr<Profiler> profiler) {
  if (profiler == nullptr) {
    root_profiler_ = nullptr;
    return;
  }
  if (root_profiler_ == nullptr) {
    root_profiler_ = std::make_unique<profiling::RootProfiler>();
  } else {
    // Removes all previously registered profilers.
    root_profiler_->RemoveChildProfilers();
  }
  root_profiler_->AddProfiler(std::move(profiler));
  SetSubgraphProfiler();
}

void Interpreter::SetSubgraphProfiler() {
  for (int subgraph_index = 0; subgraph_index < subgraphs_.size();
       ++subgraph_index) {
    subgraphs_[subgraph_index]->SetProfiler(root_profiler_.get(),
                                            subgraph_index);
  }
}

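// Copies `options` into the interpreter and propagates the copy to every
// subgraph. When dynamic allocation for large tensors is requested, each
// subgraph is also asked to optimize memory for tensors above the configured
// size threshold.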
TfLiteStatus Interpreter::ApplyOptionsImpl(InterpreterOptions* options) {
  if (options == nullptr) {
    return kTfLiteOk;
  }
  options_ = std::make_unique<InterpreterOptions>(*options);

  // Pass the InterpreterOptions object to each subgraph.
  for (auto& subgraph : subgraphs_) {
    subgraph->SetOptions(options_.get());
  }

  // Handle `experimental_dynamic_allocation_for_large_tensors_`.
  if (options->GetDynamicAllocationForLargeTensors() > 0) {
    for (auto& subgraph : subgraphs_) {
      subgraph->OptimizeMemoryForLargeTensors(
          options->GetDynamicAllocationForLargeTensors());
    }
  }
  return kTfLiteOk;
}

}  // namespace tflite