1 | /* Copyright 2017 The TensorFlow Authors. All Rights Reserved. |
2 | |
3 | Licensed under the Apache License, Version 2.0 (the "License"); |
4 | you may not use this file except in compliance with the License. |
5 | You may obtain a copy of the License at |
6 | |
7 | http://www.apache.org/licenses/LICENSE-2.0 |
8 | |
9 | Unless required by applicable law or agreed to in writing, software |
10 | distributed under the License is distributed on an "AS IS" BASIS, |
11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
12 | See the License for the specific language governing permissions and |
13 | limitations under the License. |
14 | ==============================================================================*/ |
15 | |
16 | #include "tensorflow/lite/interpreter.h" |
17 | |
18 | #include <stddef.h> |
19 | #include <stdlib.h> |
20 | |
21 | #include <cstdint> |
22 | #include <functional> |
23 | #include <memory> |
24 | #include <string> |
25 | #include <utility> |
26 | #include <vector> |
27 | |
28 | #include "ruy/denormal.h" // from @ruy |
29 | #include "tensorflow/lite/allocation.h" |
30 | #include "tensorflow/lite/core/api/error_reporter.h" |
31 | #include "tensorflow/lite/core/api/profiler.h" |
32 | #include "tensorflow/lite/external_cpu_backend_context.h" |
33 | #include "tensorflow/lite/interpreter_options.h" |
34 | #include "tensorflow/lite/minimal_logging.h" |
35 | #include "tensorflow/lite/stderr_reporter.h" |
36 | #include "tensorflow/lite/util.h" |
37 | |
38 | // TODO(b/139446230): Move to portable platform header. |
39 | #if defined(__ANDROID__) |
40 | #define TFLITE_IS_MOBILE_PLATFORM |
41 | #endif // defined(__ANDROID__) |
42 | |
43 | #if defined(__APPLE__) |
44 | #include "TargetConditionals.h" |
45 | #if TARGET_IPHONE_SIMULATOR |
46 | #define TFLITE_IS_MOBILE_PLATFORM |
47 | #elif TARGET_OS_IPHONE |
48 | #define TFLITE_IS_MOBILE_PLATFORM |
49 | #endif |
50 | #endif // defined(__APPLE__) |
51 | |
// TODO(b/132087118): move static_assert to c_api_internal when compiled with
// C++.
// Sanity check: TfLiteFloat16 must occupy exactly 16 bits of storage.
static_assert(sizeof(TfLiteFloat16) == sizeof(uint16_t),
              "Float 16 type must be 16 bits.");
56 | |
57 | namespace tflite { |
58 | |
namespace {

// Converts the legacy, per-tensor TfLiteQuantizationParams into the current
// TfLiteQuantization representation: a kTfLiteAffineQuantization carrying a
// single scale / zero-point pair.
//
// Ownership note (review): the returned quantization's `params` is malloc'd
// here (with TfLite*ArrayCreate'd scale/zero_point arrays inside) —
// presumably released through the C-style free path by whichever tensor
// eventually takes ownership; confirm at the call sites.
TfLiteQuantization GetQuantizationFromLegacy(
    const TfLiteQuantizationParams& legacy_quantization) {
  TfLiteQuantization quantization;
  quantization.type = kTfLiteAffineQuantization;
  auto* affine_quantization = reinterpret_cast<TfLiteAffineQuantization*>(
      malloc(sizeof(TfLiteAffineQuantization)));
  affine_quantization->scale = TfLiteFloatArrayCreate(1);
  affine_quantization->zero_point = TfLiteIntArrayCreate(1);
  affine_quantization->scale->data[0] = legacy_quantization.scale;
  affine_quantization->zero_point->data[0] = legacy_quantization.zero_point;
  quantization.params = affine_quantization;

  return quantization;
}

// Evaluates `a`, records its status (and a placeholder delegate status) on
// `runtime_event`, and returns from the enclosing function on failure.
//
// TODO(b/153131797): 'delegate_status' is hard-coded to 0 in the following
// macro temporarily because delegate-specific error codes are not retrievable
// at the moment; they will be added later.
#define TF_LITE_ENSURE_STATUS_WITH_SCOPED_INSTRUMENTATION(runtime_event, a) \
  do {                                                                      \
    TfLiteStatus status = (a);                                              \
    runtime_event.set_runtime_status(/*delegate_status=*/0,                 \
                                     static_cast<int64_t>(status));         \
    TF_LITE_ENSURE_STATUS(status);                                          \
  } while (0)

}  // namespace
89 | |
// Builds an interpreter with one (primary) subgraph and an internally owned
// external CPU backend context. A null `error_reporter` falls back to the
// process-wide default reporter.
Interpreter::Interpreter(ErrorReporter* error_reporter)
    : error_reporter_(error_reporter ? error_reporter
                                     : DefaultErrorReporter()) {
  // TODO(b/128420794): Include the TFLite runtime version in the log.
  // Prod logging is useful for mobile platforms where scraping console logs is
  // critical for debugging.
#if defined(TFLITE_IS_MOBILE_PLATFORM)
  TFLITE_LOG_PROD_ONCE(TFLITE_LOG_INFO, "Initialized TensorFlow Lite runtime.");
#else
  TFLITE_LOG_ONCE(TFLITE_LOG_INFO, "Initialized TensorFlow Lite runtime.");
#endif

  // There's always at least 1 subgraph which is the primary subgraph.
  AddSubgraphs(1);
  context_ = primary_subgraph().context();

  // Start with no external contexts registered.
  for (int i = 0; i < kTfLiteMaxExternalContexts; ++i) {
    external_contexts_[i] = nullptr;
  }

  // This operation is cheap because we allocate the CPU context resources (i.e.
  // threads) lazily.
  own_external_cpu_backend_context_ =
      std::make_unique<ExternalCpuBackendContext>();
  external_contexts_[kTfLiteCpuBackendContext] =
      own_external_cpu_backend_context_.get();
}
118 | |
// If the CPU backend context in use is NOT the internally owned one, other
// interpreters may still be sharing it, so its internal caches are cleared
// before this interpreter goes away. The owned context is destroyed together
// with the interpreter and needs no explicit cleanup here.
Interpreter::~Interpreter() {
  // The owned external Cpu Backend Context will go out of scope with this
  // interpreter. If we have an external backend context that is not
  // owned, we need to clear the cache for other interpreters that may
  // use the context.
  if (external_contexts_[kTfLiteCpuBackendContext] &&
      (external_contexts_[kTfLiteCpuBackendContext] !=
       own_external_cpu_backend_context_.get())) {
    ExternalCpuBackendContext* external_context =
        static_cast<ExternalCpuBackendContext*>(
            external_contexts_[kTfLiteCpuBackendContext]);
    TfLiteInternalBackendContext* internal_context =
        external_context->internal_backend_context();
    if (internal_context) {
      // This call may have negative performance impacts on the next inference
      // for any interpreter using this context. The cache will be refreshed
      // by the next inference.
      internal_context->ClearCaches();
    }
  }
}
140 | |
// Registers `ctx` as the external context of the given `type`, replacing any
// previously registered one. Passing the internally owned CPU backend context
// back in is rejected with a warning (it is already registered).
void Interpreter::SetExternalContext(TfLiteExternalContextType type,
                                     TfLiteExternalContext* ctx) {
  if (ctx == own_external_cpu_backend_context_.get()) {
    error_reporter_->Report(
        "WARNING: The passed external context is identical to the internally "
        "owned one.");
    return;
  }

  // We have an internally owned external context of kTfLiteCpuBackendContext.
  // If it's overwritten here, we will release the resource of the internally
  // owned external context.
  // Note: the 'max thread count' info associated with the overwritten context
  // will be lost here, and such info is now determined by the new context, thus
  // affecting how much parallelism a TFLite op would have.
  if (kTfLiteCpuBackendContext == type &&
      external_contexts_[kTfLiteCpuBackendContext] ==
          own_external_cpu_backend_context_.get()) {
    own_external_cpu_backend_context_.reset();
  }

  // This essentially changes the "external_contexts_[type]".
  primary_subgraph().SetExternalContext(type, ctx);
}
165 | |
// Sets the tensor indices that act as the model's inputs. Forwards to the
// primary subgraph.
TfLiteStatus Interpreter::SetInputs(std::vector<int> inputs) {
  return primary_subgraph().SetInputs(std::move(inputs));
}
169 | |
// Sets the tensor indices that act as the model's outputs. Forwards to the
// primary subgraph.
TfLiteStatus Interpreter::SetOutputs(std::vector<int> outputs) {
  return primary_subgraph().SetOutputs(std::move(outputs));
}
173 | |
// Sets the tensor indices that act as the model's variable tensors. Forwards
// to the primary subgraph.
TfLiteStatus Interpreter::SetVariables(std::vector<int> variables) {
  return primary_subgraph().SetVariables(std::move(variables));
}
177 | |
// Allocates tensor memory for the primary subgraph, first giving default
// (lazily applied) delegates a chance to modify the graph.
TfLiteStatus Interpreter::AllocateTensors() {
  // Apply the default delegate that TFLite will enable at this point to allow
  // other user-level delegates to be applied first. Only returns error when
  // the status is kTfLiteError; for other (delegate-recoverable) statuses it
  // falls through to the default, undelegated implementation.
  if (ApplyLazyDelegateProviders() == kTfLiteError) return kTfLiteError;

  return primary_subgraph().AllocateTensors();
}
187 | |
188 | void Interpreter::AddSubgraphs(int subgraphs_to_add, |
189 | int* first_new_subgraph_index) { |
190 | const size_t base_index = subgraphs_.size(); |
191 | if (first_new_subgraph_index) *first_new_subgraph_index = base_index; |
192 | |
193 | subgraphs_.reserve(base_index + subgraphs_to_add); |
194 | for (int i = 0; i < subgraphs_to_add; ++i) { |
195 | Subgraph* subgraph = new Subgraph( |
196 | error_reporter_, external_contexts_, &subgraphs_, &resources_, |
197 | &resource_ids_, &initialization_status_map_, subgraphs_.size()); |
198 | subgraphs_.emplace_back(subgraph); |
199 | } |
200 | } |
201 | |
// Adds a node to the primary subgraph. The empty brace argument fills a
// parameter of Subgraph::AddNodeWithParameters that this public overload does
// not expose.
TfLiteStatus Interpreter::AddNodeWithParameters(
    const std::vector<int>& inputs, const std::vector<int>& outputs,
    const char* init_data, size_t init_data_size, void* builtin_data,
    const TfLiteRegistration* registration, int* node_index) {
  return primary_subgraph().AddNodeWithParameters(
      inputs, outputs, {}, init_data, init_data_size, builtin_data,
      registration, node_index);
}
210 | |
// Resizes the input tensor at `tensor_index` to `dims`. Forwards to the
// primary subgraph.
TfLiteStatus Interpreter::ResizeInputTensor(int tensor_index,
                                            const std::vector<int>& dims) {
  return primary_subgraph().ResizeInputTensor(tensor_index, dims);
}
215 | |
// Strict variant of ResizeInputTensor. Forwards to the primary subgraph,
// which enforces the stricter resizing rules.
TfLiteStatus Interpreter::ResizeInputTensorStrict(
    int tensor_index, const std::vector<int>& dims) {
  return primary_subgraph().ResizeInputTensorStrict(tensor_index, dims);
}
220 | |
// Runs inference on the primary subgraph. Each step's status is recorded on
// the scoped runtime event (for instrumentation) before being propagated via
// TF_LITE_ENSURE_STATUS inside the macro.
TfLiteStatus Interpreter::Invoke() {
  ScopedRuntimeInstrumentationProfile scoped_runtime_event(root_profiler_.get(),
                                                           "invoke");

  // Denormal floating point numbers could cause significant slowdown on
  // platforms like x86, therefore, we suppress denormals here to prevent this
  // from happening.
  ruy::ScopedSuppressDenormals suppress_denormals;

  TF_LITE_ENSURE_STATUS_WITH_SCOPED_INSTRUMENTATION(
      scoped_runtime_event, primary_subgraph().Invoke());

  // Unless the client opted in to buffer-handle outputs, make sure every
  // output tensor's data is readable from CPU memory before returning.
  if (!allow_buffer_handle_output_) {
    for (int tensor_index : outputs()) {
      TF_LITE_ENSURE_STATUS_WITH_SCOPED_INSTRUMENTATION(
          scoped_runtime_event,
          primary_subgraph().EnsureTensorDataIsReadable(tensor_index));
    }
  }

  return kTfLiteOk;
}
243 | |
// Adds `tensors_to_add` tensors to the primary subgraph; the index of the
// first new tensor is written to `first_new_tensor_index` by the subgraph.
TfLiteStatus Interpreter::AddTensors(int tensors_to_add,
                                     int* first_new_tensor_index) {
  return primary_subgraph().AddTensors(tensors_to_add, first_new_tensor_index);
}
248 | |
// Configures a read-only tensor (backed by `buffer`) in the primary subgraph.
// Convenience overload taking `dims` as a vector; it is unpacked into
// rank + pointer for the subgraph call.
TfLiteStatus Interpreter::SetTensorParametersReadOnly(
    int tensor_index, TfLiteType type, const char* name,
    const std::vector<int>& dims, TfLiteQuantization quantization,
    const char* buffer, size_t bytes, const Allocation* allocation) {
  return primary_subgraph().SetTensorParametersReadOnly(
      tensor_index, type, name, dims.size(), dims.data(), quantization, buffer,
      bytes, allocation);
}
257 | |
// Configures a read-write tensor in the primary subgraph. Convenience
// overload taking `dims` as a vector; it is unpacked into rank + pointer for
// the subgraph call.
TfLiteStatus Interpreter::SetTensorParametersReadWrite(
    int tensor_index, TfLiteType type, const char* name,
    const std::vector<int>& dims, TfLiteQuantization quantization,
    bool is_variable) {
  return primary_subgraph().SetTensorParametersReadWrite(
      tensor_index, type, name, dims.size(), dims.data(), quantization,
      is_variable);
}
266 | |
// Legacy-quantization overload: converts TfLiteQuantizationParams to the
// current TfLiteQuantization representation before delegating to the primary
// subgraph. The converted quantization holds malloc'd params — ownership is
// presumably transferred to the subgraph/tensor; confirm if modifying.
TfLiteStatus Interpreter::SetTensorParametersReadOnly(
    int tensor_index, TfLiteType type, const char* name, const size_t rank,
    const int* dims, TfLiteQuantizationParams quantization, const char* buffer,
    size_t bytes, const Allocation* allocation) {
  TfLiteQuantization new_quantization = GetQuantizationFromLegacy(quantization);
  return primary_subgraph().SetTensorParametersReadOnly(
      tensor_index, type, name, rank, dims, new_quantization, buffer, bytes,
      allocation);
}
276 | |
// Legacy-quantization overload: converts TfLiteQuantizationParams to the
// current TfLiteQuantization representation before delegating to the primary
// subgraph. Also forwards the optional dims signature (for dynamic shapes).
TfLiteStatus Interpreter::SetTensorParametersReadWrite(
    int tensor_index, TfLiteType type, const char* name, const size_t rank,
    const int* dims, TfLiteQuantizationParams quantization, bool is_variable,
    const size_t rank_dims_signature, const int* dims_signature) {
  TfLiteQuantization new_quantization = GetQuantizationFromLegacy(quantization);
  return primary_subgraph().SetTensorParametersReadWrite(
      tensor_index, type, name, rank, dims, new_quantization, is_variable,
      rank_dims_signature, dims_signature);
}
286 | |
// Overrides the primary subgraph's node execution order with `new_plan`.
TfLiteStatus Interpreter::SetExecutionPlan(const std::vector<int>& new_plan) {
  return primary_subgraph().SetExecutionPlan(new_plan);
}
290 | |
291 | TfLiteStatus Interpreter::SetNumThreads(int num_threads) { |
292 | if (num_threads < -1) { |
293 | context_->ReportError(context_, |
294 | "num_threads should be >=0 or just -1 to let TFLite " |
295 | "runtime set the value." ); |
296 | return kTfLiteError; |
297 | } |
298 | |
299 | // num_threads == 0 has the same effect as num_threads == 1. |
300 | num_threads = num_threads == 0 ? 1 : num_threads; |
301 | for (auto& subgraph : subgraphs_) { |
302 | subgraph->context()->recommended_num_threads = num_threads; |
303 | } |
304 | |
305 | for (int i = 0; i < kTfLiteMaxExternalContexts; ++i) { |
306 | auto* c = external_contexts_[i]; |
307 | if (c && c->Refresh) { |
308 | c->Refresh(context_); |
309 | } |
310 | } |
311 | return kTfLiteOk; |
312 | } |
313 | |
// Applies the delegates registered in `lazy_delegate_providers_`, at most
// once (the provider list is swapped out before use). Returns kTfLiteOk when
// all providers were applied or skipped. Non-kTfLiteError failure statuses
// are returned to the caller, which (see AllocateTensors) treats them as
// recoverable and continues without the delegate.
TfLiteStatus Interpreter::ApplyLazyDelegateProviders() {
  if (lazy_delegate_providers_.empty() || IsFullyDelegated()) return kTfLiteOk;

  // We only apply lazy delegate providers once.
  TfLiteDelegateCreators delegate_providers;
  delegate_providers.swap(lazy_delegate_providers_);

  TFLITE_LOG(TFLITE_LOG_INFO,
             "Applying %zu TensorFlow Lite delegate(s) lazily.",
             delegate_providers.size());
  // At the moment, XNNPACK delegate is the only one that might be applied
  // by default, in which case, the execution will fall back to default
  // implementation if the XNNPACK delegate fails to be applied.
  for (size_t i = 0; i < delegate_providers.size(); ++i) {
    auto delegate_ptr =
        delegate_providers[i](context_->recommended_num_threads);
    // Note when XNNPACK-by-default is disabled, the corresponding creator (i.e.
    // tflite::MaybeCreateXNNPACKDelegate(...)) will return a nullptr.
    // Therefore, we simply continue with the next one.
    if (delegate_ptr == nullptr) continue;
    auto status = ModifyGraphWithDelegateImpl(std::move(delegate_ptr));
    switch (status) {
      case kTfLiteOk:
        TFLITE_LOG(
            TFLITE_LOG_INFO,
            "Successfully applied the default TensorFlow Lite "
            "delegate indexed at %zu.\n *NOTE*: because a delegate has been "
            "applied, the precision of computations should be unchanged, but "
            "the exact output tensor values may have changed. If such output "
            "values are checked in your code, like in your tests etc., please "
            "consider increasing error tolerance for the check.",
            i);
        break;
      case kTfLiteError:
        // Hard failure: abort delegate application entirely.
        TF_LITE_REPORT_ERROR(error_reporter_,
                             "Failed to apply the default TensorFlow Lite "
                             "delegate indexed at %zu.",
                             i);
        return kTfLiteError;
      case kTfLiteDelegateError:
        // ModifyGraphWithDelegateImpl has already reverted all previously
        // applied delegates in this case.
        TFLITE_LOG(
            TFLITE_LOG_INFO,
            "Error in applying the default TensorFlow Lite delegate indexed "
            "at %zu, and all previously applied delegates are reverted.",
            i);
        return kTfLiteDelegateError;
      case kTfLiteApplicationError:
        // Recoverable: the caller is expected to ignore this status and
        // continue without the delegate.
        TFLITE_LOG(
            TFLITE_LOG_INFO,
            "Failed to apply the default TensorFlow Lite delegate indexed at "
            "%zu because of incompatibility between runtime and delegate. "
            "Ignoring the error, and continuing anyway.",
            i);
        return kTfLiteApplicationError;
      case kTfLiteUnresolvedOps:
        // Recoverable: another delegate (or the default kernels) may handle
        // the unresolved ops.
        TFLITE_LOG(
            TFLITE_LOG_INFO,
            "Failed to apply the default TensorFlow Lite delegate indexed at "
            "%zu because of unresolved ops (which could be resolved by "
            "another delegate). Ignoring the error, and continuing anyway.",
            i);
        return kTfLiteUnresolvedOps;
      default:
        TF_LITE_REPORT_ERROR(error_reporter_,
                             "Unknown status (%d) after applying the default "
                             "TensorFlow Lite delegate indexed at %zu.",
                             status, i);
        return kTfLiteError;
    }
  }
  return kTfLiteOk;
}
386 | |
// Applies `delegate` to every subgraph, skipping validation subgraphs.
// Stops at the first failure; on kTfLiteDelegateError all delegates
// (including previously applied ones) are removed so the interpreter is
// restored to its pre-delegation state, and the error is still returned.
TfLiteStatus Interpreter::ModifyGraphWithDelegateImpl(
    TfLiteDelegate* delegate) {
  TfLiteStatus status = kTfLiteOk;
  for (auto& subgraph : subgraphs_) {
    if (IsValidationSubgraph(subgraph->GetName().c_str())) {
      continue;
    }
    status = subgraph->ModifyGraphWithDelegate(delegate);
    if (status != kTfLiteOk) {
      break;
    }
  }
  // Delegate-specific errors can be recovered from by restoring Interpreter to
  // its original state.
  if (status == kTfLiteDelegateError) {
    TF_LITE_ENSURE_STATUS(RemoveAllDelegates());
  }
  return status;
}
406 | |
// Removes all applied delegates from every subgraph, restoring the
// undelegated execution plan. Fails fast on the first subgraph error.
TfLiteStatus Interpreter::RemoveAllDelegates() {
  for (auto& subgraph : subgraphs_) {
    TF_LITE_ENSURE_STATUS(subgraph->RemoveAllDelegates());
  }
  return kTfLiteOk;
}
413 | |
414 | TfLiteStatus Interpreter::SetMetadata( |
415 | const std::map<std::string, std::string>& metadata) { |
416 | metadata_ = metadata; |
417 | for (int subgraph_index = 0; subgraph_index < subgraphs_.size(); |
418 | ++subgraph_index) { |
419 | TF_LITE_ENSURE_STATUS(subgraphs_[subgraph_index]->SetMetadata(&metadata_)); |
420 | } |
421 | return kTfLiteOk; |
422 | } |
423 | |
// Returns true if every node of the primary subgraph is handled by a
// delegate.
bool Interpreter::IsFullyDelegated() const {
  return primary_subgraph().IsFullyDelegated();
}
427 | |
// Installs `profiler` as the sole child of the interpreter's root profiler
// and propagates the root profiler to every subgraph. A null `profiler`
// removes profiling entirely (subgraphs then receive a null profiler).
void Interpreter::SetProfilerImpl(std::unique_ptr<Profiler> profiler) {
  if (profiler == nullptr) {
    root_profiler_ = nullptr;
    return;
  }
  if (root_profiler_ == nullptr) {
    root_profiler_ = std::make_unique<profiling::RootProfiler>();
  } else {
    // Removes all previously registered profilers.
    root_profiler_->RemoveChildProfilers();
  }
  root_profiler_->AddProfiler(std::move(profiler));
  SetSubgraphProfiler();
}
442 | |
// Hands the root profiler to every subgraph, tagging each with its index so
// profiling events can be attributed to a specific subgraph. (An index loop
// is required here because the index itself is passed to SetProfiler.)
void Interpreter::SetSubgraphProfiler() {
  for (int subgraph_index = 0; subgraph_index < subgraphs_.size();
       ++subgraph_index) {
    subgraphs_[subgraph_index]->SetProfiler(root_profiler_.get(),
                                            subgraph_index);
  }
}
450 | |
451 | TfLiteStatus Interpreter::ApplyOptionsImpl(InterpreterOptions* options) { |
452 | if (options == nullptr) { |
453 | return kTfLiteOk; |
454 | } |
455 | options_ = std::make_unique<InterpreterOptions>(*options); |
456 | |
457 | // Set InterpreterOptions object to SubGraph. |
458 | for (auto& subgraph : subgraphs_) { |
459 | subgraph->SetOptions(options_.get()); |
460 | } |
461 | |
462 | // Handle `experimental_dynamic_allocation_for_large_tensors_`. |
463 | if (options->GetDynamicAllocationForLargeTensors() > 0) { |
464 | for (auto& subgraph : subgraphs_) { |
465 | subgraph->OptimizeMemoryForLargeTensors( |
466 | options->GetDynamicAllocationForLargeTensors()); |
467 | } |
468 | } |
469 | return kTfLiteOk; |
470 | } |
471 | |
472 | } // namespace tflite |
473 | |