/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#include "tensorflow/lite/simple_memory_arena.h"

#include <stddef.h>
#include <stdint.h>

#include <algorithm>
#include <cstring>
#include <iterator>
#include <limits>
#include <memory>
#include <string>
#include <vector>

#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/core/macros.h"
#ifdef TF_LITE_TENSORFLOW_PROFILER
#include "tensorflow/lite/tensorflow_profiler_logger.h"
#endif  // TF_LITE_TENSORFLOW_PROFILER

namespace {

// Rounds `offset` up to the nearest multiple of `alignment` (e.g.
// AlignTo(4, 6) == 8 and AlignTo(4, 8) == 8). `alignment` must be non-zero.
template <typename T>
T AlignTo(size_t alignment, T offset) {
  return offset % alignment == 0 ? offset
                                 : offset + (alignment - offset % alignment);
}

}  // namespace

namespace tflite {

// Deallocate() and DeallocateAfter() only mark entries (tensor == -1); this
// compacts `ordered_allocs_` by erasing all marked entries in one pass.
void SimpleMemoryArena::ResolveDeallocations() {
  if (!allocs_erased_) {
    return;
  }
  ordered_allocs_.erase(
      std::remove_if(ordered_allocs_.begin(), ordered_allocs_.end(),
                     [](ArenaAllocWithUsageInterval& alloc) {
                       return alloc.tensor == -1;
                     }),
      ordered_allocs_.end());
  allocs_erased_ = false;
}

TfLiteStatus SimpleMemoryArena::Allocate(
    TfLiteContext* context, size_t alignment, size_t size, int32_t tensor,
    int32_t first_node, int32_t last_node,
    ArenaAllocWithUsageInterval* new_alloc) {
  TF_LITE_ENSURE(context, alignment <= arena_alignment_);
  new_alloc->tensor = tensor;
  new_alloc->first_node = first_node;
  new_alloc->last_node = last_node;
  new_alloc->size = size;
  if (size == 0) {
    new_alloc->offset = 0;
    return kTfLiteOk;
  }

  // If we don't find a better gap, just allocate at the end of the buffer.
  const size_t kOffsetNotAssigned = std::numeric_limits<size_t>::max();
  size_t best_offset = kOffsetNotAssigned;
  size_t best_offset_fit = kOffsetNotAssigned;

  // Go through the sorted allocs and look at the gaps between them.
  size_t current_offset = 0;
  for (const auto& alloc : ordered_allocs_) {
    if (alloc.last_node < first_node || alloc.first_node > last_node) {
      // Usage interval of alloc doesn't intersect with current tensor's usage
      // interval, so we skip it.
      continue;
    }
    size_t aligned_current_offset = AlignTo(alignment, current_offset);
    // If we find a gap that fits the requested size and is tighter than the
    // previous best fit, take it.
    if (aligned_current_offset + size <= alloc.offset &&
        alloc.offset - aligned_current_offset < best_offset_fit) {
      best_offset = aligned_current_offset;
      best_offset_fit = alloc.offset - aligned_current_offset;
    }
    current_offset = std::max(current_offset, alloc.offset + alloc.size);
    // A fit with zero leftover space is as good as it gets; no point
    // continuing.
    if (best_offset_fit == 0) {
      break;
    }
  }
  if (best_offset == kOffsetNotAssigned) {
    best_offset = AlignTo(alignment, current_offset);
  }

  // Update the required buffer size.
  high_water_mark_ = std::max(high_water_mark_, best_offset + size);
  new_alloc->offset = best_offset;

  auto insertion_it = std::upper_bound(ordered_allocs_.begin(),
                                       ordered_allocs_.end(), *new_alloc);
  ordered_allocs_.insert(insertion_it, *new_alloc);
  return kTfLiteOk;
}
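
// Illustrative walk-through of the best-fit search above, with hypothetical
// values: given overlapping live allocs occupying [0, 64) and [128, 160), a
// 32-byte request with 16-byte alignment is placed at offset 64 (the gap of
// 64 bytes fits and is the tightest available); if no gap were large enough,
// the request would land at the end of the buffer, offset 160.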

void SimpleMemoryArena::DeallocateAfter(int32_t node) {
  // Mark every allocation whose usage interval starts after `node`, then
  // compact the list.
  for (auto& alloc : ordered_allocs_) {
    if (alloc.first_node > node) {
      alloc.tensor = -1;
      allocs_erased_ = true;
    }
  }
  ResolveDeallocations();
}

TfLiteStatus SimpleMemoryArena::Deallocate(TfLiteContext* context,
                                           ArenaAllocWithUsageInterval& alloc) {
  if (alloc.size == 0) {
    return kTfLiteOk;
  }

  // Only mark the matching entry here; the actual erase is deferred to
  // ResolveDeallocations().
  for (auto& allocation : ordered_allocs_) {
    if (allocation.tensor == alloc.tensor) {
      allocation.tensor = -1;
      allocs_erased_ = true;
      break;
    }
  }
  return kTfLiteOk;
}

TfLiteStatus SimpleMemoryArena::Commit(TfLiteContext* context,
                                       bool* arena_reallocated) {
  size_t required_size = RequiredBufferSize();
  if (required_size > underlying_buffer_size_) {
    *arena_reallocated = true;
#ifdef TF_LITE_TENSORFLOW_PROFILER
    OnTfLiteArenaAlloc(subgraph_index_, reinterpret_cast<std::uintptr_t>(this),
                       required_size);
#endif
    // RequiredBufferSize() is assumed to include enough headroom for this
    // alignment adjustment.
    char* new_alloc = new char[required_size];
    char* new_underlying_buffer_aligned_ptr = reinterpret_cast<char*>(
        AlignTo(arena_alignment_, reinterpret_cast<intptr_t>(new_alloc)));

    // If the arena was previously allocated, copy over the old memory. Since
    // allocations are offset-based, they remain valid in the new buffer.
    if (high_water_mark_ > 0 && underlying_buffer_size_ > 0) {
      size_t copy_amount = std::min(
          underlying_buffer_.get() + underlying_buffer_size_ -
              underlying_buffer_aligned_ptr_,
          new_alloc + required_size - new_underlying_buffer_aligned_ptr);
      memcpy(new_underlying_buffer_aligned_ptr, underlying_buffer_aligned_ptr_,
             copy_amount);
    }

#ifdef TF_LITE_TENSORFLOW_PROFILER
    if (underlying_buffer_size_ > 0) {
      OnTfLiteArenaDealloc(subgraph_index_,
                           reinterpret_cast<std::uintptr_t>(this),
                           underlying_buffer_size_);
    }
#endif
    underlying_buffer_.reset(new_alloc);
    underlying_buffer_size_ = required_size;
    underlying_buffer_aligned_ptr_ = new_underlying_buffer_aligned_ptr;
  } else {
    *arena_reallocated = false;
  }
  committed_ = true;
  return underlying_buffer_ != nullptr ? kTfLiteOk : kTfLiteError;
}
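
// Note: pointers produced by ResolveAlloc() below point into
// `underlying_buffer_aligned_ptr_`, so a reallocating Commit() invalidates
// them; callers are expected to re-resolve whenever `*arena_reallocated`
// comes back true.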

TfLiteStatus SimpleMemoryArena::ResolveAlloc(
    TfLiteContext* context, const ArenaAllocWithUsageInterval& alloc,
    char** output_ptr) {
  TF_LITE_ENSURE(context, committed_);
  TF_LITE_ENSURE(context, output_ptr != nullptr);
  TF_LITE_ENSURE(context,
                 underlying_buffer_size_ >= (alloc.offset + alloc.size));
  if (alloc.size == 0) {
    *output_ptr = nullptr;
  } else {
    *output_ptr = underlying_buffer_aligned_ptr_ + alloc.offset;
  }
  return kTfLiteOk;
}
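
// End-to-end usage sketch (hypothetical caller code; assumes `arena` is a
// SimpleMemoryArena whose arena_alignment_ is >= the requested alignment,
// which Allocate() enforces):
//
//   ArenaAllocWithUsageInterval alloc;
//   arena.Allocate(context, /*alignment=*/4, /*size=*/1024, /*tensor=*/0,
//                  /*first_node=*/0, /*last_node=*/2, &alloc);
//   bool reallocated = false;
//   arena.Commit(context, &reallocated);
//   char* data = nullptr;
//   arena.ResolveAlloc(context, alloc, &data);  // data -> 1024 usable bytes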

// Drops the allocation plan but keeps any previously allocated buffer.
TfLiteStatus SimpleMemoryArena::ClearPlan() {
  committed_ = false;
  high_water_mark_ = 0;
  ordered_allocs_.clear();
  return kTfLiteOk;
}

// Frees the underlying buffer while keeping the allocation plan, so a later
// Commit() can allocate a fresh buffer.
TfLiteStatus SimpleMemoryArena::ReleaseBuffer() {
  committed_ = false;
#ifdef TF_LITE_TENSORFLOW_PROFILER
  OnTfLiteArenaDealloc(subgraph_index_, reinterpret_cast<std::uintptr_t>(this),
                       underlying_buffer_size_);
#endif
  underlying_buffer_size_ = 0;
  underlying_buffer_aligned_ptr_ = nullptr;
  underlying_buffer_.reset();
  return kTfLiteOk;
}

// Using weak symbols to create a pluggable debugging module.
TFLITE_ATTRIBUTE_WEAK void DumpArenaInfo(
    const std::string& name, const std::vector<int>& execution_plan,
    size_t arena_size, const std::vector<ArenaAllocWithUsageInterval>& allocs) {
}
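
// A binary may provide a strong definition of DumpArenaInfo to receive these
// dumps. Minimal sketch of such an override (hypothetical, not part of the
// runtime):
//
//   #include <cstdio>
//
//   namespace tflite {
//   void DumpArenaInfo(const std::string& name,
//                      const std::vector<int>& execution_plan,
//                      size_t arena_size,
//                      const std::vector<ArenaAllocWithUsageInterval>& allocs) {
//     fprintf(stderr, "%s: arena of %zu bytes, %zu allocs\n", name.c_str(),
//             arena_size, allocs.size());
//   }
//   }  // namespace tflite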

void SimpleMemoryArena::DumpDebugInfo(
    const std::string& name, const std::vector<int>& execution_plan) const {
  tflite::DumpArenaInfo(name, execution_plan, underlying_buffer_size_,
                        ordered_allocs_);
}

}  // namespace tflite