// Copyright 2020 Google LLC
//
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree.

#include <assert.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdlib.h>

#include <xnnpack/memory-planner.h>
#include <xnnpack/subgraph.h>

// Check whether the lifecycles of two xnn_values overlap.
inline static bool value_lifecycle_overlap(const struct xnn_value_usage* a, const struct xnn_value_usage* b) {
  assert(a->last_node >= a->first_node);
  assert(b->last_node >= b->first_node);
  if (a->first_node < b->first_node) {
    return a->last_node >= b->first_node;
  } else {
    return b->last_node >= a->first_node;
  }
}
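// For example, values with lifecycles [1, 4] and [4, 6] overlap (both are live at node 4) and must not share memory,
// while values with lifecycles [1, 3] and [4, 6] do not overlap and may be assigned the same arena region.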

// Use this comparison function to sort xnn_value_usage according to the
// tensor_size in decreasing order.
static inline int cmp_value_usage_tensor_size(const void* a, const void* b) {
  const size_t tensor_size_a = (*(struct xnn_value_usage *const*)a)->tensor_size;
  const size_t tensor_size_b = (*(struct xnn_value_usage *const*)b)->tensor_size;
  return (tensor_size_b > tensor_size_a) - (tensor_size_b < tensor_size_a);
}
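// The (x > y) - (x < y) idiom yields -1, 0, or +1 directly, avoiding the overflow and truncation that subtracting two
// size_t values and converting the result to int could cause.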

static void populate_value_lifecycle(const xnn_subgraph_t subgraph, struct xnn_value_usage* usage) {
  assert(subgraph != NULL);
  if (subgraph->num_nodes == 0) {
    return;
  }
  // first_node/last_node in each xnn_value_usage are zero-initialized in 'xnn_init_value_allocation_tracker', so a
  // value of 0 cannot tell whether they have been set yet. Therefore, start with the second node and handle the first
  // node (id 0) last.
  for (uint32_t nid = 1; nid < subgraph->num_nodes; ++nid) {
    const struct xnn_node* node = subgraph->nodes + nid;
    for (uint32_t i = 0; i < node->num_inputs; ++i) {
      if (usage[node->inputs[i]].first_node == 0) {
        usage[node->inputs[i]].first_node = nid;
      }
      usage[node->inputs[i]].last_node = nid;
    }
    for (uint32_t i = 0; i < node->num_outputs; ++i) {
      if (usage[node->outputs[i]].first_node == 0) {
        usage[node->outputs[i]].first_node = nid;
      }
      usage[node->outputs[i]].last_node = nid;
    }
  }
  const struct xnn_node* first_node = subgraph->nodes;
  for (uint32_t i = 0; i < first_node->num_inputs; ++i) {
    usage[first_node->inputs[i]].first_node = 0;
  }
  for (uint32_t i = 0; i < first_node->num_outputs; ++i) {
    usage[first_node->outputs[i]].first_node = 0;
  }
  // Separate loop over all values to make sure every usage record is initialized with an invalid reuse_value_id.
  // Some usage records are not associated with any node and thus are not visited by the loops over nodes above.
  for (uint32_t i = 0; i < subgraph->num_values; i++) {
    usage[i].reuse_value_id = XNN_INVALID_VALUE_ID;
    usage[i].alloc_offset = SIZE_MAX;
  }
}
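// Illustration of the lifecycle computation above, for a hypothetical 3-node chain: node 0 produces value A, node 1
// consumes A and produces B, node 2 consumes B. The computed lifecycles are A = [0, 1] and B = [1, 2]; A and B overlap
// at node 1, so the planner below will not let them share memory.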

// Represent a memory block [start, end)
struct memory_block {
  size_t start;
  size_t end;
};

// Use this comparison function to sort memory_block according to the 'start'
// in increasing order.
static inline int cmp_memory_block(const void* a, const void* b) {
  const size_t start_a = ((const struct memory_block*)a)->start;
  const size_t start_b = ((const struct memory_block*)b)->start;
  return (start_a > start_b) - (start_a < start_b);
}

// Given the current live memory blocks, return the offset in a memory arena for a to-be-allocated value of size
// 'to_alloc_size'.
static size_t find_value_alloc_offset(struct memory_block* live_mem_blocks,
                                      size_t num_mem_blocks,
                                      size_t to_alloc_size) {
  if (num_mem_blocks == 0) {
    return 0;
  }

  if (num_mem_blocks == 1) {
    return live_mem_blocks[0].end;
  }

  // Sort memory blocks according to 'start' in increasing order.
  qsort(live_mem_blocks, num_mem_blocks, sizeof(struct memory_block), cmp_memory_block);

  // Coalesce overlapping or immediately adjacent memory blocks into a list of non-overlapping memory blocks in order
  // to find the smallest gap.
  size_t num_coalesced_mem_blocks = 1;
  for (size_t i = 1; i < num_mem_blocks; ++i) {
    const size_t current_coalesced_end =
        live_mem_blocks[num_coalesced_mem_blocks - 1].end;
    if (live_mem_blocks[i].start > current_coalesced_end) {
      assert(num_coalesced_mem_blocks <= i);
      live_mem_blocks[num_coalesced_mem_blocks] = live_mem_blocks[i];
      num_coalesced_mem_blocks++;
      continue;
    }
    if (live_mem_blocks[i].end > current_coalesced_end) {
      live_mem_blocks[num_coalesced_mem_blocks - 1].end = live_mem_blocks[i].end;
    }
  }

  size_t smallest_gap_size = SIZE_MAX;
  // Index of the coalesced memory block after which the new value of 'to_alloc_size' bytes will be allocated.
  // Defaults to the last block, i.e. the value is appended at the end of the arena if no gap is large enough.
  size_t smallest_gap_index = num_coalesced_mem_blocks - 1;
  for (size_t i = 0; i < num_coalesced_mem_blocks - 1; ++i) {
    assert(live_mem_blocks[i + 1].start > live_mem_blocks[i].end);
    const size_t gap = live_mem_blocks[i + 1].start - live_mem_blocks[i].end;
    if (gap >= to_alloc_size && gap < smallest_gap_size) {
      smallest_gap_index = i;
      smallest_gap_size = gap;
    }
  }
  return live_mem_blocks[smallest_gap_index].end;
}
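// Worked example of the best-fit search above: with live blocks [0, 64) and [96, 128), the only gap is 32 bytes at
// offset 64. Allocating 24 bytes returns offset 64 (the gap is reused); allocating 48 bytes returns offset 128 (the
// arena grows at the end).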

void xnn_init_value_allocation_tracker(struct xnn_value_allocation_tracker* tracker, const xnn_subgraph_t subgraph) {
  tracker->subgraph = subgraph;
  tracker->mem_arena_size = 0;
  tracker->usage = xnn_allocate_zero_memory(sizeof(struct xnn_value_usage) * subgraph->num_values);
#if XNN_ENABLE_MEMOPT
  populate_value_lifecycle(tracker->subgraph, tracker->usage);
#endif
  tracker->min_value_id = XNN_INVALID_VALUE_ID;
  tracker->max_value_id = XNN_INVALID_VALUE_ID;
}
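// Note: 'xnn_allocate_zero_memory' zero-initializes the usage array, so every value starts with tensor_size 0 and
// only values later registered via xnn_add_value_allocation_tracker (or marked for reuse) receive planned offsets.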

void xnn_mark_tensor_as_reuse(struct xnn_value_allocation_tracker* tracker,
                              uint32_t value_id,
                              uint32_t reuse_value_id,
                              uint32_t new_last_node) {
  // Set tensor_size to 0 so that the memory planner will not try to allocate memory for this tensor.
  tracker->usage[value_id].tensor_size = 0;
  tracker->usage[value_id].reuse_value_id = reuse_value_id;
  // Extend the live range of the reused tensor so that it stays allocated until 'new_last_node'.
  tracker->usage[reuse_value_id].last_node = new_last_node;
}
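// Illustrative use (placeholder ids, not taken from an actual caller): an operator that can run in place can have its
// output value share its input's memory:
//   xnn_mark_tensor_as_reuse(&tracker, output_id, input_id, last_node_reading_output_id);
// During planning, the output is then assigned the same alloc_offset as the input instead of its own allocation.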

void xnn_add_value_allocation_tracker(struct xnn_value_allocation_tracker* tracker,
                                      uint32_t value_id,
                                      size_t tensor_size) {
  tracker->usage[value_id].tensor_size = tensor_size;
  if (tracker->min_value_id == XNN_INVALID_VALUE_ID) {
    tracker->min_value_id = value_id;
  } else {
    // Note that values are expected to be added in increasing order of value_id.
    assert(value_id > tracker->min_value_id);
    assert(value_id > tracker->max_value_id);
  }

  tracker->max_value_id = value_id;
}

void xnn_plan_value_allocation_tracker(struct xnn_value_allocation_tracker* tracker) {
#if XNN_ENABLE_MEMOPT
  if (tracker->min_value_id == XNN_INVALID_VALUE_ID) {
    assert(tracker->max_value_id == XNN_INVALID_VALUE_ID);
    return;
  }

  const uint32_t num_values = tracker->max_value_id - tracker->min_value_id + 1;
  struct xnn_value_usage** sorted_usage = xnn_allocate_zero_memory(sizeof(struct xnn_value_usage*) * num_values);
  size_t num_values_to_alloc = 0;
  for (size_t i = tracker->min_value_id; i <= tracker->max_value_id; ++i) {
    struct xnn_value_usage* info = tracker->usage + i;
    if (info->tensor_size != 0) {
      sorted_usage[num_values_to_alloc++] = info;
    }
  }
  qsort(sorted_usage, num_values_to_alloc, sizeof(struct xnn_value_usage*), cmp_value_usage_tensor_size);

  // Start the allocation planning process.
  struct memory_block* current_live_mem_blocks = xnn_allocate_zero_memory(
      sizeof(struct memory_block) * num_values_to_alloc);
  size_t mem_arena_size = 0;
  for (size_t i = 0; i < num_values_to_alloc; ++i) {
    size_t num_live_mem_blocks = 0;
    struct xnn_value_usage* current = sorted_usage[i];
    for (size_t j = 0; j < i; ++j) {
      const struct xnn_value_usage* allocated = sorted_usage[j];
      if (value_lifecycle_overlap(current, allocated)) {
        current_live_mem_blocks[num_live_mem_blocks++] = (struct memory_block){
            .start = allocated->alloc_offset,
            .end = allocated->alloc_offset + allocated->tensor_size,
        };
      }
    }
    current->alloc_offset = find_value_alloc_offset(current_live_mem_blocks, num_live_mem_blocks, current->tensor_size);
    if (mem_arena_size < current->alloc_offset + current->tensor_size) {
      mem_arena_size = current->alloc_offset + current->tensor_size;
    }
  }

  // Walk through all tensors that are reusing memory, and update their usage records.
  for (size_t i = tracker->min_value_id; i <= tracker->max_value_id; ++i) {
    struct xnn_value_usage* usage = &tracker->usage[i];
    uint32_t reuse_id = usage->reuse_value_id;
    if (reuse_id == XNN_INVALID_VALUE_ID) {
      continue;
    }
    assert(tracker->usage[reuse_id].alloc_offset != SIZE_MAX);
    usage->alloc_offset = tracker->usage[reuse_id].alloc_offset;
  }

  tracker->mem_arena_size = mem_arena_size;
  xnn_release_memory(sorted_usage);
  xnn_release_memory(current_live_mem_blocks);
#else
  tracker->mem_arena_size = 0;
  // Mirror the XNN_ENABLE_MEMOPT path: bail out early if no values were added to the tracker, otherwise the loop
  // bounds below would be invalid.
  if (tracker->min_value_id == XNN_INVALID_VALUE_ID) {
    assert(tracker->max_value_id == XNN_INVALID_VALUE_ID);
    return;
  }
  for (uint32_t i = tracker->min_value_id; i <= tracker->max_value_id; ++i) {
    if (tracker->usage[i].tensor_size > 0) {
      tracker->usage[i].alloc_offset = tracker->mem_arena_size;
      tracker->mem_arena_size += tracker->usage[i].tensor_size;
    }
  }
#endif
}
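// Usage sketch (an illustration of the intended call sequence, not code from the actual runtime):
//
//   struct xnn_value_allocation_tracker tracker;
//   xnn_init_value_allocation_tracker(&tracker, subgraph);
//   // Register every internal value that needs arena memory, in increasing value_id order:
//   xnn_add_value_allocation_tracker(&tracker, value_id, tensor_size);
//   xnn_plan_value_allocation_tracker(&tracker);
//   // tracker.mem_arena_size now holds the required arena size, and tracker.usage[value_id].alloc_offset holds the
//   // planned offset of each value within that arena.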