1 | // Copyright 2020 Google LLC |
2 | // |
3 | // This source code is licensed under the BSD-style license found in the |
4 | // LICENSE file in the root directory of this source tree. |
5 | |
6 | #include <assert.h> |
7 | #include <stdbool.h> |
8 | #include <stdint.h> |
9 | #include <stdlib.h> |
10 | |
#include <xnnpack/allocator.h>
#include <xnnpack/memory-planner.h>
12 | #include <xnnpack/subgraph.h> |
13 | |
// Checks whether the lifecycles (live ranges) of two xnn_values overlap.
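// For example, usages [first_node=2, last_node=5] and [5, 7] overlap (both values are live at node 5), while
// [2, 4] and [5, 7] do not.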
static inline bool value_lifecycle_overlap(const struct xnn_value_usage* a, const struct xnn_value_usage* b) {
16 | assert(a->last_node >= a->first_node); |
17 | assert(b->last_node >= b->first_node); |
18 | if (a->first_node < b->first_node) { |
19 | return a->last_node >= b->first_node; |
20 | } else { |
21 | return b->last_node >= a->first_node; |
22 | } |
23 | } |
24 | |
// Comparison function for qsort: sorts an array of xnn_value_usage pointers by tensor_size in decreasing order.
27 | static inline int cmp_value_usage_tensor_size(const void* a, const void* b) { |
28 | const size_t tensor_size_a = (*(struct xnn_value_usage *const*)a)->tensor_size; |
29 | const size_t tensor_size_b = (*(struct xnn_value_usage *const*)b)->tensor_size; |
30 | return (tensor_size_b > tensor_size_a) - (tensor_size_b < tensor_size_a); |
31 | } |
32 | |
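// Computes the live range of every value in 'subgraph' (the first and the last node that reads or writes it), and
// resets the reuse_value_id/alloc_offset of every usage record.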
33 | static void populate_value_lifecycle(const xnn_subgraph_t subgraph, struct xnn_value_usage* usage) { |
34 | assert(subgraph != NULL); |
35 | if (subgraph->num_nodes == 0) { |
36 | return; |
37 | } |
  // first_node/last_node in each xnn_value_usage were zero-initialized in 'xnn_init_value_allocation_tracker', so a
  // value of 0 cannot distinguish "not yet set" from "used by node 0". We therefore scan from the second node
  // onwards, using 0 to mean "unset", and fix up the values used by the first node afterwards.
40 | for (uint32_t nid = 1; nid < subgraph->num_nodes; ++nid) { |
41 | const struct xnn_node* node = subgraph->nodes + nid; |
42 | for (uint32_t i = 0; i < node->num_inputs; ++i) { |
43 | if (usage[node->inputs[i]].first_node == 0) { |
44 | usage[node->inputs[i]].first_node = nid; |
45 | } |
46 | usage[node->inputs[i]].last_node = nid; |
47 | } |
48 | for (uint32_t i = 0; i < node->num_outputs; ++i) { |
49 | if (usage[node->outputs[i]].first_node == 0) { |
50 | usage[node->outputs[i]].first_node = nid; |
51 | } |
52 | usage[node->outputs[i]].last_node = nid; |
53 | } |
54 | } |
55 | const struct xnn_node* first_node = subgraph->nodes; |
56 | for (uint32_t i = 0; i < first_node->num_inputs; ++i) { |
57 | usage[first_node->inputs[i]].first_node = 0; |
58 | } |
59 | for (uint32_t i = 0; i < first_node->num_outputs; ++i) { |
60 | usage[first_node->outputs[i]].first_node = 0; |
61 | } |
  // Initialize every usage record with an invalid reuse_value_id and alloc_offset in a separate loop over all values:
  // some usage records are not associated with any node and are never visited by the loops over nodes above.
64 | for (uint32_t i = 0; i < subgraph->num_values; i++) { |
65 | usage[i].reuse_value_id = XNN_INVALID_VALUE_ID; |
66 | usage[i].alloc_offset = SIZE_MAX; |
67 | } |
68 | } |
69 | |
// Represents a memory block [start, end).
71 | struct memory_block { |
72 | size_t start; |
73 | size_t end; |
74 | }; |
75 | |
// Comparison function for qsort: sorts memory_block entries by 'start' in increasing order.
78 | static inline int cmp_memory_block(const void* a, const void* b) { |
79 | const size_t start_a = ((const struct memory_block*)a)->start; |
80 | const size_t start_b = ((const struct memory_block*)b)->start; |
81 | return (start_a > start_b) - (start_a < start_b); |
82 | } |
83 | |
// Given the currently live memory blocks, returns the offset in the memory arena at which a to-be-allocated value of
// size 'to_alloc_size' should be placed.
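// For example (illustrative sizes): live blocks {[16, 64), [0, 32), [96, 128)} are sorted and coalesced into
// {[0, 64), [96, 128)}; a value of size 24 fits into the gap [64, 96) and is placed at offset 64, while a value of
// size 48 does not fit into any gap and is placed at offset 128, growing the arena.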
86 | static size_t find_value_alloc_offset(struct memory_block* live_mem_blocks, |
87 | size_t num_mem_blocks, |
88 | size_t to_alloc_size) { |
89 | if (num_mem_blocks == 0) { |
90 | return 0; |
91 | } |
92 | |
93 | if (num_mem_blocks == 1) { |
94 | return live_mem_blocks[0].end; |
95 | } |
96 | |
97 | // Sort memory blocks according to 'start' in increasing order. |
98 | qsort(live_mem_blocks, num_mem_blocks, sizeof(struct memory_block), cmp_memory_block); |
99 | |
  // Coalesce overlapping or immediately adjacent memory blocks into a list of disjoint memory blocks, so that the
  // gaps between them can be searched for the smallest fit.
102 | size_t num_coalesced_mem_blocks = 1; |
103 | for (size_t i = 1; i < num_mem_blocks; ++i) { |
104 | const size_t current_coalesced_end = |
105 | live_mem_blocks[num_coalesced_mem_blocks - 1].end; |
106 | if (live_mem_blocks[i].start > current_coalesced_end) { |
107 | assert(num_coalesced_mem_blocks <= i); |
108 | live_mem_blocks[num_coalesced_mem_blocks] = live_mem_blocks[i]; |
109 | num_coalesced_mem_blocks++; |
110 | continue; |
111 | } |
112 | if (live_mem_blocks[i].end > current_coalesced_end) { |
113 | live_mem_blocks[num_coalesced_mem_blocks - 1].end = live_mem_blocks[i].end; |
114 | } |
115 | } |
116 | |
117 | size_t smallest_gap_size = SIZE_MAX; |
  // Index of the coalesced block after whose end the value should be placed; defaults to the last block, i.e. the
  // value is appended at the current end of the arena if no gap is large enough.
119 | size_t smallest_gap_index = num_coalesced_mem_blocks - 1; |
120 | for (size_t i = 0; i < num_coalesced_mem_blocks - 1; ++i) { |
121 | assert(live_mem_blocks[i + 1].start > live_mem_blocks[i].end); |
122 | const size_t gap = live_mem_blocks[i + 1].start - live_mem_blocks[i].end; |
123 | if (gap >= to_alloc_size && gap < smallest_gap_size) { |
124 | smallest_gap_index = i; |
125 | smallest_gap_size = gap; |
126 | } |
127 | } |
128 | return live_mem_blocks[smallest_gap_index].end; |
129 | } |
130 | |
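// Typical use of the tracker (a sketch: the value ids and tensor sizes below are illustrative):
//
//   struct xnn_value_allocation_tracker tracker;
//   xnn_init_value_allocation_tracker(&tracker, subgraph);
//   // Register every internal value that needs arena-backed storage, in increasing value-id order.
//   xnn_add_value_allocation_tracker(&tracker, /*value_id=*/3, /*tensor_size=*/1024);
//   xnn_add_value_allocation_tracker(&tracker, /*value_id=*/5, /*tensor_size=*/4096);
//   xnn_plan_value_allocation_tracker(&tracker);
//   // tracker.mem_arena_size is the arena size to allocate; value i is placed at tracker.usage[i].alloc_offset.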
131 | void xnn_init_value_allocation_tracker(struct xnn_value_allocation_tracker* tracker, const xnn_subgraph_t subgraph) { |
132 | tracker->subgraph = subgraph; |
133 | tracker->mem_arena_size = 0; |
134 | tracker->usage = xnn_allocate_zero_memory(sizeof(struct xnn_value_usage) * subgraph->num_values); |
135 | #if XNN_ENABLE_MEMOPT |
136 | populate_value_lifecycle(tracker->subgraph, tracker->usage); |
137 | #endif |
138 | tracker->min_value_id = XNN_INVALID_VALUE_ID; |
139 | tracker->max_value_id = XNN_INVALID_VALUE_ID; |
140 | } |
141 | |
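// Marks value 'value_id' as reusing the memory of value 'reuse_value_id' (e.g. for an in-place operation), and
// extends the live range of the reused value up to 'new_last_node'.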
142 | void xnn_mark_tensor_as_reuse(struct xnn_value_allocation_tracker* tracker, |
143 | uint32_t value_id, |
144 | uint32_t reuse_value_id, |
145 | uint32_t new_last_node) { |
  // Set tensor_size to 0 so that the memory planner does not try to allocate separate memory for this tensor.
147 | tracker->usage[value_id].tensor_size = 0; |
148 | tracker->usage[value_id].reuse_value_id = reuse_value_id; |
  // Extend the live range of the reused tensor to 'new_last_node'.
150 | tracker->usage[reuse_value_id].last_node = new_last_node; |
151 | } |
152 | |
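// Records that value 'value_id' needs 'tensor_size' bytes of arena-backed storage. Values must be added in
// increasing order of value id.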
153 | void xnn_add_value_allocation_tracker(struct xnn_value_allocation_tracker* tracker, |
154 | uint32_t value_id, |
155 | size_t tensor_size) { |
156 | tracker->usage[value_id].tensor_size = tensor_size; |
157 | if (tracker->min_value_id == XNN_INVALID_VALUE_ID) { |
158 | tracker->min_value_id = value_id; |
159 | } else { |
160 | // Note that values are expected to be added in increasing order. |
161 | assert(value_id > tracker->min_value_id); |
162 | assert(value_id > tracker->max_value_id); |
163 | } |
164 | |
165 | tracker->max_value_id = value_id; |
166 | } |
167 | |
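// Assigns an arena offset to every tracked value. With XNN_ENABLE_MEMOPT, values are placed greedily in decreasing
// size order and may share arena space with values whose live ranges do not overlap; otherwise every value gets its
// own non-overlapping region of the arena.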
168 | void xnn_plan_value_allocation_tracker(struct xnn_value_allocation_tracker* tracker) { |
169 | #if XNN_ENABLE_MEMOPT |
170 | if (tracker->min_value_id == XNN_INVALID_VALUE_ID) { |
171 | assert(tracker->max_value_id == XNN_INVALID_VALUE_ID); |
172 | return; |
173 | } |
174 | |
175 | const uint32_t num_values = tracker->max_value_id - tracker->min_value_id + 1; |
176 | struct xnn_value_usage** sorted_usage = xnn_allocate_zero_memory(sizeof(struct xnn_value_usage*) * num_values); |
177 | size_t num_values_to_alloc = 0; |
178 | for (size_t i = tracker->min_value_id; i <= tracker->max_value_id; ++i) { |
179 | struct xnn_value_usage* info = tracker->usage + i; |
180 | if (info->tensor_size != 0) { |
181 | sorted_usage[num_values_to_alloc++] = info; |
182 | } |
183 | } |
184 | qsort(sorted_usage, num_values_to_alloc, sizeof(struct xnn_value_usage*), cmp_value_usage_tensor_size); |
185 | |
  // Greedy planning: visit values in decreasing size order; for each value, collect the memory blocks of
  // already-placed values whose live ranges overlap it, and best-fit it into the smallest adequate gap between them
  // (or append it at the end of the arena).
187 | struct memory_block* current_live_mem_blocks = xnn_allocate_zero_memory( |
188 | sizeof(struct memory_block) * num_values_to_alloc); |
189 | size_t mem_arena_size = 0; |
190 | for (size_t i = 0; i < num_values_to_alloc; ++i) { |
191 | size_t num_live_mem_blocks = 0; |
192 | struct xnn_value_usage* current = sorted_usage[i]; |
193 | for (size_t j = 0; j < i; ++j) { |
194 | const struct xnn_value_usage* allocated = sorted_usage[j]; |
195 | if (value_lifecycle_overlap(current, allocated)) { |
196 | current_live_mem_blocks[num_live_mem_blocks++] = (struct memory_block){ |
197 | .start = allocated->alloc_offset, |
198 | .end = allocated->alloc_offset + allocated->tensor_size, |
199 | }; |
200 | } |
201 | } |
202 | current->alloc_offset = find_value_alloc_offset(current_live_mem_blocks, num_live_mem_blocks, current->tensor_size); |
203 | if (mem_arena_size < current->alloc_offset + current->tensor_size) { |
204 | mem_arena_size = current->alloc_offset + current->tensor_size; |
205 | } |
206 | } |
207 | |
  // Walk through all tensors that reuse another tensor's memory and copy the reused tensor's offset into their usage
  // records.
209 | for (size_t i = tracker->min_value_id; i <= tracker->max_value_id; ++i) { |
210 | struct xnn_value_usage* usage = &tracker->usage[i]; |
211 | uint32_t reuse_id = usage->reuse_value_id; |
212 | if (reuse_id == XNN_INVALID_VALUE_ID) { |
213 | continue; |
214 | } |
215 | assert(tracker->usage[reuse_id].alloc_offset != SIZE_MAX); |
216 | usage->alloc_offset = tracker->usage[reuse_id].alloc_offset; |
217 | } |
218 | |
219 | tracker->mem_arena_size = mem_arena_size; |
220 | xnn_release_memory(sorted_usage); |
221 | xnn_release_memory(current_live_mem_blocks); |
222 | #else |
  tracker->mem_arena_size = 0;
  if (tracker->min_value_id == XNN_INVALID_VALUE_ID) {
    // No values were added to the tracker; nothing to plan.
    assert(tracker->max_value_id == XNN_INVALID_VALUE_ID);
    return;
  }
  for (uint32_t i = tracker->min_value_id; i <= tracker->max_value_id; ++i) {
225 | if (tracker->usage[i].tensor_size > 0) { |
226 | tracker->usage[i].alloc_offset = tracker->mem_arena_size; |
227 | tracker->mem_arena_size += tracker->usage[i].tensor_size; |
228 | } |
229 | } |
230 | #endif |
231 | } |
232 | |