1 | /* |
2 | * Licensed to the Apache Software Foundation (ASF) under one |
3 | * or more contributor license agreements. See the NOTICE file |
4 | * distributed with this work for additional information |
5 | * regarding copyright ownership. The ASF licenses this file |
6 | * to you under the Apache License, Version 2.0 (the |
7 | * "License"); you may not use this file except in compliance |
8 | * with the License. You may obtain a copy of the License at |
9 | * |
10 | * http://www.apache.org/licenses/LICENSE-2.0 |
11 | * |
12 | * Unless required by applicable law or agreed to in writing, |
13 | * software distributed under the License is distributed on an |
14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
15 | * KIND, either express or implied. See the License for the |
16 | * specific language governing permissions and limitations |
17 | * under the License. |
18 | */ |
19 | |
20 | /*! |
21 | * \file relay/backend/token_allocator.cc |
22 | * \brief Token allocation classes for backend |
23 | */ |
24 | |
25 | #include "token_allocator.h" |
26 | |
27 | #include <tvm/tir/op.h> |
28 | |
29 | #include <algorithm> |
30 | #include <limits> |
31 | |
32 | namespace tvm { |
33 | namespace relay { |
34 | |
35 | size_t TokenAllocator1D::GetMemorySize(StorageToken* prototype) { |
36 | TensorType ttype = prototype->ttype; |
37 | ICHECK(ttype.defined()); |
38 | size_t size = 1; |
39 | for (IndexExpr dim : ttype->shape) { |
40 | const int64_t* pval = tir::as_const_int(dim); |
41 | ICHECK(pval != nullptr) << "Cannot allocate memory symbolic tensor shape " << ttype->shape; |
42 | ICHECK_GE(*pval, 0) << "Cannot allocate memory for tensor with negative shape" << *pval; |
43 | size *= static_cast<size_t>(pval[0]); |
44 | } |
45 | size *= DivRoundUp(ttype->dtype.bits() * ttype->dtype.lanes(), 8); |
46 | return size; |
47 | } |
48 | |
// Try to reuse a previously released storage token for `prototype`.
// Candidates are free tokens whose byte size lies within
// [size / match_range_, size * match_range_).  Returns the reused token, or
// nullptr when no compatible free token is in range (callers then Alloc).
StorageToken* TokenAllocator1D::Request(StorageToken* prototype) {
  // calculate the size;
  size_t size = GetMemorySize(prototype);
  // search memory block in [size / match_range_, size * match_range_)
  if (match_range_ == 0) {
    // match_range_ == 0 disables reuse entirely.
    return nullptr;
  }
  auto begin = free_.lower_bound(size / match_range_);
  auto mid = free_.lower_bound(size);  // first free token with capacity >= size
  auto end = free_.upper_bound(size * match_range_);
  // search for memory blocks larger than requested (no growth needed)
  for (auto it = mid; it != end; ++it) {
    StorageToken* tok = it->second;
    if (!tok->is_compatible(*prototype)) continue;
    // Only fully released tokens may sit in the free list.
    ICHECK_EQ(tok->ref_counter, 0);
    // Use exact matching strategy
    tok->max_bytes = std::max(size, tok->max_bytes);
    tok->ref_counter = prototype->ref_counter;
    // find an exact match, erase from map and return
    free_.erase(it);
    return tok;
  }
  // then search for memory blocks smaller than requested space; reusing one
  // grows its max_bytes up to the requested size.
  for (auto it = mid; it != begin;) {
    // Decrement before dereferencing so the walk is [begin, mid) in reverse
    // and `begin` itself is inspected.
    --it;
    StorageToken* tok = it->second;
    if (!tok->is_compatible(*prototype)) continue;
    ICHECK_EQ(tok->ref_counter, 0);
    // Use exact matching strategy
    tok->max_bytes = std::max(size, tok->max_bytes);
    tok->ref_counter = prototype->ref_counter;
    // erase from map and return
    free_.erase(it);
    return tok;
  }
  // No reusable block within the match range.
  return nullptr;
}
86 | |
87 | StorageToken* TokenAllocator1D::Alloc(StorageToken* prototype, int64_t storage_id) { |
88 | size_t size = GetMemorySize(prototype); |
89 | prototype->max_bytes = size; |
90 | prototype->storage_id = storage_id; |
91 | data_.push_back(prototype); |
92 | return prototype; |
93 | } |
94 | |
95 | void TokenAllocator1D::CheckForRelease(StorageToken* tok) { |
96 | ICHECK_GE(tok->storage_id, 0); |
97 | ICHECK_GE(tok->ref_counter, 0); |
98 | if (tok->ref_counter == 0) { |
99 | free_.insert({tok->max_bytes, tok}); |
100 | } |
101 | } |
102 | |
// Try to reuse a free 2D texture block for `prototype`.  Returns the reused
// (or expanded) token, or nullptr when no free block is close enough in
// shape — callers then fall back to Alloc.
StorageToken* TokenAllocator2D::Request(StorageToken* prototype) {
  auto shape = GetSize2D(prototype);
  // Reject candidates whose width or height differs from the request by more
  // than this factor, in either direction.
  const int64_t max_ratio = 5;
  int64_t min_added_size_x = std::numeric_limits<int64_t>::max();
  int64_t min_added_size_y = std::numeric_limits<int64_t>::max();
  int64_t min_wasted_size_x = std::numeric_limits<int64_t>::max();
  int64_t min_wasted_size_y = std::numeric_limits<int64_t>::max();
  int64_t best_storage_id = -1;
  MemBlock new_mem;
  for (int64_t free_id : free_list_) {
    MemBlock& cached = blocks_[free_id];
    // Can only reuse texture 2d blocks of the same type
    if (cached.token_->ttype->dtype != prototype->ttype->dtype) {
      continue;
    }
    // Can only reuse texture 2d blocks of the same scope
    // Because reusing textures with different memory scope may lead to
    // accuracy issues, because the data will be packed in a different way for
    // different memory scopes.
    if (cached.token_->virtual_device->memory_scope != prototype->virtual_device->memory_scope) {
      continue;
    }
    // avoid reusing too small and too big textures
    if (shape.width / cached.x_ > max_ratio || cached.x_ / shape.width > max_ratio ||
        shape.height / cached.y_ > max_ratio || cached.y_ / shape.height > max_ratio) {
      continue;
    }
    // Size the block would have after accommodating both the cached extent
    // and the request.
    int64_t new_width = std::max(cached.x_, shape.width);
    int64_t new_height = std::max(cached.y_, shape.height);
    int64_t added_size_x = new_width - cached.x_;    // growth required of the block
    int64_t added_size_y = new_height - cached.y_;
    int64_t wasted_size_x = new_width - shape.width;  // slack left over for the request
    int64_t wasted_size_y = new_height - shape.height;
    // Prioritize minimization of added size first, then minimize
    // wasted size among blocks which would not require expansion
    // NOTE(review): the OR-chain below can accept a candidate that improves
    // only one axis (e.g. smaller added_size_y) while the other axis worsens,
    // and compares x/y criteria independently — confirm this tie-breaking is
    // intentional before relying on it for best-fit behavior.
    if ((min_added_size_x > 0 && added_size_x < min_added_size_x) ||
        (min_added_size_y > 0 && added_size_y < min_added_size_y) ||
        (min_added_size_x == added_size_x && wasted_size_x < min_wasted_size_x) ||
        (min_added_size_y == added_size_y && wasted_size_y < min_wasted_size_y)) {
      min_added_size_x = added_size_x;
      min_added_size_y = added_size_y;
      min_wasted_size_x = wasted_size_x;
      min_wasted_size_y = wasted_size_y;
      best_storage_id = free_id;
      new_mem.x_ = new_width;
      new_mem.y_ = new_height;
    }
  }

  if (min_added_size_x == 0 && min_added_size_y == 0) {
    // use existing block: it already covers the request in both dimensions.
    free_list_.erase(best_storage_id);
    blocks_[best_storage_id].token_->ref_counter += prototype->ref_counter;
    return blocks_[best_storage_id].token_;
  } else if (min_added_size_x <= shape.width || min_added_size_y <= shape.height) {
    // Reset the reference counter of the now live token
    // The chosen block is expanded to new_mem's extents and re-keyed to the
    // prototype token.
    // NOTE(review): ref_counter += 1 here differs from the 1D path, which
    // adopts prototype->ref_counter wholesale — verify this asymmetry is
    // intended.
    free_list_.erase(best_storage_id);
    new_mem.token_ = prototype;
    new_mem.token_->ref_counter += 1;
    new_mem.token_->storage_id = best_storage_id;
    blocks_[best_storage_id] = new_mem;
    return new_mem.token_;
  }
  // No candidate worth reusing (or free_list_ was empty).
  return nullptr;
}
168 | |
169 | StorageToken* TokenAllocator2D::Alloc(StorageToken* prototype, int64_t storage_id) { |
170 | auto shape = GetSize2D(prototype); |
171 | MemBlock block; |
172 | block.x_ = shape.width; |
173 | block.y_ = shape.height; |
174 | prototype->storage_id = storage_id; |
175 | block.token_ = prototype; |
176 | blocks_[prototype->storage_id] = block; |
177 | return prototype; |
178 | } |
179 | |
180 | void TokenAllocator2D::CheckForRelease(StorageToken* tok) { |
181 | ICHECK_GE(tok->storage_id, 0); |
182 | ICHECK_GE(tok->ref_counter, 0); |
183 | if (tok->ref_counter == 0) { |
184 | free_list_.insert(tok->storage_id); |
185 | } |
186 | } |
187 | |
188 | runtime::Texture2DShape<int64_t> TokenAllocator2D::GetSize2D(StorageToken* prototype) { |
189 | TensorType ttype = prototype->ttype; |
190 | ICHECK(ttype.defined()); |
191 | size_t axis = runtime::DefaultTextureLayoutSeparator(ttype->shape.size(), |
192 | prototype->virtual_device->memory_scope); |
193 | struct Shape { |
194 | const Array<PrimExpr>& shape; |
195 | int64_t operator[](size_t i) const { return *tir::as_const_int(shape[i]); } |
196 | }; |
197 | return runtime::ApplyTexture2DFlattening<int64_t>(Shape{ttype->shape}, ttype->shape.size(), axis); |
198 | } |
199 | |
200 | } // namespace relay |
201 | } // namespace tvm |
202 | |