1 | /* |
2 | * Licensed to the Apache Software Foundation (ASF) under one |
3 | * or more contributor license agreements. See the NOTICE file |
4 | * distributed with this work for additional information |
5 | * regarding copyright ownership. The ASF licenses this file |
6 | * to you under the Apache License, Version 2.0 (the |
7 | * "License"); you may not use this file except in compliance |
8 | * with the License. You may obtain a copy of the License at |
9 | * |
10 | * http://www.apache.org/licenses/LICENSE-2.0 |
11 | * |
12 | * Unless required by applicable law or agreed to in writing, |
13 | * software distributed under the License is distributed on an |
14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
15 | * KIND, either express or implied. See the License for the |
16 | * specific language governing permissions and limitations |
17 | * under the License. |
18 | */ |
19 | |
20 | /*! |
21 | * \file relay/backend/token_allocator.cc |
22 | * \brief Token allocation classes for backend |
23 | */ |
24 | |
25 | #include "token_allocator.h" |
26 | |
27 | #include <tvm/tir/op.h> |
28 | |
29 | #include <algorithm> |
30 | #include <limits> |
31 | |
32 | namespace tvm { |
33 | namespace relay { |
34 | |
35 | size_t TokenAllocator1D::GetMemorySize(StorageToken* prototype) { |
36 | TensorType ttype = prototype->ttype; |
37 | ICHECK(ttype.defined()); |
38 | size_t size = 1; |
39 | for (IndexExpr dim : ttype->shape) { |
40 | const int64_t* pval = tir::as_const_int(dim); |
41 | ICHECK(pval != nullptr) << "Cannot allocate memory symbolic tensor shape " << ttype->shape; |
42 | ICHECK_GE(*pval, 0) << "Cannot allocate memory for tensor with negative shape" << *pval; |
43 | size *= static_cast<size_t>(pval[0]); |
44 | } |
45 | size *= DivRoundUp(ttype->dtype.bits() * ttype->dtype.lanes(), 8); |
46 | return size; |
47 | } |
48 | |
// Try to reuse a previously released storage token for `prototype`.
// Candidates are free tokens whose byte size lies within
// [size / match_range_, size * match_range_).  Returns the reused token, or
// nullptr when no compatible free token is in range (callers then Alloc).
StorageToken* TokenAllocator1D::Request(StorageToken* prototype) {
  // calculate the size;
  size_t size = GetMemorySize(prototype);
  // search memory block in [size / match_range_, size * match_range_)
  if (match_range_ == 0) {
    // match_range_ == 0 disables reuse entirely.
    return nullptr;
  }
  auto begin = free_.lower_bound(size / match_range_);
  auto mid = free_.lower_bound(size);  // first free token with capacity >= size
  auto end = free_.upper_bound(size * match_range_);
  // search for memory blocks larger than requested (no growth needed)
  for (auto it = mid; it != end; ++it) {
    StorageToken* tok = it->second;
    if (!tok->is_compatible(*prototype)) continue;
    // Only fully released tokens may sit in the free list.
    ICHECK_EQ(tok->ref_counter, 0);
    // Use exact matching strategy
    tok->max_bytes = std::max(size, tok->max_bytes);
    tok->ref_counter = prototype->ref_counter;
    // find an exact match, erase from map and return
    free_.erase(it);
    return tok;
  }
  // then search for memory blocks smaller than requested space; reusing one
  // grows its max_bytes up to the requested size.
  for (auto it = mid; it != begin;) {
    // Decrement before dereferencing so the walk is [begin, mid) in reverse
    // and `begin` itself is inspected.
    --it;
    StorageToken* tok = it->second;
    if (!tok->is_compatible(*prototype)) continue;
    ICHECK_EQ(tok->ref_counter, 0);
    // Use exact matching strategy
    tok->max_bytes = std::max(size, tok->max_bytes);
    tok->ref_counter = prototype->ref_counter;
    // erase from map and return
    free_.erase(it);
    return tok;
  }
  // No reusable block within the match range.
  return nullptr;
}
86 | |
87 | StorageToken* TokenAllocator1D::Alloc(StorageToken* prototype, int64_t storage_id) { |
88 | size_t size = GetMemorySize(prototype); |
89 | prototype->max_bytes = size; |
90 | prototype->storage_id = storage_id; |
91 | data_.push_back(prototype); |
92 | return prototype; |
93 | } |
94 | |
95 | void TokenAllocator1D::CheckForRelease(StorageToken* tok) { |
96 | ICHECK_GE(tok->storage_id, 0); |
97 | ICHECK_GE(tok->ref_counter, 0); |
98 | if (tok->ref_counter == 0) { |
99 | free_.insert({tok->max_bytes, tok}); |
100 | } |
101 | } |
102 | |
// Try to reuse a free 2D texture block for `prototype`.  Returns the reused
// (or expanded) token, or nullptr when no free block is close enough in
// shape — callers then fall back to Alloc.
StorageToken* TokenAllocator2D::Request(StorageToken* prototype) {
  auto shape = GetSize2D(prototype);
  // Reject candidates whose width or height differs from the request by more
  // than this factor, in either direction.
  const int64_t max_ratio = 5;
  int64_t min_added_size_x = std::numeric_limits<int64_t>::max();
  int64_t min_added_size_y = std::numeric_limits<int64_t>::max();
  int64_t min_wasted_size_x = std::numeric_limits<int64_t>::max();
  int64_t min_wasted_size_y = std::numeric_limits<int64_t>::max();
  int64_t best_storage_id = -1;
  MemBlock new_mem;
  for (int64_t free_id : free_list_) {
    MemBlock& cached = blocks_[free_id];
    // Can only reuse texture 2d blocks of the same type
    if (cached.token_->ttype->dtype != prototype->ttype->dtype) {
      continue;
    }
    // Can only reuse texture 2d blocks of the same scope
    // Because reusing textures with different memory scope may lead to
    // accuracy issues, because the data will be packed in a different way for
    // different memory scopes.
    if (cached.token_->virtual_device->memory_scope != prototype->virtual_device->memory_scope) {
      continue;
    }
    // avoid reusing too small and too big textures
    if (shape.width / cached.x_ > max_ratio || cached.x_ / shape.width > max_ratio ||
        shape.height / cached.y_ > max_ratio || cached.y_ / shape.height > max_ratio) {
      continue;
    }
    // Size the block would have after accommodating both the cached extent
    // and the request.
    int64_t new_width = std::max(cached.x_, shape.width);
    int64_t new_height = std::max(cached.y_, shape.height);
    int64_t added_size_x = new_width - cached.x_;    // growth required of the block
    int64_t added_size_y = new_height - cached.y_;
    int64_t wasted_size_x = new_width - shape.width;  // slack left over for the request
    int64_t wasted_size_y = new_height - shape.height;
    // Prioritize minimization of added size first, then minimize
    // wasted size among blocks which would not require expansion
    // NOTE(review): the OR-chain below can accept a candidate that improves
    // only one axis (e.g. smaller added_size_y) while the other axis worsens,
    // and compares x/y criteria independently — confirm this tie-breaking is
    // intentional before relying on it for best-fit behavior.
    if ((min_added_size_x > 0 && added_size_x < min_added_size_x) ||
        (min_added_size_y > 0 && added_size_y < min_added_size_y) ||
        (min_added_size_x == added_size_x && wasted_size_x < min_wasted_size_x) ||
        (min_added_size_y == added_size_y && wasted_size_y < min_wasted_size_y)) {
      min_added_size_x = added_size_x;
      min_added_size_y = added_size_y;
      min_wasted_size_x = wasted_size_x;
      min_wasted_size_y = wasted_size_y;
      best_storage_id = free_id;
      new_mem.x_ = new_width;
      new_mem.y_ = new_height;
    }
  }

  if (min_added_size_x == 0 && min_added_size_y == 0) {
    // use existing block: it already covers the request in both dimensions.
    free_list_.erase(best_storage_id);
    blocks_[best_storage_id].token_->ref_counter += prototype->ref_counter;
    return blocks_[best_storage_id].token_;
  } else if (min_added_size_x <= shape.width || min_added_size_y <= shape.height) {
    // Reset the reference counter of the now live token
    // The chosen block is expanded to new_mem's extents and re-keyed to the
    // prototype token.
    // NOTE(review): ref_counter += 1 here differs from the 1D path, which
    // adopts prototype->ref_counter wholesale — verify this asymmetry is
    // intended.
    free_list_.erase(best_storage_id);
    new_mem.token_ = prototype;
    new_mem.token_->ref_counter += 1;
    new_mem.token_->storage_id = best_storage_id;
    blocks_[best_storage_id] = new_mem;
    return new_mem.token_;
  }
  // No candidate worth reusing (or free_list_ was empty).
  return nullptr;
}
168 | |
169 | StorageToken* TokenAllocator2D::Alloc(StorageToken* prototype, int64_t storage_id) { |
170 | auto shape = GetSize2D(prototype); |
171 | MemBlock block; |
172 | block.x_ = shape.width; |
173 | block.y_ = shape.height; |
174 | prototype->storage_id = storage_id; |
175 | block.token_ = prototype; |
176 | blocks_[prototype->storage_id] = block; |
177 | return prototype; |
178 | } |
179 | |
180 | void TokenAllocator2D::CheckForRelease(StorageToken* tok) { |
181 | ICHECK_GE(tok->storage_id, 0); |
182 | ICHECK_GE(tok->ref_counter, 0); |
183 | if (tok->ref_counter == 0) { |
184 | free_list_.insert(tok->storage_id); |
185 | } |
186 | } |
187 | |
188 | runtime::Texture2DShape<int64_t> TokenAllocator2D::GetSize2D(StorageToken* prototype) { |
189 | TensorType ttype = prototype->ttype; |
190 | ICHECK(ttype.defined()); |
191 | size_t axis = runtime::DefaultTextureLayoutSeparator(ttype->shape.size(), |
192 | prototype->virtual_device->memory_scope); |
193 | struct Shape { |
194 | const Array<PrimExpr>& shape; |
195 | int64_t operator[](size_t i) const { return *tir::as_const_int(shape[i]); } |
196 | }; |
197 | return runtime::ApplyTexture2DFlattening<int64_t>(Shape{ttype->shape}, ttype->shape.size(), axis); |
198 | } |
199 | |
200 | } // namespace relay |
201 | } // namespace tvm |
202 | |