/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied. See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

/*!
 * \file relay/backend/token_allocator.cc
 * \brief Token allocation classes for backend
 */

#include "token_allocator.h"

#include <tvm/tir/op.h>

#include <algorithm>
#include <limits>

namespace tvm {
namespace relay {

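// Computes the backing buffer size in bytes for a token: the product of all
// (constant, non-negative) shape dimensions times the element size rounded up
// to whole bytes. Illustrative example: a float32 tensor of shape (2, 3, 4)
// needs 2 * 3 * 4 * 4 = 96 bytes; a 1-bit, single-lane element still occupies
// one byte since DivRoundUp(1 * 1, 8) = 1.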
size_t TokenAllocator1D::GetMemorySize(StorageToken* prototype) {
  TensorType ttype = prototype->ttype;
  ICHECK(ttype.defined());
  size_t size = 1;
  for (IndexExpr dim : ttype->shape) {
    const int64_t* pval = tir::as_const_int(dim);
    ICHECK(pval != nullptr) << "Cannot allocate memory for symbolic tensor shape " << ttype->shape;
    ICHECK_GE(*pval, 0) << "Cannot allocate memory for tensor with negative shape " << *pval;
    size *= static_cast<size_t>(pval[0]);
  }
  size *= DivRoundUp(ttype->dtype.bits() * ttype->dtype.lanes(), 8);
  return size;
}

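// Tries to satisfy a request from the free pool. The search window is
// [size / match_range_, size * match_range_): compatible blocks at least as
// large as the request are preferred, and smaller blocks are considered as a
// fallback, growing their recorded max_bytes to cover the request. Returns
// nullptr when no suitable block exists, so the caller can fall back to
// Alloc(). As an illustrative example, assuming a match_range_ of 16, a
// 1 KiB request would only consider free blocks keyed between 64 B and 16 KiB.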
StorageToken* TokenAllocator1D::Request(StorageToken* prototype) {
  // calculate the size
  size_t size = GetMemorySize(prototype);
  // search for a free memory block in [size / match_range_, size * match_range_)
  if (match_range_ == 0) {
    return nullptr;
  }
  auto begin = free_.lower_bound(size / match_range_);
  auto mid = free_.lower_bound(size);
  auto end = free_.upper_bound(size * match_range_);
  // first search for memory blocks at least as large as requested
  for (auto it = mid; it != end; ++it) {
    StorageToken* tok = it->second;
    if (!tok->is_compatible(*prototype)) continue;
    ICHECK_EQ(tok->ref_counter, 0);
    // Use exact matching strategy
    tok->max_bytes = std::max(size, tok->max_bytes);
    tok->ref_counter = prototype->ref_counter;
    // found a match, erase from the free map and return
    free_.erase(it);
    return tok;
  }
  // then search for memory blocks smaller than the requested space
  for (auto it = mid; it != begin;) {
    --it;
    StorageToken* tok = it->second;
    if (!tok->is_compatible(*prototype)) continue;
    ICHECK_EQ(tok->ref_counter, 0);
    // Use exact matching strategy
    tok->max_bytes = std::max(size, tok->max_bytes);
    tok->ref_counter = prototype->ref_counter;
    // erase from the free map and return
    free_.erase(it);
    return tok;
  }
  return nullptr;
}

StorageToken* TokenAllocator1D::Alloc(StorageToken* prototype, int64_t storage_id) {
  size_t size = GetMemorySize(prototype);
  prototype->max_bytes = size;
  prototype->storage_id = storage_id;
  data_.push_back(prototype);
  return prototype;
}

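// Once a token's reference count has dropped to zero it can be recycled:
// put it back into the size-keyed free pool so a later Request() may reuse it.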
void TokenAllocator1D::CheckForRelease(StorageToken* tok) {
  ICHECK_GE(tok->storage_id, 0);
  ICHECK_GE(tok->ref_counter, 0);
  if (tok->ref_counter == 0) {
    free_.insert({tok->max_bytes, tok});
  }
}

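// Attempts to reuse a previously allocated 2D texture block. A candidate must
// have the same dtype and memory scope as the request and stay within a 5x
// ratio of the requested extents in both dimensions. Among candidates, the
// block needing the least growth is preferred, with wasted area as the
// tie-breaker. Sketch of the outcome, assuming a hypothetical free 16x16 block
// and an 8x20 request: the block would be grown to 16x20 (added height 4,
// wasted width 8) and its storage id reused, rather than allocating a new
// texture, provided no better candidate exists.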
StorageToken* TokenAllocator2D::Request(StorageToken* prototype) {
  auto shape = GetSize2D(prototype);
  const int64_t max_ratio = 5;
  int64_t min_added_size_x = std::numeric_limits<int64_t>::max();
  int64_t min_added_size_y = std::numeric_limits<int64_t>::max();
  int64_t min_wasted_size_x = std::numeric_limits<int64_t>::max();
  int64_t min_wasted_size_y = std::numeric_limits<int64_t>::max();
  int64_t best_storage_id = -1;
  MemBlock new_mem;
  for (int64_t free_id : free_list_) {
    MemBlock& cached = blocks_[free_id];
    // Can only reuse texture 2d blocks of the same type
    if (cached.token_->ttype->dtype != prototype->ttype->dtype) {
      continue;
    }
    // Can only reuse texture 2d blocks of the same memory scope: data is
    // packed differently for different scopes, so reusing across scopes
    // could produce incorrect results.
    if (cached.token_->virtual_device->memory_scope != prototype->virtual_device->memory_scope) {
      continue;
    }
    // avoid reusing textures that are much smaller or much larger than requested
    if (shape.width / cached.x_ > max_ratio || cached.x_ / shape.width > max_ratio ||
        shape.height / cached.y_ > max_ratio || cached.y_ / shape.height > max_ratio) {
      continue;
    }
    int64_t new_width = std::max(cached.x_, shape.width);
    int64_t new_height = std::max(cached.y_, shape.height);
    int64_t added_size_x = new_width - cached.x_;
    int64_t added_size_y = new_height - cached.y_;
    int64_t wasted_size_x = new_width - shape.width;
    int64_t wasted_size_y = new_height - shape.height;
    // Prioritize minimization of added size first, then minimize
    // wasted size among blocks which would not require expansion
    if ((min_added_size_x > 0 && added_size_x < min_added_size_x) ||
        (min_added_size_y > 0 && added_size_y < min_added_size_y) ||
        (min_added_size_x == added_size_x && wasted_size_x < min_wasted_size_x) ||
        (min_added_size_y == added_size_y && wasted_size_y < min_wasted_size_y)) {
      min_added_size_x = added_size_x;
      min_added_size_y = added_size_y;
      min_wasted_size_x = wasted_size_x;
      min_wasted_size_y = wasted_size_y;
      best_storage_id = free_id;
      new_mem.x_ = new_width;
      new_mem.y_ = new_height;
    }
  }

  if (min_added_size_x == 0 && min_added_size_y == 0) {
    // use existing block
    free_list_.erase(best_storage_id);
    blocks_[best_storage_id].token_->ref_counter += prototype->ref_counter;
    return blocks_[best_storage_id].token_;
  } else if (min_added_size_x <= shape.width || min_added_size_y <= shape.height) {
    // Reset the reference counter of the now live token
    free_list_.erase(best_storage_id);
    new_mem.token_ = prototype;
    new_mem.token_->ref_counter += 1;
    new_mem.token_->storage_id = best_storage_id;
    blocks_[best_storage_id] = new_mem;
    return new_mem.token_;
  }
  return nullptr;
}

StorageToken* TokenAllocator2D::Alloc(StorageToken* prototype, int64_t storage_id) {
  auto shape = GetSize2D(prototype);
  MemBlock block;
  block.x_ = shape.width;
  block.y_ = shape.height;
  prototype->storage_id = storage_id;
  block.token_ = prototype;
  blocks_[prototype->storage_id] = block;
  return prototype;
}

void TokenAllocator2D::CheckForRelease(StorageToken* tok) {
  ICHECK_GE(tok->storage_id, 0);
  ICHECK_GE(tok->ref_counter, 0);
  if (tok->ref_counter == 0) {
    free_list_.insert(tok->storage_id);
  }
}

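// Derives the 2D texture extents for a token by flattening its tensor shape
// around the axis chosen for the token's memory scope; this assumes all
// dimensions are compile-time constants.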
runtime::Texture2DShape<int64_t> TokenAllocator2D::GetSize2D(StorageToken* prototype) {
  TensorType ttype = prototype->ttype;
  ICHECK(ttype.defined());
  size_t axis = runtime::DefaultTextureLayoutSeparator(ttype->shape.size(),
                                                       prototype->virtual_device->memory_scope);
  struct Shape {
    const Array<PrimExpr>& shape;
    int64_t operator[](size_t i) const { return *tir::as_const_int(shape[i]); }
  };
  return runtime::ApplyTexture2DFlattening<int64_t>(Shape{ttype->shape}, ttype->shape.size(), axis);
}

}  // namespace relay
}  // namespace tvm