1 | /******************************************************************************* |
2 | * Copyright 2019-2022 Intel Corporation |
3 | * |
4 | * Licensed under the Apache License, Version 2.0 (the "License"); |
5 | * you may not use this file except in compliance with the License. |
6 | * You may obtain a copy of the License at |
7 | * |
8 | * http://www.apache.org/licenses/LICENSE-2.0 |
9 | * |
10 | * Unless required by applicable law or agreed to in writing, software |
11 | * distributed under the License is distributed on an "AS IS" BASIS, |
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
13 | * See the License for the specific language governing permissions and |
14 | * limitations under the License. |
15 | *******************************************************************************/ |
16 | |
17 | #ifndef _GEN_REGISTER_ALLOCATOR_HPP__ |
18 | #define _GEN_REGISTER_ALLOCATOR_HPP__ |
19 | |
20 | #include "ngen.hpp" |
21 | #include <cstdint> |
22 | #include <stdexcept> |
23 | |
24 | namespace ngen { |
25 | |
26 | // Gen registers are organized in banks of bundles. |
27 | // Each bundle is modeled as groups of contiguous registers separated by a stride. |
28 | struct Bundle { |
29 | static const int8_t any = -1; |
30 | |
31 | int8_t bundle_id; |
32 | int8_t bank_id; |
33 | |
34 | Bundle() : bundle_id(any), bank_id(any) {} |
35 | Bundle(int8_t bank_id_, int8_t bundle_id_) : bundle_id(bundle_id_), bank_id(bank_id_) {} |
36 | |
37 | // Number of bundles in each bank (per thread). |
38 | static constexpr int bundle_count(HW hw) { return (hw >= HW::XeHP) ? 16 : (hw == HW::Gen12LP) ? 8 : 2; } |
39 | // Number of banks. |
40 | static constexpr int bank_count(HW hw) { return 2; } |
41 | |
42 | static Bundle locate(HW hw, RegData reg); |
43 | |
44 | int first_reg(HW hw) const; // The first register in the bundle. |
45 | int group_size(HW hw) const; // Number of registers in each contiguous group of the bundle. |
46 | int stride(HW hw) const; // Stride between register groups of the bundle. |
47 | |
48 | int64_t reg_mask(HW hw, int offset) const; // Get register mask for this bundle, for registers [64*offset, 64*(offset+1)). |
49 | |
50 | friend constexpr bool operator==(const Bundle &b1, const Bundle &b2) { |
51 | return b1.bundle_id == b2.bundle_id && b1.bank_id == b2.bank_id; |
52 | } |
53 | |
54 | static bool conflicts(HW hw, RegData r1, RegData r2) { |
55 | return !r1.isNull() && !r2.isNull() && (locate(hw, r1) == locate(hw, r2)); |
56 | } |
57 | |
58 | static bool same_bank(HW hw, RegData r1, RegData r2) { |
59 | return !r1.isNull() && !r2.isNull() && (locate(hw, r1).bank_id == locate(hw, r2).bank_id); |
60 | } |
61 | }; |
62 | |
63 | // A group of register bundles. |
64 | struct BundleGroup { |
65 | explicit BundleGroup(HW hw_) : hw(hw_) {} |
66 | |
67 | static BundleGroup AllBundles() { |
68 | BundleGroup bg{HW::Gen9}; |
69 | for (int rchunk = 0; rchunk < nmasks; rchunk++) |
70 | bg.reg_masks[rchunk] = ~uint64_t(0); |
71 | return bg; |
72 | } |
73 | |
74 | friend BundleGroup operator|(BundleGroup lhs, Bundle rhs) { lhs |= rhs; return lhs; } |
75 | BundleGroup &operator|=(Bundle rhs) { |
76 | for (int rchunk = 0; rchunk < nmasks; rchunk++) |
77 | reg_masks[rchunk] |= rhs.reg_mask(hw, rchunk); |
78 | return *this; |
79 | } |
80 | |
81 | BundleGroup operator~() { |
82 | auto result = *this; |
83 | for (int rchunk = 0; rchunk < nmasks; rchunk++) |
84 | result.reg_masks[rchunk] = ~reg_masks[rchunk]; |
85 | return result; |
86 | } |
87 | |
88 | uint64_t reg_mask(int rchunk) const { |
89 | return (rchunk < nmasks) ? reg_masks[rchunk % nmasks] : 0; |
90 | } |
91 | |
92 | private: |
93 | HW hw; |
94 | |
95 | static constexpr int max_regs = 256; |
96 | static constexpr int nmasks = max_regs / 64; |
97 | |
98 | uint64_t reg_masks[nmasks] = {0}; |
99 | }; |
100 | |
101 | // Gen register allocator. |
102 | class RegisterAllocator { |
103 | public: |
104 | explicit RegisterAllocator(HW hw_) : hw(hw_) { init(); } |
105 | |
106 | HW hardware() const { return hw; } |
107 | |
108 | // Allocation functions: sub-GRFs, full GRFs, and GRF ranges. |
109 | GRFRange alloc_range(int nregs, Bundle base_bundle = Bundle(), |
110 | BundleGroup bundle_mask = BundleGroup::AllBundles()); |
111 | GRF alloc(Bundle bundle = Bundle()) { return alloc_range(1, bundle)[0]; } |
112 | |
113 | Subregister alloc_sub(DataType type, Bundle bundle = Bundle()); |
114 | template <typename T> |
115 | Subregister alloc_sub(Bundle bundle = Bundle()) { return alloc_sub(getDataType<T>(), bundle); } |
116 | |
117 | FlagRegister alloc_flag(); |
118 | |
119 | // Attempted allocation. Return value is invalid if allocation failed. |
120 | GRFRange try_alloc_range(int nregs, Bundle base_bundle = Bundle(), |
121 | BundleGroup bundle_mask = BundleGroup::AllBundles()); |
122 | GRF try_alloc(Bundle bundle = Bundle()); |
123 | |
124 | Subregister try_alloc_sub(DataType type, Bundle bundle = Bundle()); |
125 | template <typename T> |
126 | Subregister try_alloc_sub(Bundle bundle = Bundle()) { return try_alloc_sub(getDataType<T>(), bundle); } |
127 | |
128 | FlagRegister try_alloc_flag(); |
129 | |
130 | // Release a previous allocation or claim. |
131 | void release(GRF reg); |
132 | void release(GRFRange range); |
133 | void release(Subregister subreg); |
134 | void release(FlagRegister flag); |
135 | |
136 | template <typename RD> |
137 | void safeRelease(RD ®) { release(reg); reg.invalidate(); } |
138 | |
139 | // Claim specific registers. |
140 | void claim(GRF reg); |
141 | void claim(GRFRange range); |
142 | void claim(Subregister subreg); |
143 | void claim(FlagRegister flag); |
144 | |
145 | // Set register count. |
146 | void setRegisterCount(int rcount); |
147 | int getRegisterCount() const { return reg_count;} |
148 | |
149 | int countAllocedRegisters() const; |
150 | |
151 | void dump(std::ostream &str); |
152 | |
153 | protected: |
154 | static constexpr int max_regs = 256; |
155 | using mtype = uint16_t; |
156 | |
157 | HW hw; // HW generation. |
158 | uint8_t free_whole[max_regs / 8]; // Bitmap of free whole GRFs. |
159 | mtype free_sub[max_regs]; // Bitmap of free partial GRFs, at dword granularity. |
160 | uint16_t reg_count; // # of registers. |
161 | uint8_t free_flag; // Bitmap of free flag registers. |
162 | mtype fullSubMask; |
163 | |
164 | void init(); |
165 | void claim_sub(int r, int o, int dw); |
166 | }; |
167 | |
168 | |
// Exceptions.

// Runtime error carrying a fixed "insufficient registers" message.
// NOTE(review): presumably thrown by the allocator's non-try alloc functions;
// the throwing code is not part of this header.
class out_of_registers_exception : public std::runtime_error {
public:
    out_of_registers_exception()
        : std::runtime_error("Insufficient registers in requested bundle") {}
};
174 | |
175 | } /* namespace ngen */ |
176 | |
177 | #endif /* include guard */ |
178 | |