1/*******************************************************************************
2* Copyright 2019-2022 Intel Corporation
3*
4* Licensed under the Apache License, Version 2.0 (the "License");
5* you may not use this file except in compliance with the License.
6* You may obtain a copy of the License at
7*
8* http://www.apache.org/licenses/LICENSE-2.0
9*
10* Unless required by applicable law or agreed to in writing, software
11* distributed under the License is distributed on an "AS IS" BASIS,
12* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13* See the License for the specific language governing permissions and
14* limitations under the License.
15*******************************************************************************/
16
17#ifndef _GEN_REGISTER_ALLOCATOR_HPP__
18#define _GEN_REGISTER_ALLOCATOR_HPP__
19
20#include "ngen.hpp"
21#include <cstdint>
22#include <stdexcept>
23
24namespace ngen {
25
26// Gen registers are organized in banks of bundles.
27// Each bundle is modeled as groups of contiguous registers separated by a stride.
28struct Bundle {
29 static const int8_t any = -1;
30
31 int8_t bundle_id;
32 int8_t bank_id;
33
34 Bundle() : bundle_id(any), bank_id(any) {}
35 Bundle(int8_t bank_id_, int8_t bundle_id_) : bundle_id(bundle_id_), bank_id(bank_id_) {}
36
37 // Number of bundles in each bank (per thread).
38 static constexpr int bundle_count(HW hw) { return (hw >= HW::XeHP) ? 16 : (hw == HW::Gen12LP) ? 8 : 2; }
39 // Number of banks.
40 static constexpr int bank_count(HW hw) { return 2; }
41
42 static Bundle locate(HW hw, RegData reg);
43
44 int first_reg(HW hw) const; // The first register in the bundle.
45 int group_size(HW hw) const; // Number of registers in each contiguous group of the bundle.
46 int stride(HW hw) const; // Stride between register groups of the bundle.
47
48 int64_t reg_mask(HW hw, int offset) const; // Get register mask for this bundle, for registers [64*offset, 64*(offset+1)).
49
50 friend constexpr bool operator==(const Bundle &b1, const Bundle &b2) {
51 return b1.bundle_id == b2.bundle_id && b1.bank_id == b2.bank_id;
52 }
53
54 static bool conflicts(HW hw, RegData r1, RegData r2) {
55 return !r1.isNull() && !r2.isNull() && (locate(hw, r1) == locate(hw, r2));
56 }
57
58 static bool same_bank(HW hw, RegData r1, RegData r2) {
59 return !r1.isNull() && !r2.isNull() && (locate(hw, r1).bank_id == locate(hw, r2).bank_id);
60 }
61};
62
63// A group of register bundles.
64struct BundleGroup {
65 explicit BundleGroup(HW hw_) : hw(hw_) {}
66
67 static BundleGroup AllBundles() {
68 BundleGroup bg{HW::Gen9};
69 for (int rchunk = 0; rchunk < nmasks; rchunk++)
70 bg.reg_masks[rchunk] = ~uint64_t(0);
71 return bg;
72 }
73
74 friend BundleGroup operator|(BundleGroup lhs, Bundle rhs) { lhs |= rhs; return lhs; }
75 BundleGroup &operator|=(Bundle rhs) {
76 for (int rchunk = 0; rchunk < nmasks; rchunk++)
77 reg_masks[rchunk] |= rhs.reg_mask(hw, rchunk);
78 return *this;
79 }
80
81 BundleGroup operator~() {
82 auto result = *this;
83 for (int rchunk = 0; rchunk < nmasks; rchunk++)
84 result.reg_masks[rchunk] = ~reg_masks[rchunk];
85 return result;
86 }
87
88 uint64_t reg_mask(int rchunk) const {
89 return (rchunk < nmasks) ? reg_masks[rchunk % nmasks] : 0;
90 }
91
92private:
93 HW hw;
94
95 static constexpr int max_regs = 256;
96 static constexpr int nmasks = max_regs / 64;
97
98 uint64_t reg_masks[nmasks] = {0};
99};
100
101// Gen register allocator.
102class RegisterAllocator {
103public:
104 explicit RegisterAllocator(HW hw_) : hw(hw_) { init(); }
105
106 HW hardware() const { return hw; }
107
108 // Allocation functions: sub-GRFs, full GRFs, and GRF ranges.
109 GRFRange alloc_range(int nregs, Bundle base_bundle = Bundle(),
110 BundleGroup bundle_mask = BundleGroup::AllBundles());
111 GRF alloc(Bundle bundle = Bundle()) { return alloc_range(1, bundle)[0]; }
112
113 Subregister alloc_sub(DataType type, Bundle bundle = Bundle());
114 template <typename T>
115 Subregister alloc_sub(Bundle bundle = Bundle()) { return alloc_sub(getDataType<T>(), bundle); }
116
117 FlagRegister alloc_flag();
118
119 // Attempted allocation. Return value is invalid if allocation failed.
120 GRFRange try_alloc_range(int nregs, Bundle base_bundle = Bundle(),
121 BundleGroup bundle_mask = BundleGroup::AllBundles());
122 GRF try_alloc(Bundle bundle = Bundle());
123
124 Subregister try_alloc_sub(DataType type, Bundle bundle = Bundle());
125 template <typename T>
126 Subregister try_alloc_sub(Bundle bundle = Bundle()) { return try_alloc_sub(getDataType<T>(), bundle); }
127
128 FlagRegister try_alloc_flag();
129
130 // Release a previous allocation or claim.
131 void release(GRF reg);
132 void release(GRFRange range);
133 void release(Subregister subreg);
134 void release(FlagRegister flag);
135
136 template <typename RD>
137 void safeRelease(RD &reg) { release(reg); reg.invalidate(); }
138
139 // Claim specific registers.
140 void claim(GRF reg);
141 void claim(GRFRange range);
142 void claim(Subregister subreg);
143 void claim(FlagRegister flag);
144
145 // Set register count.
146 void setRegisterCount(int rcount);
147 int getRegisterCount() const { return reg_count;}
148
149 int countAllocedRegisters() const;
150
151 void dump(std::ostream &str);
152
153protected:
154 static constexpr int max_regs = 256;
155 using mtype = uint16_t;
156
157 HW hw; // HW generation.
158 uint8_t free_whole[max_regs / 8]; // Bitmap of free whole GRFs.
159 mtype free_sub[max_regs]; // Bitmap of free partial GRFs, at dword granularity.
160 uint16_t reg_count; // # of registers.
161 uint8_t free_flag; // Bitmap of free flag registers.
162 mtype fullSubMask;
163
164 void init();
165 void claim_sub(int r, int o, int dw);
166};
167
168
169// Exceptions.
// Exception type for register allocation failure; what() reports that the
// requested bundle has insufficient registers.
class out_of_registers_exception : public std::runtime_error {
public:
    out_of_registers_exception()
        : std::runtime_error("Insufficient registers in requested bundle")
    {
    }
};
174
175} /* namespace ngen */
176
177#endif /* include guard */
178