1 | /******************************************************************************* |
2 | * Copyright 2019-2022 Intel Corporation |
3 | * |
4 | * Licensed under the Apache License, Version 2.0 (the "License"); |
5 | * you may not use this file except in compliance with the License. |
6 | * You may obtain a copy of the License at |
7 | * |
8 | * http://www.apache.org/licenses/LICENSE-2.0 |
9 | * |
10 | * Unless required by applicable law or agreed to in writing, software |
11 | * distributed under the License is distributed on an "AS IS" BASIS, |
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
13 | * See the License for the specific language governing permissions and |
14 | * limitations under the License. |
15 | *******************************************************************************/ |
16 | |
17 | // nGEN: a C++ library for runtime Gen assembly generation. |
18 | // |
19 | // Macros that control nGEN's interface: |
20 | // NGEN_SAFE if defined, enables run-time safety checks. Exceptions will be thrown if checks fail. |
21 | // NGEN_SHORT_NAMES if defined, enables some short names (r[...] for indirect addressing, W for NoMask) |
22 | // NGEN_GLOBAL_REGS if defined, register names and instruction modifiers (r7, cr0, Switch, etc.) are |
23 | // global variables in the ngen namespace. Otherwise, they are members of the code |
24 | // generator classes |
25 | // NGEN_CPP11 if defined, ngen is C++11-compatible (C++17 not required) |
26 | |
27 | #ifndef NGEN_HPP |
28 | #define NGEN_HPP |
29 | |
30 | #include "ngen_config.hpp" |
31 | |
32 | #include <array> |
33 | #include <cstring> |
34 | #include <type_traits> |
35 | #include <vector> |
36 | |
37 | #include "ngen_core.hpp" |
38 | #include "ngen_auto_swsb.hpp" |
39 | |
40 | namespace ngen { |
41 | |
42 | // Forward declarations. |
43 | template <HW hw> class BinaryCodeGenerator; |
44 | template <HW hw> class ELFCodeGenerator; |
45 | |
46 | // MSVC v140 workaround for enum comparison in template arguments. |
47 | static constexpr bool hwLT(HW hw1, HW hw2) { return hw1 < hw2; } |
48 | static constexpr bool hwLE(HW hw1, HW hw2) { return hw1 <= hw2; } |
49 | static constexpr bool hwGE(HW hw1, HW hw2) { return hw1 >= hw2; } |
50 | static constexpr bool hwGT(HW hw1, HW hw2) { return hw1 > hw2; } |
51 | |
52 | // ----------------------------------------------------------------------- |
53 | |
// Register file selectors produced by the getRegFile() overloads.
enum RegFiles : unsigned {
RegFileARF = 0, // returned for RegData whose isARF() is true
RegFileGRF = 1, // returned for every other RegData
RegFileIMM = 3, // returned for Immediate operands (no enumerator with value 2 is defined here)
};
59 | |
60 | inline unsigned getRegFile(const RegData &rd) { return rd.isARF() ? RegFileARF : RegFileGRF; } |
61 | inline unsigned getRegFile(const Align16Operand &o) { return getRegFile(o.getReg()); } |
62 | inline unsigned getRegFile(const ExtendedReg ®) { return getRegFile(reg.getBase()); } |
63 | inline unsigned getRegFile(const Immediate &imm) { return RegFileIMM; } |
64 | |
65 | // ----------------------------------------------------------------------- |
66 | // Binary formats, split between pre-Gen12 and post-Gen12. |
67 | |
68 | #include "ngen_gen8.hpp" |
69 | #include "ngen_gen12.hpp" |
70 | |
71 | // ----------------------------------------------------------------------- |
72 | |
73 | |
// One pending label reference. InstructionStream::addFixup() stamps `anchor`
// with the stream length at the time of recording; fixLabels() later writes
// (target - anchor) at byte position anchor + offset.
class LabelFixup {
public:
    // Field offsets that can be passed as `offset`.
    static constexpr auto JIPOffset = 12;
    static constexpr auto JIPOffsetJMPI = -4;
    static constexpr auto UIPOffset = 8;

    uint32_t labelID;   // ID of the referenced label
    int32_t anchor = 0; // filled in when the fixup is added to a stream
    int32_t offset;     // position of the patched field, relative to anchor

    LabelFixup(uint32_t labelID_, int32_t offset_) : labelID{labelID_}, offset{offset_} {}
};
86 | |
87 | #if defined(NGEN_GLOBAL_REGS) && !defined(NGEN_GLOBAL_REGS_DEFINED) |
88 | #define NGEN_GLOBAL_REGS_DEFINED |
89 | #include "ngen_registers.hpp" |
90 | #endif |
91 | |
92 | template <HW hw> |
93 | class BinaryCodeGenerator |
94 | { |
95 | friend class ELFCodeGenerator<hw>; |
96 | |
97 | public: |
// The HW template argument, exposed as a compile-time constant.
static constexpr HW hardware = hw;
99 | |
100 | protected: |
101 | class InstructionStream { |
102 | friend class BinaryCodeGenerator; |
103 | |
104 | std::vector<LabelFixup> fixups; |
105 | std::vector<uint32_t> labels; |
106 | std::vector<uint64_t> code; |
107 | bool appended = false; |
108 | |
109 | int length() const { return int(code.size() * sizeof(uint64_t)); } |
110 | |
111 | void db(const Instruction8 &i) { |
112 | code.push_back(i.qword[0]); |
113 | code.push_back(i.qword[1]); |
114 | } |
115 | |
116 | void db(const Instruction12 &i) { |
117 | code.push_back(i.qword[0]); |
118 | code.push_back(i.qword[1]); |
119 | } |
120 | |
121 | void addFixup(LabelFixup fixup) { |
122 | fixup.anchor = length(); |
123 | fixups.push_back(fixup); |
124 | } |
125 | |
126 | void mark(Label &label, LabelManager &man) { |
127 | uint32_t id = label.getID(man); |
128 | |
129 | man.setTarget(id, length()); |
130 | labels.push_back(id); |
131 | } |
132 | |
133 | void fixLabels(LabelManager &man) { |
134 | for (const auto &fixup : fixups) { |
135 | int32_t target = man.getTarget(fixup.labelID); |
136 | uint8_t *field = ((uint8_t *) code.data()) + fixup.anchor + fixup.offset; |
137 | *((int32_t *) field) = target - fixup.anchor; |
138 | } |
139 | } |
140 | |
141 | void append(InstructionStream &other, LabelManager &man) { |
142 | auto offset = length(); |
143 | auto sz = code.size(); |
144 | |
145 | code.resize(sz + other.code.size()); |
146 | std::copy(other.code.begin(), other.code.end(), code.begin() + sz); |
147 | |
148 | sz = labels.size(); |
149 | labels.resize(sz + other.labels.size()); |
150 | std::copy(other.labels.begin(), other.labels.end(), labels.begin() + sz); |
151 | |
152 | for (LabelFixup fixup : other.fixups) { |
153 | fixup.anchor += offset; |
154 | fixups.push_back(fixup); |
155 | } |
156 | |
157 | #ifdef NGEN_SAFE |
158 | if (other.appended && !other.labels.empty()) |
159 | throw multiple_label_exception(); |
160 | #endif |
161 | |
162 | for (uint32_t id : other.labels) |
163 | man.offsetTarget(id, offset); |
164 | |
165 | other.appended = true; |
166 | } |
167 | |
168 | InstructionStream() {} |
169 | }; |
170 | |
171 | class Program { |
172 | friend class BinaryCodeGenerator; |
173 | using Instruction = typename std::conditional<(hw >= HW::XeHPC), InstructionXeHPC, Instruction12>::type; |
174 | std::vector<uint64_t> &code; |
175 | |
176 | Program(InstructionStream &stream) : code(stream.code) {}; |
177 | |
178 | public: |
179 | size_t size() const { return code.size() >> 1; } |
180 | Instruction &operator[](size_t index) { return *reinterpret_cast<Instruction *>(&code[index * 2]); } |
181 | const Instruction &operator[](size_t index) const { return *reinterpret_cast<Instruction *>(&code[index * 2]); } |
182 | }; |
183 | |
// True when the target hardware is Gen12LP or newer.
static constexpr bool isGen12 = (hw >= HW::Gen12LP);
// Hardware stepping; set by the constructor and setStepping(), defaults to 0.
int stepping = 0;

// NOTE(review): these labels are not referenced in the visible part of the
// file; judging by the names they mark the points where local IDs / kernel
// arguments have been loaded — confirm against their users.
Label _labelLocalIDsLoaded;
Label _labelArgsLoaded;
189 | |
190 | private: |
// Modifier defaults controlled by setDefaultNoMask() / setDefaultAutoSWSB().
InstructionModifier defaultModifier;

// Label bookkeeping shared by every stream of this generator.
LabelManager labelManager;
// The stream the constructor pushes first; it is a member, not heap-allocated.
InstructionStream rootStream;
// Stack of active streams; back() receives newly emitted instructions.
std::vector<InstructionStream*> streamStack;
196 | |
197 | void db(const Instruction8 &i) { streamStack.back()->db(i); } |
198 | void db(const Instruction12 &i) { streamStack.back()->db(i); } |
199 | void addFixup(LabelFixup fixup) { streamStack.back()->addFixup(fixup); } |
200 | |
// opX: generic instruction emitters (declarations only; the definitions are
// not in this part of the file). Every form comes as a pair of overloads
// selected by enable_if on hw_: one for hw < Gen12LP and one for hw >= Gen12LP.
// NOTE(review): forceWE presumably forces the write-enable (NoMask) modifier —
// confirm against the definitions.

// One source: generic operand, then Immediate.
template <bool forceWE = false, typename D, typename S0, HW hw_ = hw>
typename std::enable_if<hwLT(hw_, HW::Gen12LP)>::type opX(Opcode op, DataType defaultType, const InstructionModifier &mod, D dst, S0 src0);
template <bool forceWE = false, typename D, typename S0, HW hw_ = hw>
typename std::enable_if<hwGE(hw_, HW::Gen12LP)>::type opX(Opcode op, DataType defaultType, const InstructionModifier &mod, D dst, S0 src0);
template <bool forceWE = false, typename D, HW hw_ = hw>
typename std::enable_if<hwLT(hw_, HW::Gen12LP)>::type opX(Opcode op, DataType defaultType, const InstructionModifier &mod, D dst, const Immediate &src0);
template <bool forceWE = false, typename D, HW hw_ = hw>
typename std::enable_if<hwGE(hw_, HW::Gen12LP)>::type opX(Opcode op, DataType defaultType, const InstructionModifier &mod, D dst, const Immediate &src0);

// Two sources: generic src1, then Immediate src1.
template <bool forceWE = false, typename D, typename S0, typename S1, HW hw_ = hw>
typename std::enable_if<hwLT(hw_, HW::Gen12LP)>::type opX(Opcode op, DataType defaultType, const InstructionModifier &mod, D dst, S0 src0, S1 src1);
template <bool forceWE = false, typename D, typename S0, typename S1, HW hw_ = hw>
typename std::enable_if<hwGE(hw_, HW::Gen12LP)>::type opX(Opcode op, DataType defaultType, const InstructionModifier &mod, D dst, S0 src0, S1 src1);
template <bool forceWE = false, typename D, typename S0, HW hw_ = hw>
typename std::enable_if<hwLT(hw_, HW::Gen12LP)>::type opX(Opcode op, DataType defaultType, const InstructionModifier &mod, D dst, S0 src0, const Immediate &src1);
template <bool forceWE = false, typename D, typename S0, HW hw_ = hw>
typename std::enable_if<hwGE(hw_, HW::Gen12LP)>::type opX(Opcode op, DataType defaultType, const InstructionModifier &mod, D dst, S0 src0, const Immediate &src1);

// Three sources: an all-RegData form enabled only for hw <= Gen9, an
// Align16Operand form for hw < Gen12LP, and generic forms for both ranges.
template <HW hw_ = hw>
typename std::enable_if<hwLE(hw_, HW::Gen9)>::type opX(Opcode op, DataType defaultType, const InstructionModifier &mod, RegData dst, RegData src0, RegData src1, RegData src2);
template <HW hw_ = hw>
typename std::enable_if<hwLT(hw_, HW::Gen12LP)>::type opX(Opcode op, DataType defaultType, const InstructionModifier &mod, Align16Operand dst, Align16Operand src0, Align16Operand src1, Align16Operand src2);
template <typename D, typename S0, typename S1, typename S2, HW hw_ = hw>
typename std::enable_if<hwLT(hw_, HW::Gen12LP)>::type opX(Opcode op, DataType defaultType, const InstructionModifier &mod, D dst, S0 src0, S1 src1, S2 src2);
template <typename D, typename S0, typename S1, typename S2, HW hw_ = hw>
typename std::enable_if<hwGE(hw_, HW::Gen12LP)>::type opX(Opcode op, DataType defaultType, const InstructionModifier &mod, D dst, S0 src0, S1 src1, S2 src2);
227 | |
// Emitters for math, bfn and dpas instructions (declarations only).
// opMath: dst and src0 share one operand type (DS0); the second form adds src1.
template <typename DS0>
void opMath(Opcode op, DataType defaultType, const InstructionModifier &mod, MathFunction fc, DS0 dst, DS0 src0);
template <typename DS0, typename S1>
void opMath(Opcode op, DataType defaultType, const InstructionModifier &mod, MathFunction fc, DS0 dst, DS0 src0, S1 src1);

// opBfn: three-source form carrying an extra integer control value; src1 is always RegData.
template <typename D, typename S0, typename S2>
void opBfn(Opcode op, DataType defaultType, const InstructionModifier &mod, int bfnCtrl, D dst, S0 src0, RegData src1, S2 src2);
// opDpas: used by dpas()/dpasw(), which pass their sdepth and rcount arguments through.
void opDpas(Opcode op, DataType defaultType, const InstructionModifier &mod, int sdepth, int rcount, RegData dst, RegData src0, RegData src1, RegData src2);
236 | |
// opSend / opSends: send-instruction emitters (declarations only). Overloads
// are selected by enable_if on hw_ (hw < Gen12LP vs hw >= Gen12LP) and by the
// types used for the descriptor (desc) and extended descriptor (exdesc).

// Forms taking an explicit SharedFunction and two sources.
template <typename D, HW hw_ = hw>
typename std::enable_if<hwLT(hw_, HW::Gen12LP)>::type opSend(Opcode op, const InstructionModifier &mod, SharedFunction sfid, const RegData &dst, const RegData &src0, const RegData &src1, uint32_t exdesc, D desc);
template <typename D, HW hw_ = hw>
typename std::enable_if<hwLT(hw_, HW::Gen12LP)>::type opSend(Opcode op, const InstructionModifier &mod, SharedFunction sfid, const RegData &dst, const RegData &src0, const RegData &src1, const RegData &exdesc, D desc);
template <typename ED, typename D, HW hw_ = hw>
typename std::enable_if<hwGE(hw_, HW::Gen12LP)>::type opSend(Opcode op, const InstructionModifier &mod, SharedFunction sfid, const RegData &dst, const RegData &src0, const RegData &src1, ED exdesc, D desc);

// Single-source forms without a SharedFunction argument.
template <HW hw_ = hw>
typename std::enable_if<hwLT(hw_, HW::Gen12LP)>::type opSend(Opcode op, const InstructionModifier &mod, const RegData &dst, const RegData &src0, uint32_t exdesc, uint32_t desc);
template <HW hw_ = hw>
typename std::enable_if<hwLT(hw_, HW::Gen12LP)>::type opSend(Opcode op, const InstructionModifier &mod, const RegData &dst, const RegData &src0, uint32_t exdesc, const RegData &desc);
template <typename D, HW hw_ = hw>
typename std::enable_if<hwGE(hw_, HW::Gen12LP)>::type opSend(Opcode op, const InstructionModifier &mod, const RegData &dst, const RegData &src0, uint32_t exdesc, D desc);

// Two-source forms without a SharedFunction argument.
template <typename ED, typename D, HW hw_ = hw>
typename std::enable_if<hwLT(hw_, HW::Gen12LP)>::type opSends(Opcode op, const InstructionModifier &mod, const RegData &dst, const RegData &src0, const RegData &src1, ED exdesc, D desc);
template <typename D, HW hw_ = hw>
typename std::enable_if<hwGE(hw_, HW::Gen12LP)>::type opSends(Opcode op, const InstructionModifier &mod, const RegData &dst, const RegData &src0, const RegData &src1, uint32_t exdesc, D desc);
template <typename D, HW hw_ = hw>
typename std::enable_if<hwGE(hw_, HW::Gen12LP)>::type opSends(Opcode op, const InstructionModifier &mod, const RegData &dst, const RegData &src0, const RegData &src1, RegData exdesc, D desc);
257 | |
// opBranch / opCall / opJmpi: control-flow emitters (declarations only).
// The numeric and register forms come in pairs selected by enable_if on hw_
// (hw < Gen12LP vs hw >= Gen12LP); the Label forms are not hardware-dispatched
// at the declaration level.
// NOTE(review): the meaning of forceWE and small12 is not visible here —
// confirm against the definitions.

// Numeric jip and uip.
template <HW hw_ = hw>
typename std::enable_if<hwLT(hw_, HW::Gen12LP)>::type opBranch(Opcode op, const InstructionModifier &mod, const RegData &dst, int32_t jip, int32_t uip);
template <HW hw_ = hw>
typename std::enable_if<hwGE(hw_, HW::Gen12LP)>::type opBranch(Opcode op, const InstructionModifier &mod, const RegData &dst, int32_t jip, int32_t uip);
// Numeric jip only.
template <bool forceWE = false, HW hw_ = hw>
typename std::enable_if<hwLT(hw_, HW::Gen12LP)>::type opBranch(Opcode op, const InstructionModifier &mod, const RegData &dst, int32_t jip);
template <bool forceWE = false, HW hw_ = hw>
typename std::enable_if<hwGE(hw_, HW::Gen12LP)>::type opBranch(Opcode op, const InstructionModifier &mod, const RegData &dst, int32_t jip);
// Register source.
template <bool forceWE = false, bool small12 = true, HW hw_ = hw>
typename std::enable_if<hwLT(hw_, HW::Gen12LP)>::type opBranch(Opcode op, const InstructionModifier &mod, const RegData &dst, const RegData &src0);
template <bool forceWE = false, bool small12 = true, HW hw_ = hw>
typename std::enable_if<hwGE(hw_, HW::Gen12LP)>::type opBranch(Opcode op, const InstructionModifier &mod, const RegData &dst, const RegData &src0);

// Label targets.
void opBranch(Opcode op, const InstructionModifier &mod, const RegData &dst, Label &jip, Label &uip);
template <bool forceWE = false>
void opBranch(Opcode op, const InstructionModifier &mod, const RegData &dst, Label &jip);
void opCall(Opcode op, const InstructionModifier &mod, const RegData &dst, Label &jip);

// jmpi with a numeric or Label target.
template <HW hw_ = hw>
typename std::enable_if<hwLT(hw_, HW::Gen12LP)>::type opJmpi(Opcode op, const InstructionModifier &mod, const RegData &dst, RegData src0, uint32_t jip);
template <HW hw_ = hw>
typename std::enable_if<hwGE(hw_, HW::Gen12LP)>::type opJmpi(Opcode op, const InstructionModifier &mod, const RegData &dst, RegData src0, uint32_t jip);
void opJmpi(Opcode op, const InstructionModifier &mod, const RegData &dst, const RegData &src0, Label &jip);
281 | |
// opSync: sync emitters with no operand, a register operand, or an immediate
// operand (declarations only).
void opSync(Opcode op, SyncFunction fc, const InstructionModifier &mod);
void opSync(Opcode op, SyncFunction fc, const InstructionModifier &mod, RegData src0);
void opSync(Opcode op, SyncFunction fc, const InstructionModifier &mod, const Immediate &src0);

// opNop: emitter taking only an opcode (declaration only).
void opNop(Opcode op);

// Called by instruction wrappers when the target hardware is older than the
// instruction requires (e.g. add3, bfn, dp4a). Its definition is not visible
// here. NOTE(review): presumably throws — confirm.
inline void unsupported();
289 | |
290 | #include "ngen_compiler_fix.hpp" |
291 | |
292 | public: |
293 | explicit BinaryCodeGenerator(int stepping_ = 0) : stepping{stepping_}, defaultModifier{}, labelManager{}, sync{this}, load{this}, store{this}, atomic{this} { |
294 | _workaround_(); |
295 | pushStream(rootStream); |
296 | } |
297 | |
298 | ~BinaryCodeGenerator() { |
299 | for (size_t sn = 1; sn < streamStack.size(); sn++) |
300 | delete streamStack[sn]; |
301 | } |
302 | |
303 | std::vector<uint8_t> getCode(); |
304 | size_t getRootStreamLength() const { return rootStream.length(); } |
305 | |
306 | int getStepping() const { return stepping; } |
307 | void setStepping(int stepping_) { stepping = stepping_; } |
308 | |
309 | protected: |
310 | // Configuration. |
311 | void setDefaultNoMask(bool def = true) { defaultModifier.setWrEn(def); } |
312 | void setDefaultAutoSWSB(bool def = true) { defaultModifier.setAutoSWSB(def); } |
313 | bool getDefaultNoMask() const { return defaultModifier.isWrEn(); } |
314 | bool getDefaultAutoSWSB() const { return defaultModifier.isAutoSWSB(); } |
315 | |
316 | // Stream handling. |
317 | void pushStream() { pushStream(new InstructionStream()); } |
318 | void pushStream(InstructionStream *s) { streamStack.push_back(s); } |
319 | void pushStream(InstructionStream &s) { pushStream(&s); } |
320 | |
321 | InstructionStream *popStream(); |
322 | |
323 | void appendStream(InstructionStream *s) { appendStream(*s); } |
324 | void appendStream(InstructionStream &s) { streamStack.back()->append(s, labelManager); } |
325 | void appendCurrentStream() { InstructionStream *s = popStream(); appendStream(s); delete s; } |
326 | |
327 | void discardStream() { delete popStream(); } |
328 | |
// Accepts any argument and does nothing with it: the body is empty, so calls
// leave no trace in the binary output.
// The function name was missing from this declaration (`void (String) {}`),
// which does not compile; it is restored here as `comment`.
template <typename String>
void comment(String) {}
331 | |
332 | // Registers. |
333 | #ifndef NGEN_GLOBAL_REGS |
334 | #include "ngen_registers.hpp" |
335 | #endif |
336 | |
337 | // Labels. |
338 | inline void mark(Label &label) { streamStack.back()->mark(label, labelManager); } |
339 | |
340 | // Instructions. |
341 | template <typename DT = void> |
342 | void add(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const RegData &src1) { |
343 | opX(Opcode::add, getDataType<DT>(), mod, dst, src0, src1); |
344 | } |
345 | template <typename DT = void> |
346 | void add(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const Immediate &src1) { |
347 | opX(Opcode::add, getDataType<DT>(), mod, dst, src0, src1); |
348 | } |
349 | template <typename DT = void> |
350 | void addc(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const RegData &src1) { |
351 | opX(Opcode::addc, getDataType<DT>(), mod | AccWrEn, dst, src0, src1); |
352 | } |
353 | template <typename DT = void> |
354 | void addc(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const Immediate &src1) { |
355 | opX(Opcode::addc, getDataType<DT>(), mod | AccWrEn, dst, src0, src1); |
356 | } |
357 | template <typename DT = void> |
358 | void add3(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const RegData &src1, const RegData &src2) { |
359 | if (hw < HW::XeHP) unsupported(); |
360 | opX(Opcode::add3, getDataType<DT>(), mod, dst, src0, src1, src2); |
361 | } |
362 | template <typename DT = void> |
363 | void add3(const InstructionModifier &mod, const RegData &dst, const Immediate &src0, const RegData &src1, const RegData &src2) { |
364 | if (hw < HW::XeHP) unsupported(); |
365 | opX(Opcode::add3, getDataType<DT>(), mod, dst, src0, src1, src2); |
366 | } |
367 | template <typename DT = void> |
368 | void add3(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const RegData &src1, const Immediate &src2) { |
369 | if (hw < HW::XeHP) unsupported(); |
370 | opX(Opcode::add3, getDataType<DT>(), mod, dst, src0, src1, src2); |
371 | } |
372 | template <typename DT = void> |
373 | void add3(const InstructionModifier &mod, const RegData &dst, const Immediate &src0, const RegData &src1, const Immediate &src2) { |
374 | if (hw < HW::XeHP) unsupported(); |
375 | opX(Opcode::add3, getDataType<DT>(), mod, dst, src0, src1, src2); |
376 | } |
377 | template <typename DT = void> |
378 | void and_(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const RegData &src1) { |
379 | opX(isGen12 ? Opcode::and_gen12 : Opcode::and_, getDataType<DT>(), mod, dst, src0, src1); |
380 | } |
381 | template <typename DT = void> |
382 | void and_(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const Immediate &src1) { |
383 | opX(isGen12 ? Opcode::and_gen12 : Opcode::and_, getDataType<DT>(), mod, dst, src0, src1); |
384 | } |
385 | #ifndef NGEN_NO_OP_NAMES |
386 | template <typename DT = void> |
387 | void and(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const RegData &src1) { |
388 | and_<DT>(mod, dst, src0, src1); |
389 | } |
390 | template <typename DT = void> |
391 | void and(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const Immediate &src1) { |
392 | and_<DT>(mod, dst, src0, src1); |
393 | } |
394 | #endif |
395 | template <typename DT = void> |
396 | void asr(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const RegData &src1) { |
397 | opX(isGen12 ? Opcode::asr_gen12 : Opcode::asr, getDataType<DT>(), mod, dst, src0, src1); |
398 | } |
399 | template <typename DT = void> |
400 | void asr(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const Immediate &src1) { |
401 | opX(isGen12 ? Opcode::asr_gen12 : Opcode::asr, getDataType<DT>(), mod, dst, src0, src1); |
402 | } |
403 | template <typename DT = void> |
404 | void avg(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const RegData &src1) { |
405 | opX(Opcode::avg, getDataType<DT>(), mod, dst, src0, src1); |
406 | } |
407 | template <typename DT = void> |
408 | void avg(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const Immediate &src1) { |
409 | opX(Opcode::avg, getDataType<DT>(), mod, dst, src0, src1); |
410 | } |
411 | template <typename DT = void> |
412 | void bfe(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const RegData &src1, const RegData &src2) { |
413 | opX(isGen12 ? Opcode::bfe_gen12 : Opcode::bfe, getDataType<DT>(), mod, dst, src0, src1, src2); |
414 | } |
415 | template <typename DT = void> |
416 | void bfe(const InstructionModifier &mod, const RegData &dst, const Immediate &src0, const RegData &src1, const RegData &src2) { |
417 | opX(isGen12 ? Opcode::bfe_gen12 : Opcode::bfe, getDataType<DT>(), mod, dst, src0, src1, src2); |
418 | } |
419 | template <typename DT = void> |
420 | void bfe(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const RegData &src1, const Immediate &src2) { |
421 | opX(isGen12 ? Opcode::bfe_gen12 : Opcode::bfe, getDataType<DT>(), mod, dst, src0, src1, src2); |
422 | } |
423 | template <typename DT = void> |
424 | void bfe(const InstructionModifier &mod, const RegData &dst, const Immediate &src0, const RegData &src1, const Immediate &src2) { |
425 | opX(isGen12 ? Opcode::bfe_gen12 : Opcode::bfe, getDataType<DT>(), mod, dst, src0, src1, src2); |
426 | } |
427 | template <typename DT = void> |
428 | void bfi1(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const RegData &src1) { |
429 | opX(isGen12 ? Opcode::bfi1_gen12 : Opcode::bfi1, getDataType<DT>(), mod, dst, src0, src1); |
430 | } |
431 | template <typename DT = void> |
432 | void bfi1(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const Immediate &src1) { |
433 | opX(isGen12 ? Opcode::bfi1_gen12 : Opcode::bfi1, getDataType<DT>(), mod, dst, src0, src1); |
434 | } |
435 | template <typename DT = void> |
436 | void bfi2(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const RegData &src1, const RegData &src2) { |
437 | opX(isGen12 ? Opcode::bfi2_gen12 : Opcode::bfi2, getDataType<DT>(), mod, dst, src0, src1, src2); |
438 | } |
439 | template <typename DT = void> |
440 | void bfi2(const InstructionModifier &mod, const RegData &dst, const Immediate &src0, const RegData &src1, const RegData &src2) { |
441 | opX(isGen12 ? Opcode::bfi2_gen12 : Opcode::bfi2, getDataType<DT>(), mod, dst, src0, src1, src2); |
442 | } |
443 | template <typename DT = void> |
444 | void bfi2(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const RegData &src1, const Immediate &src2) { |
445 | opX(isGen12 ? Opcode::bfi2_gen12 : Opcode::bfi2, getDataType<DT>(), mod, dst, src0, src1, src2); |
446 | } |
447 | template <typename DT = void> |
448 | void bfi2(const InstructionModifier &mod, const RegData &dst, const Immediate &src0, const RegData &src1, const Immediate &src2) { |
449 | opX(isGen12 ? Opcode::bfi2_gen12 : Opcode::bfi2, getDataType<DT>(), mod, dst, src0, src1, src2); |
450 | } |
451 | template <typename DT = void> |
452 | void bfn(const InstructionModifier &mod, uint8_t ctrl, const RegData &dst, const RegData &src0, const RegData &src1, const RegData &src2) { |
453 | if (hw < HW::XeHP) unsupported(); |
454 | opBfn(Opcode::bfn, getDataType<DT>(), mod, ctrl, dst, src0, src1, src2); |
455 | } |
456 | template <typename DT = void> |
457 | void bfn(const InstructionModifier &mod, uint8_t ctrl, const RegData &dst, const Immediate &src0, const RegData &src1, const RegData &src2) { |
458 | if (hw < HW::XeHP) unsupported(); |
459 | opBfn(Opcode::bfn, getDataType<DT>(), mod, ctrl, dst, src0, src1, src2); |
460 | } |
461 | template <typename DT = void> |
462 | void bfn(const InstructionModifier &mod, uint8_t ctrl, const RegData &dst, const RegData &src0, const RegData &src1, const Immediate &src2) { |
463 | if (hw < HW::XeHP) unsupported(); |
464 | opBfn(Opcode::bfn, getDataType<DT>(), mod, ctrl, dst, src0, src1, src2); |
465 | } |
466 | template <typename DT = void> |
467 | void bfn(const InstructionModifier &mod, uint8_t ctrl, const RegData &dst, const Immediate &src0, const RegData &src1, const Immediate &src2) { |
468 | if (hw < HW::XeHP) unsupported(); |
469 | opBfn(Opcode::bfn, getDataType<DT>(), mod, ctrl, dst, src0, src1, src2); |
470 | } |
471 | template <typename DT = void> |
472 | void bfrev(const InstructionModifier &mod, const RegData &dst, const RegData &src0) { |
473 | opX(isGen12 ? Opcode::bfrev_gen12 : Opcode::bfrev, getDataType<DT>(), mod, dst, src0); |
474 | } |
475 | template <typename DT = void> |
476 | void bfrev(const InstructionModifier &mod, const RegData &dst, const Immediate &src0) { |
477 | opX(isGen12 ? Opcode::bfrev_gen12 : Opcode::bfrev, getDataType<DT>(), mod, dst, src0); |
478 | } |
479 | void brc(const InstructionModifier &mod, Label &jip, Label &uip) { |
480 | opBranch(Opcode::brc, mod, isGen12 ? null.ud() : ip.d(), jip, uip); |
481 | } |
482 | void brc(const InstructionModifier &mod, RegData src0) { |
483 | src0.setRegion(2, 2, 1); |
484 | opBranch<true, true>(Opcode::brc, mod, isGen12 ? null.ud() : ip.d(), src0); |
485 | } |
486 | void brd(const InstructionModifier &mod, Label &jip) { |
487 | opBranch(Opcode::brd, mod, isGen12 ? null.ud() : ip.d(), jip); |
488 | } |
489 | void brd(const InstructionModifier &mod, RegData src0) { |
490 | src0.setRegion(2, 2, 1); |
491 | opBranch<true, true>(Opcode::brd, mod, isGen12 ? null.ud() : ip.d(), src0); |
492 | } |
493 | void break_(const InstructionModifier &mod, Label &jip, Label &uip) { |
494 | opBranch(Opcode::break_, mod, null, jip, uip); |
495 | } |
496 | void call(const InstructionModifier &mod, const RegData &dst, Label &jip) { |
497 | opCall(Opcode::call, mod, dst, jip); |
498 | } |
499 | void call(const InstructionModifier &mod, const RegData &dst, RegData jip) { |
500 | if (isGen12) |
501 | opBranch<true, true>(Opcode::call, mod, dst, jip); |
502 | else { |
503 | jip.setRegion(0, 1, 0); |
504 | opX<true>(Opcode::call, DataType::d, mod, dst, null.ud(0)(0, 1, 0), jip); |
505 | } |
506 | } |
507 | void calla(const InstructionModifier &mod, const RegData &dst, int32_t jip) { |
508 | if (isGen12) |
509 | opBranch<true>(Opcode::calla, mod, dst, jip); |
510 | else |
511 | opX<true>(Opcode::calla, DataType::d, mod, dst, (hw <= HW::Gen9) ? null.ud(0)(2,2,1) : null.ud(0)(0,1,0), Immediate::d(jip)); |
512 | } |
513 | void calla(const InstructionModifier &mod, const RegData &dst, RegData jip) { |
514 | if (isGen12) |
515 | opBranch<true, true>(Opcode::calla, mod, dst, jip); |
516 | else { |
517 | jip.setRegion(0, 1, 0); |
518 | opX<true>(Opcode::calla, DataType::d, mod, dst, null.ud(0)(0, 1, 0), jip); |
519 | } |
520 | } |
521 | template <typename DT = void> |
522 | void cbit(const InstructionModifier &mod, const RegData &dst, const RegData &src0) { |
523 | opX(Opcode::cbit, getDataType<DT>(), mod, dst, src0); |
524 | } |
525 | template <typename DT = void> |
526 | void cbit(const InstructionModifier &mod, const RegData &dst, const Immediate &src0) { |
527 | opX(Opcode::cbit, getDataType<DT>(), mod, dst, src0); |
528 | } |
529 | template <typename DT = void> |
530 | void cmp(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const RegData &src1) { |
531 | opX(isGen12 ? Opcode::cmp_gen12 : Opcode::cmp, getDataType<DT>(), mod, dst, src0, src1); |
532 | } |
533 | template <typename DT = void> |
534 | void cmp(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const Immediate &src1) { |
535 | opX(isGen12 ? Opcode::cmp_gen12 : Opcode::cmp, getDataType<DT>(), mod, dst, src0, src1); |
536 | } |
537 | template <typename DT = void> |
538 | void cmpn(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const RegData &src1) { |
539 | opX(isGen12 ? Opcode::cmpn_gen12 : Opcode::cmpn, getDataType<DT>(), mod, dst, src0, src1); |
540 | } |
541 | template <typename DT = void> |
542 | void csel(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const RegData &src1, const RegData &src2) { |
543 | opX(isGen12 ? Opcode::csel_gen12 : Opcode::csel, getDataType<DT>(), mod, dst, src0, src1, src2); |
544 | } |
545 | template <typename DT = void> |
546 | void csel(const InstructionModifier &mod, const RegData &dst, const Immediate &src0, const RegData &src1, const RegData &src2) { |
547 | opX(isGen12 ? Opcode::csel_gen12 : Opcode::csel, getDataType<DT>(), mod, dst, src0, src1, src2); |
548 | } |
549 | template <typename DT = void> |
550 | void csel(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const RegData &src1, const Immediate &src2) { |
551 | opX(isGen12 ? Opcode::csel_gen12 : Opcode::csel, getDataType<DT>(), mod, dst, src0, src1, src2); |
552 | } |
553 | template <typename DT = void> |
554 | void csel(const InstructionModifier &mod, const RegData &dst, const Immediate &src0, const RegData &src1, const Immediate &src2) { |
555 | opX(isGen12 ? Opcode::csel_gen12 : Opcode::csel, getDataType<DT>(), mod, dst, src0, src1, src2); |
556 | } |
557 | void cont(const InstructionModifier &mod, Label &jip, Label &uip) { |
558 | opBranch(Opcode::cont, mod, null, jip, uip); |
559 | } |
560 | template <typename DT = void> |
561 | void dp2(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const RegData &src1) { |
562 | opX(Opcode::dp2, getDataType<DT>(), mod, dst, src0, src1); |
563 | } |
564 | template <typename DT = void> |
565 | void dp2(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const Immediate &src1) { |
566 | opX(Opcode::dp2, getDataType<DT>(), mod, dst, src0, src1); |
567 | } |
568 | template <typename DT = void> |
569 | void dp3(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const RegData &src1) { |
570 | opX(Opcode::dp3, getDataType<DT>(), mod, dst, src0, src1); |
571 | } |
572 | template <typename DT = void> |
573 | void dp3(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const Immediate &src1) { |
574 | opX(Opcode::dp3, getDataType<DT>(), mod, dst, src0, src1); |
575 | } |
576 | template <typename DT = void> |
577 | void dp4(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const RegData &src1) { |
578 | opX(Opcode::dp4, getDataType<DT>(), mod, dst, src0, src1); |
579 | } |
580 | template <typename DT = void> |
581 | void dp4(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const Immediate &src1) { |
582 | opX(Opcode::dp4, getDataType<DT>(), mod, dst, src0, src1); |
583 | } |
584 | template <typename DT = void> |
585 | void dp4a(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const RegData &src1, const RegData &src2) { |
586 | if (hw < HW::Gen12LP) unsupported(); |
587 | opX(Opcode::dp4a, getDataType<DT>(), mod, dst, src0, src1, src2); |
588 | } |
589 | template <typename DT = void> |
590 | void dp4a(const InstructionModifier &mod, const RegData &dst, const Immediate &src0, const RegData &src1, const RegData &src2) { |
591 | if (hw < HW::Gen12LP) unsupported(); |
592 | opX(Opcode::dp4a, getDataType<DT>(), mod, dst, src0, src1, src2); |
593 | } |
594 | template <typename DT = void> |
595 | void dp4a(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const RegData &src1, const Immediate &src2) { |
596 | if (hw < HW::Gen12LP) unsupported(); |
597 | opX(Opcode::dp4a, getDataType<DT>(), mod, dst, src0, src1, src2); |
598 | } |
599 | template <typename DT = void> |
600 | void dp4a(const InstructionModifier &mod, const RegData &dst, const Immediate &src0, const RegData &src1, const Immediate &src2) { |
601 | if (hw < HW::Gen12LP) unsupported(); |
602 | opX(Opcode::dp4a, getDataType<DT>(), mod, dst, src0, src1, src2); |
603 | } |
604 | template <typename DT = void> |
605 | void dpas(const InstructionModifier &mod, uint8_t sdepth, uint8_t rcount, const RegData &dst, const RegData &src0, const RegData &src1, const RegData &src2) { |
606 | opDpas(Opcode::dpas, getDataType<DT>(), mod, sdepth, rcount, dst, src0, src1, src2); |
607 | } |
608 | template <typename DT = void> |
609 | void dpasw(const InstructionModifier &mod, uint8_t sdepth, uint8_t rcount, const RegData &dst, const RegData &src0, const RegData &src1, const RegData &src2) { |
610 | opDpas(Opcode::dpasw, getDataType<DT>(), mod, sdepth, rcount, dst, src0, src1, src2); |
611 | } |
612 | template <typename DT = void> |
613 | void dph(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const RegData &src1) { |
614 | opX(Opcode::dph, getDataType<DT>(), mod, dst, src0, src1); |
615 | } |
616 | template <typename DT = void> |
617 | void dph(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const Immediate &src1) { |
618 | opX(Opcode::dph, getDataType<DT>(), mod, dst, src0, src1); |
619 | } |
620 | void else_(InstructionModifier mod, Label &jip, Label &uip, bool branchCtrl = false) { |
621 | mod.setBranchCtrl(branchCtrl); |
622 | opBranch(Opcode::else_, mod, null, jip, uip); |
623 | } |
624 | void else_(InstructionModifier mod, Label &jip) { |
625 | else_(mod, jip, jip); |
626 | } |
627 | void endif(const InstructionModifier &mod, Label &jip) { |
628 | opBranch(Opcode::endif, mod, null, jip); |
629 | } |
630 | void endif(const InstructionModifier &mod) { |
631 | opBranch(Opcode::endif, mod, null, sizeof(Instruction8)); |
632 | } |
633 | template <typename DT = void> |
634 | void fbh(const InstructionModifier &mod, const RegData &dst, const RegData &src0) { |
635 | opX(Opcode::fbh, getDataType<DT>(), mod, dst, src0); |
636 | } |
637 | template <typename DT = void> |
638 | void fbh(const InstructionModifier &mod, const RegData &dst, const Immediate &src0) { |
639 | opX(Opcode::fbh, getDataType<DT>(), mod, dst, src0); |
640 | } |
641 | template <typename DT = void> |
642 | void fbl(const InstructionModifier &mod, const RegData &dst, const RegData &src0) { |
643 | opX(Opcode::fbl, getDataType<DT>(), mod, dst, src0); |
644 | } |
645 | template <typename DT = void> |
646 | void fbl(const InstructionModifier &mod, const RegData &dst, const Immediate &src0) { |
647 | opX(Opcode::fbl, getDataType<DT>(), mod, dst, src0); |
648 | } |
649 | template <typename DT = void> |
650 | void frc(const InstructionModifier &mod, const RegData &dst, const RegData &src0) { |
651 | opX(Opcode::frc, getDataType<DT>(), mod, dst, src0); |
652 | } |
653 | void goto_(InstructionModifier mod, Label &jip, Label &uip, bool branchCtrl = false) { |
654 | mod.setBranchCtrl(branchCtrl); |
655 | opBranch(Opcode::goto_, mod, null, jip, uip); |
656 | } |
657 | void goto_(const InstructionModifier &mod, Label &jip) { |
658 | goto_(mod, jip, jip); |
659 | } |
660 | void halt(const InstructionModifier &mod, Label &jip, Label &uip) { |
661 | opBranch(Opcode::halt, mod, null, jip, uip); |
662 | } |
663 | void halt(const InstructionModifier &mod, Label &jip) { |
664 | halt(mod, jip, jip); |
665 | } |
666 | void if_(InstructionModifier mod, Label &jip, Label &uip, bool branchCtrl = false) { |
667 | mod.setBranchCtrl(branchCtrl); |
668 | opBranch(Opcode::if_, mod, null, jip, uip); |
669 | } |
670 | void if_(const InstructionModifier &mod, Label &jip) { |
671 | if_(mod, jip, jip); |
672 | } |
673 | void illegal() { |
674 | opX(Opcode::illegal, DataType::invalid, InstructionModifier(), null, null, null); |
675 | } |
676 | void join(InstructionModifier mod, Label &jip) { |
677 | opBranch(Opcode::join, mod, null, jip); |
678 | } |
679 | void join(InstructionModifier mod) { |
680 | opBranch(Opcode::join, mod, null, sizeof(Instruction8)); |
681 | } |
682 | void jmpi(const InstructionModifier &mod, Label &jip) { |
683 | auto dst = isGen12 ? ARF(null) : ARF(ip); |
684 | opJmpi(Opcode::jmpi, mod, dst, dst, jip); |
685 | } |
686 | void jmpi(const InstructionModifier &mod, const RegData &jip) { |
687 | #ifdef NGEN_SAFE |
688 | if (!isGen12 && jip.getType() != DataType::d && jip.getType() != DataType::invalid) |
689 | throw invalid_type_exception(); |
690 | #endif |
691 | if (isGen12) |
692 | opBranch<true, false>(Opcode::jmpi, mod, null, jip); |
693 | else |
694 | opX(Opcode::jmpi, DataType::d, mod, ip, ip, jip); |
695 | } |
696 | template <typename DT = void> |
697 | void line(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const RegData &src1) { |
698 | if (hw >= HW::Gen11) unsupported(); |
699 | opX(Opcode::line, getDataType<DT>(), mod, dst, src0, src1); |
700 | } |
701 | template <typename DT = void> |
702 | void line(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const Immediate &src1) { |
703 | if (hw >= HW::Gen11) unsupported(); |
704 | opX(Opcode::line, getDataType<DT>(), mod, dst, src0, src1); |
705 | } |
706 | template <typename DT = void> |
707 | void lrp(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const RegData &src1, const RegData &src2) { |
708 | opX(Opcode::lrp, getDataType<DT>(), mod, dst, src0, src1, src2); |
709 | } |
710 | template <typename DT = void> |
711 | void lzd(const InstructionModifier &mod, const RegData &dst, const RegData &src0) { |
712 | opX(Opcode::lzd, getDataType<DT>(), mod, dst, src0); |
713 | } |
714 | template <typename DT = void> |
715 | void lzd(const InstructionModifier &mod, const RegData &dst, const Immediate &src0) { |
716 | opX(Opcode::lzd, getDataType<DT>(), mod, dst, src0); |
717 | } |
718 | template <typename DT = void> |
719 | void mac(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const RegData &src1) { |
720 | opX(Opcode::mac, getDataType<DT>(), mod, dst, src0, src1); |
721 | } |
722 | template <typename DT = void> |
723 | void mac(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const Immediate &src1) { |
724 | opX(Opcode::mac, getDataType<DT>(), mod, dst, src0, src1); |
725 | } |
726 | template <typename DT = void> |
727 | void mach(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const RegData &src1) { |
728 | opX(Opcode::mach, getDataType<DT>(), (hw >= HW::XeHPC) ? mod : (mod | AccWrEn), dst, src0, src1); |
729 | } |
730 | template <typename DT = void> |
731 | void mach(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const Immediate &src1) { |
732 | opX(Opcode::mach, getDataType<DT>(), (hw >= HW::XeHPC) ? mod : (mod | AccWrEn), dst, src0, src1); |
733 | } |
734 | template <typename DT = void> |
735 | void macl(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const RegData &src1) { |
736 | #ifdef NGEN_SAFE |
737 | if (hw < HW::Gen10) unsupported(); |
738 | #endif |
739 | opX((hw >= HW::XeHPC) ? Opcode::macl : Opcode::mach, getDataType<DT>(), mod, dst, src0, src1); |
740 | } |
741 | template <typename DT = void> |
742 | void macl(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const Immediate &src1) { |
743 | #ifdef NGEN_SAFE |
744 | if (hw < HW::Gen10) unsupported(); |
745 | #endif |
746 | opX((hw >= HW::XeHPC) ? Opcode::macl : Opcode::mach, getDataType<DT>(), mod, dst, src0, src1); |
747 | } |
748 | template <typename DT = void> |
749 | void mad(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const RegData &src1, const RegData &src2) { |
750 | opX(Opcode::mad, getDataType<DT>(), mod, dst, src0, src1, src2); |
751 | } |
752 | template <typename DT = void> |
753 | void mad(const InstructionModifier &mod, const RegData &dst, const Immediate &src0, const RegData &src1, const RegData &src2) { |
754 | opX(Opcode::mad, getDataType<DT>(), mod, dst, src0, src1, src2); |
755 | } |
756 | template <typename DT = void> |
757 | void mad(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const RegData &src1, const Immediate &src2) { |
758 | opX(Opcode::mad, getDataType<DT>(), mod, dst, src0, src1, src2); |
759 | } |
760 | template <typename DT = void> |
761 | void mad(const InstructionModifier &mod, const RegData &dst, const Immediate &src0, const RegData &src1, const Immediate &src2) { |
762 | opX(Opcode::mad, getDataType<DT>(), mod, dst, src0, src1, src2); |
763 | } |
764 | template <typename DT = void, HW hw_ = hw> |
765 | typename std::enable_if<hwLE(hw_, HW::Gen9)>::type |
766 | madm(const InstructionModifier &mod, const ExtendedReg &dst, const ExtendedReg &src0, const ExtendedReg &src1, const ExtendedReg &src2) { |
767 | opX(Opcode::madm, getDataType<DT>(), mod, extToAlign16(dst), extToAlign16(src0), extToAlign16(src1), extToAlign16(src2)); |
768 | } |
769 | template <typename DT = void, HW hw_ = hw> |
770 | typename std::enable_if<hwGT(hw_, HW::Gen9)>::type |
771 | madm(const InstructionModifier &mod, const ExtendedReg &dst, ExtendedReg src0, ExtendedReg src1, const ExtendedReg &src2) { |
772 | src0.getBase().setRegion(4,4,1); |
773 | src1.getBase().setRegion(4,4,1); |
774 | opX(Opcode::madm, getDataType<DT>(), mod, dst, src0, src1, src2); |
775 | } |
776 | template <typename DT = void> |
777 | void math(const InstructionModifier &mod, MathFunction fc, const RegData &dst, const RegData &src0) { |
778 | #ifdef NGEN_SAFE |
779 | if (mathArgCount(fc) != 1) throw invalid_operand_count_exception(); |
780 | #endif |
781 | opMath(Opcode::math, getDataType<DT>(), mod, fc, dst, src0); |
782 | } |
783 | template <typename DT = void> |
784 | void math(const InstructionModifier &mod, MathFunction fc, const RegData &dst, const RegData &src0, const RegData &src1) { |
785 | #ifdef NGEN_SAFE |
786 | if (mathArgCount(fc) != 2) throw invalid_operand_count_exception(); |
787 | #endif |
788 | opMath(Opcode::math, getDataType<DT>(), mod, fc, dst, src0, src1); |
789 | } |
790 | template <typename DT = void> |
791 | void math(const InstructionModifier &mod, MathFunction fc, const RegData &dst, const RegData &src0, const Immediate &src1) { |
792 | #ifdef NGEN_SAFE |
793 | if (fc == MathFunction::invm || fc == MathFunction::rsqtm) throw invalid_operand_exception(); |
794 | #endif |
795 | opMath(Opcode::math, getDataType<DT>(), mod, fc, dst, src0, src1.forceInt32()); |
796 | } |
797 | template <typename DT = void, HW hw_ = hw> |
798 | typename std::enable_if<hwLT(hw_, HW::Gen11)>::type |
799 | math(const InstructionModifier &mod, MathFunction fc, const ExtendedReg &dst, const ExtendedReg &src0) { |
800 | #ifdef NGEN_SAFE |
801 | if (fc != MathFunction::rsqtm) throw invalid_operand_exception(); |
802 | #endif |
803 | opMath(Opcode::math, getDataType<DT>(), mod, fc, extToAlign16(dst), extToAlign16(src0)); |
804 | } |
805 | template <typename DT = void, HW hw_ = hw> |
806 | typename std::enable_if<hwGE(hw_, HW::Gen11)>::type |
807 | math(const InstructionModifier &mod, MathFunction fc, const ExtendedReg &dst, ExtendedReg src0) { |
808 | #ifdef NGEN_SAFE |
809 | if (fc != MathFunction::rsqtm) throw invalid_operand_exception(); |
810 | #endif |
811 | if (hw == HW::Gen11) |
812 | src0.getBase().setRegion(2,2,1); |
813 | else |
814 | src0.getBase().setRegion(1,1,0); |
815 | opMath(Opcode::math, getDataType<DT>(), mod, fc, dst, src0); |
816 | } |
817 | template <typename DT = void, HW hw_ = hw> |
818 | typename std::enable_if<hwLT(hw_, HW::Gen11)>::type |
819 | math(const InstructionModifier &mod, MathFunction fc, const ExtendedReg &dst, const ExtendedReg &src0, const ExtendedReg &src1) { |
820 | #ifdef NGEN_SAFE |
821 | if (fc != MathFunction::invm) throw invalid_operand_exception(); |
822 | #endif |
823 | opMath(Opcode::math, getDataType<DT>(), mod, fc, extToAlign16(dst), extToAlign16(src0), extToAlign16(src1)); |
824 | } |
825 | template <typename DT = void, HW hw_ = hw> |
826 | typename std::enable_if<hwGE(hw_, HW::Gen11)>::type |
827 | math(const InstructionModifier &mod, MathFunction fc, const ExtendedReg &dst, ExtendedReg src0, ExtendedReg src1) { |
828 | #ifdef NGEN_SAFE |
829 | if (fc != MathFunction::invm) throw invalid_operand_exception(); |
830 | #endif |
831 | if (hw == HW::Gen11) { |
832 | src0.getBase().setRegion(2,2,1); |
833 | src1.getBase().setRegion(2,2,1); |
834 | } else { |
835 | src0.getBase().setRegion(1,1,0); |
836 | src1.getBase().setRegion(1,1,0); |
837 | } |
838 | opMath(Opcode::math, getDataType<DT>(), mod, fc, dst, src0, src1); |
839 | } |
840 | template <typename DT = void> |
841 | void mov(const InstructionModifier &mod, const RegData &dst, const RegData &src0) { |
842 | opX(isGen12 ? Opcode::mov_gen12 : Opcode::mov, getDataType<DT>(), mod, dst, src0); |
843 | } |
844 | template <typename DT = void> |
845 | void mov(const InstructionModifier &mod, const RegData &dst, const Immediate &src0) { |
846 | opX(isGen12 ? Opcode::mov_gen12 : Opcode::mov, getDataType<DT>(), mod, dst, src0); |
847 | } |
848 | template <typename DT = void> |
849 | void movi(const InstructionModifier &mod, const RegData &dst, const RegData &src0) { |
850 | if (hardware >= HW::Gen10) |
851 | movi<DT>(mod, dst, src0, null.ud(0)(1,1,0)); |
852 | else |
853 | opX(isGen12 ? Opcode::movi_gen12 : Opcode::movi, getDataType<DT>(), mod, dst, src0); |
854 | } |
855 | template <typename DT = void> |
856 | void movi(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const Immediate &src1) { |
857 | #ifdef NGEN_SAFE |
858 | if (hardware < HW::Gen10) throw unsupported_instruction(); |
859 | #endif |
860 | opX(isGen12 ? Opcode::movi_gen12 : Opcode::movi, getDataType<DT>(), mod, dst, src0, src1); |
861 | } |
862 | template <typename DT = void> |
863 | void movi(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const RegData &src1) { |
864 | #ifdef NGEN_SAFE |
865 | if (hardware < HW::Gen10) throw unsupported_instruction(); |
866 | #endif |
867 | opX(isGen12 ? Opcode::movi_gen12 : Opcode::movi, getDataType<DT>(), mod, dst, src0, src1); |
868 | } |
869 | template <typename DT = void> |
870 | void mul(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const RegData &src1) { |
871 | opX(Opcode::mul, getDataType<DT>(), mod, dst, src0, src1); |
872 | } |
873 | template <typename DT = void> |
874 | void mul(const InstructionModifier &mod, const RegData &dst, const RegData &src0, Immediate src1) { |
875 | if (dst.getBytes() == 8) |
876 | src1 = src1.forceInt32(); |
877 | opX(Opcode::mul, getDataType<DT>(), mod, dst, src0, src1); |
878 | } |
879 | void nop() { |
880 | opNop(isGen12 ? Opcode::nop_gen12 : Opcode::nop); |
881 | } |
882 | void nop(const InstructionModifier &mod) { |
883 | opX(isGen12 ? Opcode::nop_gen12 : Opcode::nop, DataType::invalid, mod, null, null, null); |
884 | } |
885 | template <typename DT = void> |
886 | void not_(const InstructionModifier &mod, const RegData &dst, const RegData &src0) { |
887 | opX(isGen12 ? Opcode::not_gen12 : Opcode::not_, getDataType<DT>(), mod, dst, src0); |
888 | } |
889 | template <typename DT = void> |
890 | void not_(const InstructionModifier &mod, const RegData &dst, const Immediate &src0) { |
891 | opX(isGen12 ? Opcode::not_gen12 : Opcode::not_, getDataType<DT>(), mod, dst, src0); |
892 | } |
893 | #ifndef NGEN_NO_OP_NAMES |
894 | template <typename DT = void> |
895 | void not(const InstructionModifier &mod, const RegData &dst, const RegData &src0) { |
896 | not_<DT>(mod, dst, src0); |
897 | } |
898 | template <typename DT = void> |
899 | void not(const InstructionModifier &mod, const RegData &dst, const Immediate &src0) { |
900 | not_<DT>(mod, dst, src0); |
901 | } |
902 | #endif |
903 | template <typename DT = void> |
904 | void or_(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const RegData &src1) { |
905 | opX(isGen12 ? Opcode::or_gen12 : Opcode::or_, getDataType<DT>(), mod, dst, src0, src1); |
906 | } |
907 | template <typename DT = void> |
908 | void or_(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const Immediate &src1) { |
909 | opX(isGen12 ? Opcode::or_gen12 : Opcode::or_, getDataType<DT>(), mod, dst, src0, src1); |
910 | } |
911 | #ifndef NGEN_NO_OP_NAMES |
912 | template <typename DT = void> |
913 | void or(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const RegData &src1) { |
914 | or_<DT>(mod, dst, src0, src1); |
915 | } |
916 | template <typename DT = void> |
917 | void or(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const Immediate &src1) { |
918 | or_<DT>(mod, dst, src0, src1); |
919 | } |
920 | #endif |
921 | template <typename DT = void> |
922 | void pln(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const RegData &src1) { |
923 | if (hw >= HW::Gen11) unsupported(); |
924 | opX(Opcode::pln, getDataType<DT>(), mod, dst, src0, src1); |
925 | } |
926 | void ret(const InstructionModifier &mod, RegData src0) { |
927 | src0.setRegion(2,2,1); |
928 | if (isGen12) |
929 | opBranch<true, true>(Opcode::ret, mod, null, src0); |
930 | else |
931 | opX<true>(Opcode::ret, DataType::ud, mod, null, src0); |
932 | } |
933 | template <typename DT = void> |
934 | void rndd(const InstructionModifier &mod, const RegData &dst, const RegData &src0) { |
935 | opX(Opcode::rndd, getDataType<DT>(), mod, dst, src0); |
936 | } |
937 | template <typename DT = void> |
938 | void rndd(const InstructionModifier &mod, const RegData &dst, const Immediate &src0) { |
939 | opX(Opcode::rndd, getDataType<DT>(), mod, dst, src0); |
940 | } |
941 | template <typename DT = void> |
942 | void rnde(const InstructionModifier &mod, const RegData &dst, const RegData &src0) { |
943 | opX(Opcode::rnde, getDataType<DT>(), mod, dst, src0); |
944 | } |
945 | template <typename DT = void> |
946 | void rnde(const InstructionModifier &mod, const RegData &dst, const Immediate &src0) { |
947 | opX(Opcode::rnde, getDataType<DT>(), mod, dst, src0); |
948 | } |
949 | template <typename DT = void> |
950 | void rndu(const InstructionModifier &mod, const RegData &dst, const RegData &src0) { |
951 | opX(Opcode::rndu, getDataType<DT>(), mod, dst, src0); |
952 | } |
953 | template <typename DT = void> |
954 | void rndu(const InstructionModifier &mod, const RegData &dst, const Immediate &src0) { |
955 | opX(Opcode::rndu, getDataType<DT>(), mod, dst, src0); |
956 | } |
957 | template <typename DT = void> |
958 | void rndz(const InstructionModifier &mod, const RegData &dst, const RegData &src0) { |
959 | opX(Opcode::rndz, getDataType<DT>(), mod, dst, src0); |
960 | } |
961 | template <typename DT = void> |
962 | void rndz(const InstructionModifier &mod, const RegData &dst, const Immediate &src0) { |
963 | opX(Opcode::rndz, getDataType<DT>(), mod, dst, src0); |
964 | } |
965 | template <typename DT = void> |
966 | void rol(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const RegData &src1) { |
967 | opX(isGen12 ? Opcode::rol_gen12 : Opcode::rol, getDataType<DT>(), mod, dst, src0, src1); |
968 | } |
969 | template <typename DT = void> |
970 | void rol(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const Immediate &src1) { |
971 | opX(isGen12 ? Opcode::rol_gen12 : Opcode::rol, getDataType<DT>(), mod, dst, src0, src1); |
972 | } |
973 | template <typename DT = void> |
974 | void ror(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const RegData &src1) { |
975 | opX(isGen12 ? Opcode::ror_gen12 : Opcode::ror, getDataType<DT>(), mod, dst, src0, src1); |
976 | } |
977 | template <typename DT = void> |
978 | void ror(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const Immediate &src1) { |
979 | opX(isGen12 ? Opcode::ror_gen12 : Opcode::ror, getDataType<DT>(), mod, dst, src0, src1); |
980 | } |
981 | template <typename DT = void> |
982 | void sad2(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const RegData &src1) { |
983 | if (hw >= HW::Gen12LP) unsupported(); |
984 | opX(Opcode::sad2, getDataType<DT>(), mod, dst, src0, src1); |
985 | } |
986 | template <typename DT = void> |
987 | void sad2(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const Immediate &src1) { |
988 | if (hw >= HW::Gen12LP) unsupported(); |
989 | opX(Opcode::sad2, getDataType<DT>(), mod, dst, src0, src1); |
990 | } |
991 | template <typename DT = void> |
992 | void sada2(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const RegData &src1) { |
993 | if (hw >= HW::Gen12LP) unsupported(); |
994 | opX(Opcode::sada2, getDataType<DT>(), mod, dst, src0, src1); |
995 | } |
996 | template <typename DT = void> |
997 | void sada2(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const Immediate &src1) { |
998 | if (hw >= HW::Gen12LP) unsupported(); |
999 | opX(Opcode::sada2, getDataType<DT>(), mod, dst, src0, src1); |
1000 | } |
1001 | template <typename DT = void> |
1002 | void sel(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const RegData &src1) { |
1003 | opX(isGen12 ? Opcode::sel_gen12 : Opcode::sel, getDataType<DT>(), mod, dst, src0, src1); |
1004 | } |
1005 | template <typename DT = void> |
1006 | void sel(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const Immediate &src1) { |
1007 | opX(isGen12 ? Opcode::sel_gen12 : Opcode::sel, getDataType<DT>(), mod, dst, src0, src1); |
1008 | } |
1009 | |
1010 | /* Gen12-style sends */ |
1011 | void send(const InstructionModifier &mod, SharedFunction sf, const RegData &dst, const RegData &src0, const RegData &src1, uint32_t exdesc, uint32_t desc) { |
1012 | opSend(Opcode::send, mod, sf, dst, src0, src1, exdesc, desc); |
1013 | } |
1014 | void send(const InstructionModifier &mod, SharedFunction sf, const RegData &dst, const RegData &src0, const RegData &src1, const RegData &exdesc, uint32_t desc) { |
1015 | opSend(Opcode::send, mod, sf, dst, src0, src1, exdesc, desc); |
1016 | } |
1017 | void send(const InstructionModifier &mod, SharedFunction sf, const RegData &dst, const RegData &src0, const RegData &src1, uint32_t exdesc, const RegData &desc) { |
1018 | opSend(Opcode::send, mod, sf, dst, src0, src1, exdesc, desc); |
1019 | } |
1020 | void send(const InstructionModifier &mod, SharedFunction sf, const RegData &dst, const RegData &src0, const RegData &src1, const RegData &exdesc, const RegData &desc) { |
1021 | opSend(Opcode::send, mod, sf, dst, src0, src1, exdesc, desc); |
1022 | } |
1023 | void sendc(const InstructionModifier &mod, SharedFunction sf, const RegData &dst, const RegData &src0, const RegData &src1, uint32_t exdesc, uint32_t desc) { |
1024 | opSend(Opcode::sendc, mod, sf, dst, src0, src1, exdesc, desc); |
1025 | } |
1026 | void sendc(const InstructionModifier &mod, SharedFunction sf, const RegData &dst, const RegData &src0, const RegData &src1, const RegData &exdesc, uint32_t desc) { |
1027 | opSend(Opcode::sendc, mod, sf, dst, src0, src1, exdesc, desc); |
1028 | } |
1029 | void sendc(const InstructionModifier &mod, SharedFunction sf, const RegData &dst, const RegData &src0, const RegData &src1, uint32_t exdesc, const RegData &desc) { |
1030 | opSend(Opcode::sendc, mod, sf, dst, src0, src1, exdesc, desc); |
1031 | } |
1032 | void sendc(const InstructionModifier &mod, SharedFunction sf, const RegData &dst, const RegData &src0, const RegData &src1, const RegData &exdesc, const RegData &desc) { |
1033 | opSend(Opcode::sendc, mod, sf, dst, src0, src1, exdesc, desc); |
1034 | } |
1035 | /* Pre-Gen12-style sends; also supported on Gen12. */ |
1036 | void send(const InstructionModifier &mod, const RegData &dst, const RegData &src0, uint32_t exdesc, uint32_t desc) { |
1037 | opSend(Opcode::send, mod, dst, src0, exdesc, desc); |
1038 | } |
1039 | void send(const InstructionModifier &mod, const RegData &dst, const RegData &src0, uint32_t exdesc, const RegData &desc) { |
1040 | opSend(Opcode::send, mod, dst, src0, exdesc, desc); |
1041 | } |
1042 | void sendc(const InstructionModifier &mod, const RegData &dst, const RegData &src0, uint32_t exdesc, uint32_t desc) { |
1043 | opSend(Opcode::sendc, mod, dst, src0, exdesc, desc); |
1044 | } |
1045 | void sendc(const InstructionModifier &mod, const RegData &dst, const RegData &src0, uint32_t exdesc, const RegData &desc) { |
1046 | opSend(Opcode::sendc, mod, dst, src0, exdesc, desc); |
1047 | } |
1048 | void sends(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const RegData &src1, uint32_t exdesc, uint32_t desc) { |
1049 | opSends(Opcode::sends, mod, dst, src0, src1, exdesc, desc); |
1050 | } |
1051 | void sends(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const RegData &src1, uint32_t exdesc, const RegData &desc) { |
1052 | opSends(Opcode::sends, mod, dst, src0, src1, exdesc, desc); |
1053 | } |
1054 | void sends(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const RegData &src1, const RegData &exdesc, uint32_t desc) { |
1055 | opSends(Opcode::sends, mod, dst, src0, src1, exdesc, desc); |
1056 | } |
1057 | void sends(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const RegData &src1, const RegData &exdesc, const RegData &desc) { |
1058 | opSends(Opcode::sends, mod, dst, src0, src1, exdesc, desc); |
1059 | } |
1060 | void sendsc(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const RegData &src1, uint32_t exdesc, uint32_t desc) { |
1061 | opSends(Opcode::sendsc, mod, dst, src0, src1, exdesc, desc); |
1062 | } |
1063 | void sendsc(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const RegData &src1, uint32_t exdesc, const RegData &desc) { |
1064 | opSends(Opcode::sendsc, mod, dst, src0, src1, exdesc, desc); |
1065 | } |
1066 | void sendsc(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const RegData &src1, const RegData &exdesc, uint32_t desc) { |
1067 | opSends(Opcode::sendsc, mod, dst, src0, src1, exdesc, desc); |
1068 | } |
1069 | void sendsc(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const RegData &src1, const RegData &exdesc, const RegData &desc) { |
1070 | opSends(Opcode::sendsc, mod, dst, src0, src1, exdesc, desc); |
1071 | } |
1072 | |
1073 | template <typename DT = void> |
1074 | void shl(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const RegData &src1) { |
1075 | opX(isGen12 ? Opcode::shl_gen12 : Opcode::shl, getDataType<DT>(), mod, dst, src0, src1); |
1076 | } |
1077 | template <typename DT = void> |
1078 | void shl(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const Immediate &src1) { |
1079 | opX(isGen12 ? Opcode::shl_gen12 : Opcode::shl, getDataType<DT>(), mod, dst, src0, src1); |
1080 | } |
1081 | template <typename DT = void> |
1082 | void shr(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const RegData &src1) { |
1083 | opX(isGen12 ? Opcode::shr_gen12 : Opcode::shr, getDataType<DT>(), mod, dst, src0, src1); |
1084 | } |
1085 | template <typename DT = void> |
1086 | void shr(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const Immediate &src1) { |
1087 | opX(isGen12 ? Opcode::shr_gen12 : Opcode::shr, getDataType<DT>(), mod, dst, src0, src1); |
1088 | } |
1089 | template <typename DT = void> |
1090 | void smov(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const Immediate &src1) { |
1091 | opX(isGen12 ? Opcode::smov_gen12 : Opcode::smov, getDataType<DT>(), mod, dst, src0, src1); |
1092 | } |
1093 | template <typename DT = void> |
1094 | void srnd(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const RegData &src1) { |
1095 | opX(Opcode::srnd, getDataType<DT>(), mod, dst, src0, src1); |
1096 | } |
1097 | template <typename DT = void> |
1098 | void srnd(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const Immediate &src1) { |
1099 | opX(Opcode::srnd, getDataType<DT>(), mod, dst, src0, src1); |
1100 | } |
1101 | template <typename DT = void> |
1102 | void subb(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const RegData &src1) { |
1103 | opX(Opcode::subb, getDataType<DT>(), mod | AccWrEn, dst, src0, src1); |
1104 | } |
1105 | template <typename DT = void> |
1106 | void subb(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const Immediate &src1) { |
1107 | opX(Opcode::subb, getDataType<DT>(), mod | AccWrEn, dst, src0, src1); |
1108 | } |
1109 | void wait(const InstructionModifier &mod, const RegData &nreg) { |
1110 | #ifdef NGEN_SAFE |
1111 | if (!nreg.isARF() || nreg.getARFType() != ARFType::n) throw invalid_arf_exception(); |
1112 | #endif |
1113 | opX(Opcode::wait, DataType::invalid, mod, nreg, nreg); |
1114 | } |
1115 | void while_(const InstructionModifier &mod, Label &jip) { |
1116 | opBranch(Opcode::while_, mod, null, jip); |
1117 | } |
1118 | template <typename DT = void> |
1119 | void xor_(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const RegData &src1) { |
1120 | opX(isGen12 ? Opcode::xor_gen12 : Opcode::xor_, getDataType<DT>(), mod, dst, src0, src1); |
1121 | } |
1122 | template <typename DT = void> |
1123 | void xor_(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const Immediate &src1) { |
1124 | opX(isGen12 ? Opcode::xor_gen12 : Opcode::xor_, getDataType<DT>(), mod, dst, src0, src1); |
1125 | } |
1126 | #ifndef NGEN_NO_OP_NAMES |
1127 | template <typename DT = void> |
1128 | void xor(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const RegData &src1) { |
1129 | xor_<DT>(mod, dst, src0, src1); |
1130 | } |
1131 | template <typename DT = void> |
1132 | void xor(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const Immediate &src1) { |
1133 | xor_<DT>(mod, dst, src0, src1); |
1134 | } |
1135 | #endif |
1136 | |
1137 | private: |
1138 | struct Sync { |
1139 | BinaryCodeGenerator<hw> &parent; |
1140 | |
1141 | Sync(BinaryCodeGenerator<hw> *parent_) : parent(*parent_) {} |
1142 | |
1143 | void operator()(SyncFunction fc, const InstructionModifier &mod = InstructionModifier()) { |
1144 | parent.opSync(Opcode::sync, fc, mod); |
1145 | } |
1146 | void operator()(SyncFunction fc, const RegData &src0) { |
1147 | this->operator()(fc, InstructionModifier(), src0); |
1148 | } |
1149 | void operator()(SyncFunction fc, const InstructionModifier &mod, const RegData &src0) { |
1150 | parent.opSync(Opcode::sync, fc, mod, src0); |
1151 | } |
1152 | void operator()(SyncFunction fc, int src0) { |
1153 | this->operator()(fc, InstructionModifier(), src0); |
1154 | } |
1155 | void operator()(SyncFunction fc, const InstructionModifier &mod, uint32_t src0) { |
1156 | parent.opSync(Opcode::sync, fc, mod, Immediate::ud(src0)); |
1157 | } |
1158 | void allrd() { |
1159 | allrd(null.ud(0)(0, 1, 1)); |
1160 | } |
1161 | void allrd(const InstructionModifier &mod) { |
1162 | allrd(mod, null.ud(0)(0, 1, 1)); |
1163 | } |
1164 | void allrd(const RegData &src0) { |
1165 | allrd(InstructionModifier(), src0); |
1166 | } |
1167 | void allrd(const InstructionModifier &mod, const RegData &src0) { |
1168 | this->operator()(SyncFunction::allrd, mod, src0); |
1169 | } |
1170 | void allrd(uint32_t src0) { |
1171 | allrd(InstructionModifier(), src0); |
1172 | } |
1173 | void allrd(const InstructionModifier &mod, uint32_t src0) { |
1174 | this->operator()(SyncFunction::allrd, mod, src0); |
1175 | } |
1176 | void allwr() { |
1177 | allwr(null); |
1178 | } |
1179 | void allwr(const InstructionModifier &mod) { |
1180 | allwr(mod, null); |
1181 | } |
1182 | void allwr(const RegData &src0) { |
1183 | allwr(InstructionModifier(), src0); |
1184 | } |
1185 | void allwr(const InstructionModifier &mod, const RegData &src0) { |
1186 | this->operator()(SyncFunction::allwr, mod, src0); |
1187 | } |
1188 | void allwr(uint32_t src0) { |
1189 | allwr(InstructionModifier(), src0); |
1190 | } |
1191 | void allwr(const InstructionModifier &mod, uint32_t src0) { |
1192 | this->operator()(SyncFunction::allwr, mod, src0); |
1193 | } |
1194 | void bar(const InstructionModifier &mod = InstructionModifier()) { |
1195 | this->operator()(SyncFunction::bar, mod); |
1196 | } |
1197 | void bar(const InstructionModifier &mod, uint32_t src0) { |
1198 | this->operator()(SyncFunction::bar, mod, src0); |
1199 | } |
1200 | void bar(const InstructionModifier &mod, const RegData &src0) { |
1201 | this->operator()(SyncFunction::bar, mod, src0); |
1202 | } |
1203 | void bar(uint32_t src0) { |
1204 | this->operator()(SyncFunction::bar, InstructionModifier(), src0); |
1205 | } |
1206 | void bar(const RegData &src0) { |
1207 | this->operator()(SyncFunction::bar, InstructionModifier(), src0); |
1208 | } |
1209 | void host(const InstructionModifier &mod = InstructionModifier()) { |
1210 | this->operator()(SyncFunction::host, mod); |
1211 | } |
1212 | void nop(const InstructionModifier &mod = InstructionModifier()) { |
1213 | this->operator()(SyncFunction::nop, mod); |
1214 | } |
1215 | }; |
1216 | public: |
1217 | Sync sync; |
1218 | |
1219 | |
1220 | private: |
1221 | struct Load { |
1222 | BinaryCodeGenerator<hw> &parent; |
1223 | |
1224 | Load(BinaryCodeGenerator<hw> *parent_) : parent(*parent_) {} |
1225 | |
1226 | template <typename DataSpec> |
1227 | void operator()(const InstructionModifier &mod, const RegData &dst, const DataSpec &spec, AddressBase base, const RegData &addr) |
1228 | { |
1229 | MessageDescriptor desc; |
1230 | ExtendedMessageDescriptor exdesc; |
1231 | |
1232 | encodeLoadDescriptors(hw, desc, exdesc, mod, dst, spec, base, addr); |
1233 | parent.send(mod, dst, addr, exdesc.all, desc.all); |
1234 | } |
1235 | |
1236 | template <typename DataSpec> |
1237 | void operator()(const InstructionModifier &mod, const RegData &dst, const DataSpec &spec, AddressBase base, const GRFDisp &addr) |
1238 | { |
1239 | MessageDescriptor desc; |
1240 | ExtendedMessageDescriptor exdesc; |
1241 | |
1242 | encodeLoadDescriptors(hw, desc, exdesc, mod, dst, spec, base, addr); |
1243 | parent.send(mod, dst, addr.getBase(), exdesc.all, desc.all); |
1244 | } |
1245 | |
1246 | template <typename DataSpec> |
1247 | void operator()(SharedFunction sfid, const InstructionModifier &mod, const RegData &dst, const DataSpec &spec, AddressBase base, const GRFDisp &addr) |
1248 | { |
1249 | MessageDescriptor desc; |
1250 | ExtendedMessageDescriptor exdesc; |
1251 | |
1252 | exdesc.parts.sfid = static_cast<unsigned>(sfid); |
1253 | encodeLoadDescriptors(hw, desc, exdesc, mod, dst, spec, base, addr); |
1254 | exdesc.parts.sfid = static_cast<unsigned>(sfid); |
1255 | parent.send(mod, dst, addr.getBase(), exdesc.all, desc.all); |
1256 | } |
1257 | |
1258 | void ugm(const InstructionModifier &mod, const RegData &dst, DataSpecLSC spec, AddressBase base, const GRFDisp &addr) |
1259 | { |
1260 | this->operator()(SharedFunction::ugm, mod, dst, spec, base, addr); |
1261 | } |
1262 | void ugml(const InstructionModifier &mod, const RegData &dst, DataSpecLSC spec, AddressBase base, const GRFDisp &addr) |
1263 | { |
1264 | this->operator()(SharedFunction::ugml, mod, dst, spec, base, addr); |
1265 | } |
1266 | void tgm(const InstructionModifier &mod, const RegData &dst, DataSpecLSC spec, AddressBase base, const GRFDisp &addr) |
1267 | { |
1268 | this->operator()(SharedFunction::tgm, mod, dst, spec, base, addr); |
1269 | } |
1270 | void slm(const InstructionModifier &mod, const RegData &dst, DataSpecLSC spec, AddressBase base, const GRFDisp &addr) |
1271 | { |
1272 | this->operator()(SharedFunction::slm, mod, dst, spec, base, addr); |
1273 | } |
1274 | }; |
1275 | |
1276 | struct Store { |
1277 | BinaryCodeGenerator<hw> &parent; |
1278 | |
1279 | Store(BinaryCodeGenerator<hw> *parent_) : parent(*parent_) {} |
1280 | |
1281 | template <typename DataSpec> |
1282 | void operator()(const InstructionModifier &mod, const DataSpec &spec, AddressBase base, const RegData &addr, const RegData &data) |
1283 | { |
1284 | MessageDescriptor desc; |
1285 | ExtendedMessageDescriptor exdesc; |
1286 | |
1287 | encodeStoreDescriptors(hw, desc, exdesc, mod, spec, base, addr); |
1288 | parent.sends(mod, NullRegister(), addr, data, exdesc.all, desc.all); |
1289 | } |
1290 | |
1291 | template <typename DataSpec> |
1292 | void operator()(const InstructionModifier &mod, const DataSpec &spec, AddressBase base, const GRFDisp &addr, const RegData &data) |
1293 | { |
1294 | MessageDescriptor desc; |
1295 | ExtendedMessageDescriptor exdesc; |
1296 | |
1297 | encodeStoreDescriptors(hw, desc, exdesc, mod, spec, base, addr); |
1298 | parent.sends(mod, NullRegister(), addr.getBase(), data, exdesc.all, desc.all); |
1299 | } |
1300 | |
1301 | template <typename DataSpec> |
1302 | void operator()(SharedFunction sfid, const InstructionModifier &mod, const DataSpec &spec, AddressBase base, const GRFDisp &addr, const RegData &data) |
1303 | { |
1304 | MessageDescriptor desc; |
1305 | ExtendedMessageDescriptor exdesc; |
1306 | |
1307 | exdesc.parts.sfid = static_cast<unsigned>(sfid); |
1308 | encodeStoreDescriptors(hw, desc, exdesc, mod, spec, base, addr); |
1309 | exdesc.parts.sfid = static_cast<unsigned>(sfid); |
1310 | parent.sends(mod, NullRegister(), addr.getBase(), data, exdesc.all, desc.all); |
1311 | } |
1312 | |
1313 | void ugm(const InstructionModifier &mod, DataSpecLSC spec, AddressBase base, const GRFDisp &addr, const RegData &data) |
1314 | { |
1315 | this->operator()(SharedFunction::ugm, mod, spec, base, addr, data); |
1316 | } |
1317 | void ugml(const InstructionModifier &mod, DataSpecLSC spec, AddressBase base, const GRFDisp &addr, const RegData &data) |
1318 | { |
1319 | this->operator()(SharedFunction::ugml, mod, spec, base, addr, data); |
1320 | } |
1321 | void tgm(const InstructionModifier &mod, DataSpecLSC spec, AddressBase base, const GRFDisp &addr, const RegData &data) |
1322 | { |
1323 | this->operator()(SharedFunction::tgm, mod, spec, base, addr, data); |
1324 | } |
1325 | void slm(const InstructionModifier &mod, DataSpecLSC spec, AddressBase base, const GRFDisp &addr, const RegData &data) |
1326 | { |
1327 | this->operator()(SharedFunction::slm, mod, spec, base, addr, data); |
1328 | } |
1329 | }; |
1330 | |
1331 | struct Atomic_ { |
1332 | BinaryCodeGenerator<hw> &parent; |
1333 | |
1334 | Atomic_(BinaryCodeGenerator<hw> *parent_) : parent(*parent_) {} |
1335 | |
1336 | template <typename DataSpec> |
1337 | void operator()(AtomicOp op, const InstructionModifier &mod, const RegData &dst, const DataSpec &spec, AddressBase base, const RegData &addr, const RegData &data = NullRegister()) |
1338 | { |
1339 | MessageDescriptor desc; |
1340 | ExtendedMessageDescriptor exdesc; |
1341 | |
1342 | encodeAtomicDescriptors(hw, desc, exdesc, op, mod, dst, spec, base, addr); |
1343 | if (data.isNull()) |
1344 | parent.send(mod, dst, addr, exdesc.all, desc.all); |
1345 | else |
1346 | parent.sends(mod, dst, addr, data, exdesc.all, desc.all); |
1347 | } |
1348 | template <typename DataSpec> |
1349 | void operator()(AtomicOp op, const InstructionModifier &mod, const DataSpec &spec, AddressBase base, const RegData &addr, const RegData &data = NullRegister()) |
1350 | { |
1351 | (*this)(op, mod, NullRegister(), spec, base, addr, data); |
1352 | } |
1353 | |
1354 | template <typename DataSpec> |
1355 | void operator()(AtomicOp op, const InstructionModifier &mod, const RegData &dst, const DataSpec &spec, AddressBase base, const GRFDisp &addr, const RegData &data = NullRegister()) |
1356 | { |
1357 | MessageDescriptor desc; |
1358 | ExtendedMessageDescriptor exdesc; |
1359 | |
1360 | encodeAtomicDescriptors(hw, desc, exdesc, op, mod, dst, spec, base, addr); |
1361 | parent.sends(mod, dst, addr.getBase(), data, exdesc.all, desc.all); |
1362 | } |
1363 | template <typename DataSpec> |
1364 | void operator()(AtomicOp op, const InstructionModifier &mod, const DataSpec &spec, AddressBase base, const GRFDisp &addr, const RegData &data = NullRegister()) |
1365 | { |
1366 | (*this)(op, mod, NullRegister(), spec, base, addr, data); |
1367 | } |
1368 | template <typename DataSpec> |
1369 | void operator()(SharedFunction sfid, AtomicOp op, const InstructionModifier &mod, const RegData &dst, const DataSpec &spec, AddressBase base, const GRFDisp &addr, const RegData &data) |
1370 | { |
1371 | MessageDescriptor desc; |
1372 | ExtendedMessageDescriptor exdesc; |
1373 | |
1374 | exdesc.parts.sfid = static_cast<unsigned>(sfid); |
1375 | encodeAtomicDescriptors(hw, desc, exdesc, op, mod, dst, spec, base, addr); |
1376 | exdesc.parts.sfid = static_cast<unsigned>(sfid); |
1377 | parent.sends(mod, dst, addr.getBase(), data, exdesc.all, desc.all); |
1378 | } |
1379 | |
1380 | void ugm(AtomicOp op, const InstructionModifier &mod, const RegData &dst, DataSpecLSC spec, AddressBase base, const GRFDisp &addr, const RegData &data = NullRegister()) |
1381 | { |
1382 | this->operator()(SharedFunction::ugm, op, mod, dst, spec, base, addr, data); |
1383 | } |
1384 | void ugm(AtomicOp op, const InstructionModifier &mod, DataSpecLSC spec, AddressBase base, const GRFDisp &addr, const RegData &data = NullRegister()) |
1385 | { |
1386 | this->operator()(SharedFunction::ugm, op, mod, NullRegister(), spec, base, addr, data); |
1387 | } |
1388 | void ugml(AtomicOp op, const InstructionModifier &mod, const RegData &dst, DataSpecLSC spec, AddressBase base, const GRFDisp &addr, const RegData &data = NullRegister()) |
1389 | { |
1390 | this->operator()(SharedFunction::ugml, op, mod, dst, spec, base, addr, data); |
1391 | } |
1392 | void ugml(AtomicOp op, const InstructionModifier &mod, DataSpecLSC spec, AddressBase base, const GRFDisp &addr, const RegData &data = NullRegister()) |
1393 | { |
1394 | this->operator()(SharedFunction::ugml, op, mod, NullRegister(), spec, base, addr, data); |
1395 | } |
1396 | void tgm(AtomicOp op, const InstructionModifier &mod, const RegData &dst, DataSpecLSC spec, AddressBase base, const GRFDisp &addr, const RegData &data = NullRegister()) |
1397 | { |
1398 | this->operator()(SharedFunction::tgm, op, mod, dst, spec, base, addr, data); |
1399 | } |
1400 | void tgm(AtomicOp op, const InstructionModifier &mod, DataSpecLSC spec, AddressBase base, const GRFDisp &addr, const RegData &data = NullRegister()) |
1401 | { |
1402 | this->operator()(SharedFunction::tgm, op, mod, NullRegister(), spec, base, addr, data); |
1403 | } |
1404 | void slm(AtomicOp op, const InstructionModifier &mod, const RegData &dst, DataSpecLSC spec, AddressBase base, const GRFDisp &addr, const RegData &data = NullRegister()) |
1405 | { |
1406 | this->operator()(SharedFunction::slm, op, mod, dst, spec, base, addr, data); |
1407 | } |
1408 | void slm(AtomicOp op, const InstructionModifier &mod, DataSpecLSC spec, AddressBase base, const GRFDisp &addr, const RegData &data = NullRegister()) |
1409 | { |
1410 | this->operator()(SharedFunction::slm, op, mod, NullRegister(), spec, base, addr, data); |
1411 | } |
1412 | }; |
1413 | public: |
1414 | Load load; |
1415 | Store store; |
1416 | Atomic_ atomic; |
1417 | |
1418 | void wrdep(const GRFRange &r) { |
1419 | int len = r.getLen(); |
1420 | for (int o = 0; o < len; o += 32) { |
1421 | int thisLen = std::min(len - o, 32); |
1422 | opX(Opcode::wrdep, DataType::ud, InstructionModifier::createAutoSWSB(), null, r[o], r[o + thisLen - 1]); |
1423 | } |
1424 | } |
1425 | void wrdep(const GRF &r) { |
1426 | wrdep(r-r); |
1427 | } |
1428 | |
1429 | #include "ngen_pseudo.hpp" |
1430 | }; |
1431 | |
// Helper for NGEN_FORWARD: forwarder for a BinaryCodeGenerator member that
// accepts an optional data-type template argument DT.
#define NGEN_FORWARD_TEMPLATED_INSN_(hw, op) \
template <typename DT = void, typename... Targs> void op(Targs&&... args) { ngen::BinaryCodeGenerator<hw>::template op<DT>(std::forward<Targs>(args)...); }

// Helper for NGEN_FORWARD: forwarder for a BinaryCodeGenerator member that
// takes no data-type template argument and returns void.
#define NGEN_FORWARD_PLAIN_INSN_(hw, op) \
template <typename... Targs> void op(Targs&&... args) { ngen::BinaryCodeGenerator<hw>::op(std::forward<Targs>(args)...); }

// NGEN_FORWARD(hw): expands to using-declarations and perfect-forwarding
// member templates that re-expose the members of
// ngen::BinaryCodeGenerator<hw> listed below. The trailing
// NGEN_FORWARD_EXTRA / _OP_NAMES / _MIN_MAX / _REGISTERS macros append
// further declarations.
#define NGEN_FORWARD(hw) \
using InstructionStream = typename ngen::BinaryCodeGenerator<hw>::InstructionStream; \
using ngen::BinaryCodeGenerator<hw>::isGen12; \
NGEN_FORWARD_TEMPLATED_INSN_(hw, add) \
NGEN_FORWARD_TEMPLATED_INSN_(hw, addc) \
NGEN_FORWARD_TEMPLATED_INSN_(hw, and_) \
NGEN_FORWARD_TEMPLATED_INSN_(hw, asr) \
NGEN_FORWARD_TEMPLATED_INSN_(hw, avg) \
NGEN_FORWARD_TEMPLATED_INSN_(hw, bfe) \
NGEN_FORWARD_TEMPLATED_INSN_(hw, bfi1) \
NGEN_FORWARD_TEMPLATED_INSN_(hw, bfi2) \
NGEN_FORWARD_TEMPLATED_INSN_(hw, bfrev) \
NGEN_FORWARD_TEMPLATED_INSN_(hw, cbit) \
NGEN_FORWARD_TEMPLATED_INSN_(hw, cmp) \
NGEN_FORWARD_TEMPLATED_INSN_(hw, cmpn) \
NGEN_FORWARD_TEMPLATED_INSN_(hw, csel) \
NGEN_FORWARD_TEMPLATED_INSN_(hw, dp2) \
NGEN_FORWARD_TEMPLATED_INSN_(hw, dp3) \
NGEN_FORWARD_TEMPLATED_INSN_(hw, dp4) \
NGEN_FORWARD_TEMPLATED_INSN_(hw, dph) \
NGEN_FORWARD_TEMPLATED_INSN_(hw, fbh) \
NGEN_FORWARD_TEMPLATED_INSN_(hw, fbl) \
NGEN_FORWARD_TEMPLATED_INSN_(hw, frc) \
NGEN_FORWARD_TEMPLATED_INSN_(hw, line) \
NGEN_FORWARD_TEMPLATED_INSN_(hw, lrp) \
NGEN_FORWARD_TEMPLATED_INSN_(hw, lzd) \
NGEN_FORWARD_TEMPLATED_INSN_(hw, mac) \
NGEN_FORWARD_TEMPLATED_INSN_(hw, macl) \
NGEN_FORWARD_TEMPLATED_INSN_(hw, mach) \
NGEN_FORWARD_TEMPLATED_INSN_(hw, mad) \
NGEN_FORWARD_TEMPLATED_INSN_(hw, madm) \
NGEN_FORWARD_TEMPLATED_INSN_(hw, math) \
NGEN_FORWARD_TEMPLATED_INSN_(hw, mov) \
NGEN_FORWARD_TEMPLATED_INSN_(hw, movi) \
NGEN_FORWARD_TEMPLATED_INSN_(hw, mul) \
NGEN_FORWARD_TEMPLATED_INSN_(hw, not_) \
NGEN_FORWARD_TEMPLATED_INSN_(hw, or_) \
NGEN_FORWARD_TEMPLATED_INSN_(hw, pln) \
NGEN_FORWARD_TEMPLATED_INSN_(hw, rndd) \
NGEN_FORWARD_TEMPLATED_INSN_(hw, rnde) \
NGEN_FORWARD_TEMPLATED_INSN_(hw, rndu) \
NGEN_FORWARD_TEMPLATED_INSN_(hw, rndz) \
NGEN_FORWARD_TEMPLATED_INSN_(hw, rol) \
NGEN_FORWARD_TEMPLATED_INSN_(hw, ror) \
NGEN_FORWARD_TEMPLATED_INSN_(hw, sad2) \
NGEN_FORWARD_TEMPLATED_INSN_(hw, sada2) \
NGEN_FORWARD_TEMPLATED_INSN_(hw, sel) \
NGEN_FORWARD_TEMPLATED_INSN_(hw, shl) \
NGEN_FORWARD_TEMPLATED_INSN_(hw, shr) \
NGEN_FORWARD_TEMPLATED_INSN_(hw, smov) \
NGEN_FORWARD_TEMPLATED_INSN_(hw, subb) \
NGEN_FORWARD_TEMPLATED_INSN_(hw, xor_) \
NGEN_FORWARD_PLAIN_INSN_(hw, brc) \
NGEN_FORWARD_PLAIN_INSN_(hw, brd) \
NGEN_FORWARD_PLAIN_INSN_(hw, break_) \
NGEN_FORWARD_PLAIN_INSN_(hw, call) \
NGEN_FORWARD_PLAIN_INSN_(hw, calla) \
NGEN_FORWARD_PLAIN_INSN_(hw, cont) \
NGEN_FORWARD_PLAIN_INSN_(hw, else_) \
NGEN_FORWARD_PLAIN_INSN_(hw, endif) \
NGEN_FORWARD_PLAIN_INSN_(hw, goto_) \
NGEN_FORWARD_PLAIN_INSN_(hw, halt) \
NGEN_FORWARD_PLAIN_INSN_(hw, if_) \
NGEN_FORWARD_PLAIN_INSN_(hw, illegal) \
NGEN_FORWARD_PLAIN_INSN_(hw, join) \
NGEN_FORWARD_PLAIN_INSN_(hw, jmpi) \
NGEN_FORWARD_PLAIN_INSN_(hw, nop) \
NGEN_FORWARD_PLAIN_INSN_(hw, ret) \
NGEN_FORWARD_PLAIN_INSN_(hw, send) \
NGEN_FORWARD_PLAIN_INSN_(hw, sendc) \
NGEN_FORWARD_PLAIN_INSN_(hw, sends) \
NGEN_FORWARD_PLAIN_INSN_(hw, sendsc) \
using ngen::BinaryCodeGenerator<hw>::sync; \
NGEN_FORWARD_PLAIN_INSN_(hw, wait) \
NGEN_FORWARD_PLAIN_INSN_(hw, while_) \
NGEN_FORWARD_PLAIN_INSN_(hw, wrdep) \
NGEN_FORWARD_TEMPLATED_INSN_(hw, min_) \
NGEN_FORWARD_TEMPLATED_INSN_(hw, max_) \
NGEN_FORWARD_TEMPLATED_INSN_(hw, bfi) \
NGEN_FORWARD_TEMPLATED_INSN_(hw, cos) \
NGEN_FORWARD_TEMPLATED_INSN_(hw, exp) \
NGEN_FORWARD_TEMPLATED_INSN_(hw, fdiv) \
NGEN_FORWARD_TEMPLATED_INSN_(hw, idiv) \
NGEN_FORWARD_TEMPLATED_INSN_(hw, inv) \
NGEN_FORWARD_TEMPLATED_INSN_(hw, invm) \
NGEN_FORWARD_TEMPLATED_INSN_(hw, iqot) \
NGEN_FORWARD_TEMPLATED_INSN_(hw, irem) \
NGEN_FORWARD_TEMPLATED_INSN_(hw, log) \
NGEN_FORWARD_TEMPLATED_INSN_(hw, pow) \
NGEN_FORWARD_TEMPLATED_INSN_(hw, rsqt) \
NGEN_FORWARD_TEMPLATED_INSN_(hw, rsqtm) \
NGEN_FORWARD_TEMPLATED_INSN_(hw, sin) \
NGEN_FORWARD_TEMPLATED_INSN_(hw, sqt) \
NGEN_FORWARD_TEMPLATED_INSN_(hw, fdiv_ieee) \
NGEN_FORWARD_TEMPLATED_INSN_(hw, inv_ieee) \
NGEN_FORWARD_TEMPLATED_INSN_(hw, sqt_ieee) \
NGEN_FORWARD_PLAIN_INSN_(hw, threadend) \
NGEN_FORWARD_PLAIN_INSN_(hw, barrierheader) \
NGEN_FORWARD_PLAIN_INSN_(hw, barriermsg) \
NGEN_FORWARD_PLAIN_INSN_(hw, barriersignal) \
NGEN_FORWARD_PLAIN_INSN_(hw, barrierwait) \
NGEN_FORWARD_PLAIN_INSN_(hw, barrier) \
using ngen::BinaryCodeGenerator<hw>::load; \
using ngen::BinaryCodeGenerator<hw>::store; \
using ngen::BinaryCodeGenerator<hw>::atomic; \
NGEN_FORWARD_PLAIN_INSN_(hw, memfence) \
NGEN_FORWARD_PLAIN_INSN_(hw, slmfence) \
NGEN_FORWARD_PLAIN_INSN_(hw, loadlid) \
NGEN_FORWARD_PLAIN_INSN_(hw, loadargs) \
template <typename... Targs> void epilogue(int GRFCount, bool hasSLM, const ngen::RegData &r0_info) { ngen::BinaryCodeGenerator<hw>::epilogue(GRFCount, hasSLM, r0_info); } \
NGEN_FORWARD_PLAIN_INSN_(hw, pushStream) \
template <typename... Targs> InstructionStream *popStream(Targs&&... args) { return ngen::BinaryCodeGenerator<hw>::popStream(std::forward<Targs>(args)...); } \
NGEN_FORWARD_PLAIN_INSN_(hw, appendStream) \
NGEN_FORWARD_PLAIN_INSN_(hw, appendCurrentStream) \
NGEN_FORWARD_PLAIN_INSN_(hw, discardStream) \
NGEN_FORWARD_PLAIN_INSN_(hw, mark) \
NGEN_FORWARD_PLAIN_INSN_(hw, comment) \
NGEN_FORWARD_PLAIN_INSN_(hw, setDefaultNoMask) \
NGEN_FORWARD_PLAIN_INSN_(hw, setDefaultAutoSWSB) \
bool getDefaultNoMask() { return ngen::BinaryCodeGenerator<hw>::getDefaultNoMask(); } \
bool getDefaultAutoSWSB() { return ngen::BinaryCodeGenerator<hw>::getDefaultAutoSWSB(); } \
using ngen::BinaryCodeGenerator<hw>::stepping; \
int getStepping() { return ngen::BinaryCodeGenerator<hw>::getStepping(); } \
void setStepping(int stepping_) { ngen::BinaryCodeGenerator<hw>::setStepping(stepping_); } \
NGEN_FORWARD_EXTRA \
NGEN_FORWARD_OP_NAMES \
NGEN_FORWARD_MIN_MAX \
NGEN_FORWARD_REGISTERS
1560 | |
// NGEN_FORWARD_EXTRA: additional forwarders (add3, bfn, dp4a, dpas, dpasw)
// that NGEN_FORWARD appends to its expansion.
//
// This macro takes no parameters, so the `hw` token in its body is emitted
// literally: a name `hw` must be in scope wherever NGEN_FORWARD is expanded.
//
// The macro name was previously missing from the #define line. After line
// splicing that turned the directive into a definition of a macro called
// `template`, and left NGEN_FORWARD_EXTRA undefined.
#define NGEN_FORWARD_EXTRA \
template <typename DT = void, typename... Targs> void add3(Targs&&... args) { ngen::BinaryCodeGenerator<hw>::template add3<DT>(std::forward<Targs>(args)...); } \
template <typename DT = void, typename... Targs> void bfn(Targs&&... args) { ngen::BinaryCodeGenerator<hw>::template bfn<DT>(std::forward<Targs>(args)...); } \
template <typename DT = void, typename... Targs> void dp4a(Targs&&... args) { ngen::BinaryCodeGenerator<hw>::template dp4a<DT>(std::forward<Targs>(args)...); } \
template <typename DT = void, typename... Targs> void dpas(Targs&&... args) { ngen::BinaryCodeGenerator<hw>::template dpas<DT>(std::forward<Targs>(args)...); } \
template <typename DT = void, typename... Targs> void dpasw(Targs&&... args) { ngen::BinaryCodeGenerator<hw>::template dpasw<DT>(std::forward<Targs>(args)...); }
1567 | |
// Forwarders named and/not/or/xor, each calling the underscore-suffixed
// ngen::BinaryCodeGenerator<hw> member (and_, not_, or_, xor_).
// These names are alternative operator tokens in standard C++, so defining
// NGEN_NO_OP_NAMES turns this macro into a no-op and leaves only the
// underscore-suffixed members available.
#if defined(NGEN_NO_OP_NAMES)
#define NGEN_FORWARD_OP_NAMES
#else
#define NGEN_FORWARD_OP_NAMES \
template <typename DT = void, typename... Args> void and(Args&&... operands) { ngen::BinaryCodeGenerator<hw>::template and_<DT>(std::forward<Args>(operands)...); } \
template <typename DT = void, typename... Args> void not(Args&&... operands) { ngen::BinaryCodeGenerator<hw>::template not_<DT>(std::forward<Args>(operands)...); } \
template <typename DT = void, typename... Args> void or(Args&&... operands) { ngen::BinaryCodeGenerator<hw>::template or_<DT>(std::forward<Args>(operands)...); } \
template <typename DT = void, typename... Args> void xor(Args&&... operands) { ngen::BinaryCodeGenerator<hw>::template xor_<DT>(std::forward<Args>(operands)...); }
#endif
1577 | |
// Forwarders for min and max. With NGEN_WINDOWS_COMPAT defined the macro
// expands to nothing, so no members named min/max are declared.
// NOTE(review): presumably this avoids clashes with min/max macros from
// Windows headers -- confirm.
#if defined(NGEN_WINDOWS_COMPAT)
#define NGEN_FORWARD_MIN_MAX
#else
#define NGEN_FORWARD_MIN_MAX \
template <typename DT = void, typename... Args> void min(Args&&... operands) { ngen::BinaryCodeGenerator<hw>::template min<DT>(std::forward<Args>(operands)...); } \
template <typename DT = void, typename... Args> void max(Args&&... operands) { ngen::BinaryCodeGenerator<hw>::template max<DT>(std::forward<Args>(operands)...); }
#endif
1585 | |
// NGEN_FORWARD_REGISTERS re-exports the register names, instruction modifiers
// and related helpers of ngen::BinaryCodeGenerator<hw> into the class that
// expands it. When NGEN_GLOBAL_REGS is defined the macro is empty.
// Otherwise the list is split across four helper macros:
//   NGEN_FORWARD_REGISTERS_BASE   - r0..r127, the other register and modifier
//                                   names, and the ExecutionOffset/Surface/CC/SC
//                                   forwarders
//   NGEN_FORWARD_REGISTERS_EXTRA1 - r128..r255
//   NGEN_FORWARD_REGISTERS_EXTRA2 - the D*/V* names, transpose and the L1*_L3* names
//   NGEN_FORWARD_REGISTERS_EXTRA3 - any/all, f2/f3, sb16..sb31, NoAccSBSet, vnni
// Every helper must carry exactly the name NGEN_FORWARD_REGISTERS expands at
// the bottom of this block.
#ifdef NGEN_GLOBAL_REGS
#define NGEN_FORWARD_REGISTERS
#else
#define NGEN_FORWARD_REGISTERS_BASE \
using ngen::BinaryCodeGenerator<hw>::indirect; \
using ngen::BinaryCodeGenerator<hw>::r0; using ngen::BinaryCodeGenerator<hw>::r1; using ngen::BinaryCodeGenerator<hw>::r2; using ngen::BinaryCodeGenerator<hw>::r3; \
using ngen::BinaryCodeGenerator<hw>::r4; using ngen::BinaryCodeGenerator<hw>::r5; using ngen::BinaryCodeGenerator<hw>::r6; using ngen::BinaryCodeGenerator<hw>::r7; \
using ngen::BinaryCodeGenerator<hw>::r8; using ngen::BinaryCodeGenerator<hw>::r9; using ngen::BinaryCodeGenerator<hw>::r10; using ngen::BinaryCodeGenerator<hw>::r11; \
using ngen::BinaryCodeGenerator<hw>::r12; using ngen::BinaryCodeGenerator<hw>::r13; using ngen::BinaryCodeGenerator<hw>::r14; using ngen::BinaryCodeGenerator<hw>::r15; \
using ngen::BinaryCodeGenerator<hw>::r16; using ngen::BinaryCodeGenerator<hw>::r17; using ngen::BinaryCodeGenerator<hw>::r18; using ngen::BinaryCodeGenerator<hw>::r19; \
using ngen::BinaryCodeGenerator<hw>::r20; using ngen::BinaryCodeGenerator<hw>::r21; using ngen::BinaryCodeGenerator<hw>::r22; using ngen::BinaryCodeGenerator<hw>::r23; \
using ngen::BinaryCodeGenerator<hw>::r24; using ngen::BinaryCodeGenerator<hw>::r25; using ngen::BinaryCodeGenerator<hw>::r26; using ngen::BinaryCodeGenerator<hw>::r27; \
using ngen::BinaryCodeGenerator<hw>::r28; using ngen::BinaryCodeGenerator<hw>::r29; using ngen::BinaryCodeGenerator<hw>::r30; using ngen::BinaryCodeGenerator<hw>::r31; \
using ngen::BinaryCodeGenerator<hw>::r32; using ngen::BinaryCodeGenerator<hw>::r33; using ngen::BinaryCodeGenerator<hw>::r34; using ngen::BinaryCodeGenerator<hw>::r35; \
using ngen::BinaryCodeGenerator<hw>::r36; using ngen::BinaryCodeGenerator<hw>::r37; using ngen::BinaryCodeGenerator<hw>::r38; using ngen::BinaryCodeGenerator<hw>::r39; \
using ngen::BinaryCodeGenerator<hw>::r40; using ngen::BinaryCodeGenerator<hw>::r41; using ngen::BinaryCodeGenerator<hw>::r42; using ngen::BinaryCodeGenerator<hw>::r43; \
using ngen::BinaryCodeGenerator<hw>::r44; using ngen::BinaryCodeGenerator<hw>::r45; using ngen::BinaryCodeGenerator<hw>::r46; using ngen::BinaryCodeGenerator<hw>::r47; \
using ngen::BinaryCodeGenerator<hw>::r48; using ngen::BinaryCodeGenerator<hw>::r49; using ngen::BinaryCodeGenerator<hw>::r50; using ngen::BinaryCodeGenerator<hw>::r51; \
using ngen::BinaryCodeGenerator<hw>::r52; using ngen::BinaryCodeGenerator<hw>::r53; using ngen::BinaryCodeGenerator<hw>::r54; using ngen::BinaryCodeGenerator<hw>::r55; \
using ngen::BinaryCodeGenerator<hw>::r56; using ngen::BinaryCodeGenerator<hw>::r57; using ngen::BinaryCodeGenerator<hw>::r58; using ngen::BinaryCodeGenerator<hw>::r59; \
using ngen::BinaryCodeGenerator<hw>::r60; using ngen::BinaryCodeGenerator<hw>::r61; using ngen::BinaryCodeGenerator<hw>::r62; using ngen::BinaryCodeGenerator<hw>::r63; \
using ngen::BinaryCodeGenerator<hw>::r64; using ngen::BinaryCodeGenerator<hw>::r65; using ngen::BinaryCodeGenerator<hw>::r66; using ngen::BinaryCodeGenerator<hw>::r67; \
using ngen::BinaryCodeGenerator<hw>::r68; using ngen::BinaryCodeGenerator<hw>::r69; using ngen::BinaryCodeGenerator<hw>::r70; using ngen::BinaryCodeGenerator<hw>::r71; \
using ngen::BinaryCodeGenerator<hw>::r72; using ngen::BinaryCodeGenerator<hw>::r73; using ngen::BinaryCodeGenerator<hw>::r74; using ngen::BinaryCodeGenerator<hw>::r75; \
using ngen::BinaryCodeGenerator<hw>::r76; using ngen::BinaryCodeGenerator<hw>::r77; using ngen::BinaryCodeGenerator<hw>::r78; using ngen::BinaryCodeGenerator<hw>::r79; \
using ngen::BinaryCodeGenerator<hw>::r80; using ngen::BinaryCodeGenerator<hw>::r81; using ngen::BinaryCodeGenerator<hw>::r82; using ngen::BinaryCodeGenerator<hw>::r83; \
using ngen::BinaryCodeGenerator<hw>::r84; using ngen::BinaryCodeGenerator<hw>::r85; using ngen::BinaryCodeGenerator<hw>::r86; using ngen::BinaryCodeGenerator<hw>::r87; \
using ngen::BinaryCodeGenerator<hw>::r88; using ngen::BinaryCodeGenerator<hw>::r89; using ngen::BinaryCodeGenerator<hw>::r90; using ngen::BinaryCodeGenerator<hw>::r91; \
using ngen::BinaryCodeGenerator<hw>::r92; using ngen::BinaryCodeGenerator<hw>::r93; using ngen::BinaryCodeGenerator<hw>::r94; using ngen::BinaryCodeGenerator<hw>::r95; \
using ngen::BinaryCodeGenerator<hw>::r96; using ngen::BinaryCodeGenerator<hw>::r97; using ngen::BinaryCodeGenerator<hw>::r98; using ngen::BinaryCodeGenerator<hw>::r99; \
using ngen::BinaryCodeGenerator<hw>::r100; using ngen::BinaryCodeGenerator<hw>::r101; using ngen::BinaryCodeGenerator<hw>::r102; using ngen::BinaryCodeGenerator<hw>::r103; \
using ngen::BinaryCodeGenerator<hw>::r104; using ngen::BinaryCodeGenerator<hw>::r105; using ngen::BinaryCodeGenerator<hw>::r106; using ngen::BinaryCodeGenerator<hw>::r107; \
using ngen::BinaryCodeGenerator<hw>::r108; using ngen::BinaryCodeGenerator<hw>::r109; using ngen::BinaryCodeGenerator<hw>::r110; using ngen::BinaryCodeGenerator<hw>::r111; \
using ngen::BinaryCodeGenerator<hw>::r112; using ngen::BinaryCodeGenerator<hw>::r113; using ngen::BinaryCodeGenerator<hw>::r114; using ngen::BinaryCodeGenerator<hw>::r115; \
using ngen::BinaryCodeGenerator<hw>::r116; using ngen::BinaryCodeGenerator<hw>::r117; using ngen::BinaryCodeGenerator<hw>::r118; using ngen::BinaryCodeGenerator<hw>::r119; \
using ngen::BinaryCodeGenerator<hw>::r120; using ngen::BinaryCodeGenerator<hw>::r121; using ngen::BinaryCodeGenerator<hw>::r122; using ngen::BinaryCodeGenerator<hw>::r123; \
using ngen::BinaryCodeGenerator<hw>::r124; using ngen::BinaryCodeGenerator<hw>::r125; using ngen::BinaryCodeGenerator<hw>::r126; using ngen::BinaryCodeGenerator<hw>::r127; \
using ngen::BinaryCodeGenerator<hw>::null; \
using ngen::BinaryCodeGenerator<hw>::a0; \
using ngen::BinaryCodeGenerator<hw>::acc0; using ngen::BinaryCodeGenerator<hw>::acc1; using ngen::BinaryCodeGenerator<hw>::acc2; using ngen::BinaryCodeGenerator<hw>::acc3; \
using ngen::BinaryCodeGenerator<hw>::acc4; using ngen::BinaryCodeGenerator<hw>::acc5; using ngen::BinaryCodeGenerator<hw>::acc6; using ngen::BinaryCodeGenerator<hw>::acc7; \
using ngen::BinaryCodeGenerator<hw>::acc8; using ngen::BinaryCodeGenerator<hw>::acc9; \
using ngen::BinaryCodeGenerator<hw>::mme0; using ngen::BinaryCodeGenerator<hw>::mme1; using ngen::BinaryCodeGenerator<hw>::mme2; using ngen::BinaryCodeGenerator<hw>::mme3; \
using ngen::BinaryCodeGenerator<hw>::mme4; using ngen::BinaryCodeGenerator<hw>::mme5; using ngen::BinaryCodeGenerator<hw>::mme6; using ngen::BinaryCodeGenerator<hw>::mme7; \
using ngen::BinaryCodeGenerator<hw>::noacc; using ngen::BinaryCodeGenerator<hw>::nomme; \
using ngen::BinaryCodeGenerator<hw>::f0; using ngen::BinaryCodeGenerator<hw>::f1; \
using ngen::BinaryCodeGenerator<hw>::ce0; using ngen::BinaryCodeGenerator<hw>::sp; using ngen::BinaryCodeGenerator<hw>::sr0; using ngen::BinaryCodeGenerator<hw>::sr1; \
using ngen::BinaryCodeGenerator<hw>::cr0; using ngen::BinaryCodeGenerator<hw>::n0; using ngen::BinaryCodeGenerator<hw>::ip; using ngen::BinaryCodeGenerator<hw>::tdr0; \
using ngen::BinaryCodeGenerator<hw>::tm0; using ngen::BinaryCodeGenerator<hw>::tm1; using ngen::BinaryCodeGenerator<hw>::tm2; using ngen::BinaryCodeGenerator<hw>::tm3; \
using ngen::BinaryCodeGenerator<hw>::tm4; using ngen::BinaryCodeGenerator<hw>::pm0; using ngen::BinaryCodeGenerator<hw>::tp0; using ngen::BinaryCodeGenerator<hw>::dbg0; \
using ngen::BinaryCodeGenerator<hw>::fc0; using ngen::BinaryCodeGenerator<hw>::fc1; using ngen::BinaryCodeGenerator<hw>::fc2; using ngen::BinaryCodeGenerator<hw>::fc3; \
using ngen::BinaryCodeGenerator<hw>::NoDDClr; using ngen::BinaryCodeGenerator<hw>::NoDDChk; \
using ngen::BinaryCodeGenerator<hw>::AccWrEn; using ngen::BinaryCodeGenerator<hw>::NoSrcDepSet; using ngen::BinaryCodeGenerator<hw>::Breakpoint; using ngen::BinaryCodeGenerator<hw>::sat; \
using ngen::BinaryCodeGenerator<hw>::NoMask; \
using ngen::BinaryCodeGenerator<hw>::Serialize; using ngen::BinaryCodeGenerator<hw>::EOT; \
using ngen::BinaryCodeGenerator<hw>::Atomic; using ngen::BinaryCodeGenerator<hw>::Switch; using ngen::BinaryCodeGenerator<hw>::NoPreempt; \
using ngen::BinaryCodeGenerator<hw>::anyv; using ngen::BinaryCodeGenerator<hw>::allv; using ngen::BinaryCodeGenerator<hw>::any2h; using ngen::BinaryCodeGenerator<hw>::all2h; \
using ngen::BinaryCodeGenerator<hw>::any4h; using ngen::BinaryCodeGenerator<hw>::all4h; using ngen::BinaryCodeGenerator<hw>::any8h; using ngen::BinaryCodeGenerator<hw>::all8h; \
using ngen::BinaryCodeGenerator<hw>::any16h; using ngen::BinaryCodeGenerator<hw>::all16h; using ngen::BinaryCodeGenerator<hw>::any32h; using ngen::BinaryCodeGenerator<hw>::all32h; \
using ngen::BinaryCodeGenerator<hw>::x_repl; using ngen::BinaryCodeGenerator<hw>::y_repl; using ngen::BinaryCodeGenerator<hw>::z_repl; using ngen::BinaryCodeGenerator<hw>::w_repl; \
using ngen::BinaryCodeGenerator<hw>::ze; using ngen::BinaryCodeGenerator<hw>::eq; using ngen::BinaryCodeGenerator<hw>::nz; using ngen::BinaryCodeGenerator<hw>::ne; \
using ngen::BinaryCodeGenerator<hw>::gt; using ngen::BinaryCodeGenerator<hw>::ge; using ngen::BinaryCodeGenerator<hw>::lt; using ngen::BinaryCodeGenerator<hw>::le; \
using ngen::BinaryCodeGenerator<hw>::ov; using ngen::BinaryCodeGenerator<hw>::un; using ngen::BinaryCodeGenerator<hw>::eo; \
using ngen::BinaryCodeGenerator<hw>::M0; using ngen::BinaryCodeGenerator<hw>::M4; using ngen::BinaryCodeGenerator<hw>::M8; using ngen::BinaryCodeGenerator<hw>::M12; \
using ngen::BinaryCodeGenerator<hw>::M16; using ngen::BinaryCodeGenerator<hw>::M20; using ngen::BinaryCodeGenerator<hw>::M24; using ngen::BinaryCodeGenerator<hw>::M28; \
using ngen::BinaryCodeGenerator<hw>::sb0; using ngen::BinaryCodeGenerator<hw>::sb1; using ngen::BinaryCodeGenerator<hw>::sb2; using ngen::BinaryCodeGenerator<hw>::sb3; \
using ngen::BinaryCodeGenerator<hw>::sb4; using ngen::BinaryCodeGenerator<hw>::sb5; using ngen::BinaryCodeGenerator<hw>::sb6; using ngen::BinaryCodeGenerator<hw>::sb7; \
using ngen::BinaryCodeGenerator<hw>::sb8; using ngen::BinaryCodeGenerator<hw>::sb9; using ngen::BinaryCodeGenerator<hw>::sb10; using ngen::BinaryCodeGenerator<hw>::sb11; \
using ngen::BinaryCodeGenerator<hw>::sb12; using ngen::BinaryCodeGenerator<hw>::sb13; using ngen::BinaryCodeGenerator<hw>::sb14; using ngen::BinaryCodeGenerator<hw>::sb15; \
using ngen::BinaryCodeGenerator<hw>::A32; using ngen::BinaryCodeGenerator<hw>::A32NC; using ngen::BinaryCodeGenerator<hw>::A64; using ngen::BinaryCodeGenerator<hw>::A64NC; \
using ngen::BinaryCodeGenerator<hw>::SLM; \
template <typename... Targs> ngen::InstructionModifier ExecutionOffset(Targs&&... args) { return ngen::BinaryCodeGenerator<hw>::ExecutionOffset(std::forward<Targs>(args)...); } \
template <typename... Targs> ngen::AddressBase Surface(Targs&&... args) { return ngen::BinaryCodeGenerator<hw>::Surface(std::forward<Targs>(args)...); } \
template <typename... Targs> ngen::AddressBase CC(Targs&&... args) { return ngen::BinaryCodeGenerator<hw>::CC(std::forward<Targs>(args)...); } \
template <typename... Targs> ngen::AddressBase SC(Targs&&... args) { return ngen::BinaryCodeGenerator<hw>::SC(std::forward<Targs>(args)...); }
#define NGEN_FORWARD_REGISTERS_EXTRA1 \
using ngen::BinaryCodeGenerator<hw>::r128; using ngen::BinaryCodeGenerator<hw>::r129; using ngen::BinaryCodeGenerator<hw>::r130; using ngen::BinaryCodeGenerator<hw>::r131; \
using ngen::BinaryCodeGenerator<hw>::r132; using ngen::BinaryCodeGenerator<hw>::r133; using ngen::BinaryCodeGenerator<hw>::r134; using ngen::BinaryCodeGenerator<hw>::r135; \
using ngen::BinaryCodeGenerator<hw>::r136; using ngen::BinaryCodeGenerator<hw>::r137; using ngen::BinaryCodeGenerator<hw>::r138; using ngen::BinaryCodeGenerator<hw>::r139; \
using ngen::BinaryCodeGenerator<hw>::r140; using ngen::BinaryCodeGenerator<hw>::r141; using ngen::BinaryCodeGenerator<hw>::r142; using ngen::BinaryCodeGenerator<hw>::r143; \
using ngen::BinaryCodeGenerator<hw>::r144; using ngen::BinaryCodeGenerator<hw>::r145; using ngen::BinaryCodeGenerator<hw>::r146; using ngen::BinaryCodeGenerator<hw>::r147; \
using ngen::BinaryCodeGenerator<hw>::r148; using ngen::BinaryCodeGenerator<hw>::r149; using ngen::BinaryCodeGenerator<hw>::r150; using ngen::BinaryCodeGenerator<hw>::r151; \
using ngen::BinaryCodeGenerator<hw>::r152; using ngen::BinaryCodeGenerator<hw>::r153; using ngen::BinaryCodeGenerator<hw>::r154; using ngen::BinaryCodeGenerator<hw>::r155; \
using ngen::BinaryCodeGenerator<hw>::r156; using ngen::BinaryCodeGenerator<hw>::r157; using ngen::BinaryCodeGenerator<hw>::r158; using ngen::BinaryCodeGenerator<hw>::r159; \
using ngen::BinaryCodeGenerator<hw>::r160; using ngen::BinaryCodeGenerator<hw>::r161; using ngen::BinaryCodeGenerator<hw>::r162; using ngen::BinaryCodeGenerator<hw>::r163; \
using ngen::BinaryCodeGenerator<hw>::r164; using ngen::BinaryCodeGenerator<hw>::r165; using ngen::BinaryCodeGenerator<hw>::r166; using ngen::BinaryCodeGenerator<hw>::r167; \
using ngen::BinaryCodeGenerator<hw>::r168; using ngen::BinaryCodeGenerator<hw>::r169; using ngen::BinaryCodeGenerator<hw>::r170; using ngen::BinaryCodeGenerator<hw>::r171; \
using ngen::BinaryCodeGenerator<hw>::r172; using ngen::BinaryCodeGenerator<hw>::r173; using ngen::BinaryCodeGenerator<hw>::r174; using ngen::BinaryCodeGenerator<hw>::r175; \
using ngen::BinaryCodeGenerator<hw>::r176; using ngen::BinaryCodeGenerator<hw>::r177; using ngen::BinaryCodeGenerator<hw>::r178; using ngen::BinaryCodeGenerator<hw>::r179; \
using ngen::BinaryCodeGenerator<hw>::r180; using ngen::BinaryCodeGenerator<hw>::r181; using ngen::BinaryCodeGenerator<hw>::r182; using ngen::BinaryCodeGenerator<hw>::r183; \
using ngen::BinaryCodeGenerator<hw>::r184; using ngen::BinaryCodeGenerator<hw>::r185; using ngen::BinaryCodeGenerator<hw>::r186; using ngen::BinaryCodeGenerator<hw>::r187; \
using ngen::BinaryCodeGenerator<hw>::r188; using ngen::BinaryCodeGenerator<hw>::r189; using ngen::BinaryCodeGenerator<hw>::r190; using ngen::BinaryCodeGenerator<hw>::r191; \
using ngen::BinaryCodeGenerator<hw>::r192; using ngen::BinaryCodeGenerator<hw>::r193; using ngen::BinaryCodeGenerator<hw>::r194; using ngen::BinaryCodeGenerator<hw>::r195; \
using ngen::BinaryCodeGenerator<hw>::r196; using ngen::BinaryCodeGenerator<hw>::r197; using ngen::BinaryCodeGenerator<hw>::r198; using ngen::BinaryCodeGenerator<hw>::r199; \
using ngen::BinaryCodeGenerator<hw>::r200; using ngen::BinaryCodeGenerator<hw>::r201; using ngen::BinaryCodeGenerator<hw>::r202; using ngen::BinaryCodeGenerator<hw>::r203; \
using ngen::BinaryCodeGenerator<hw>::r204; using ngen::BinaryCodeGenerator<hw>::r205; using ngen::BinaryCodeGenerator<hw>::r206; using ngen::BinaryCodeGenerator<hw>::r207; \
using ngen::BinaryCodeGenerator<hw>::r208; using ngen::BinaryCodeGenerator<hw>::r209; using ngen::BinaryCodeGenerator<hw>::r210; using ngen::BinaryCodeGenerator<hw>::r211; \
using ngen::BinaryCodeGenerator<hw>::r212; using ngen::BinaryCodeGenerator<hw>::r213; using ngen::BinaryCodeGenerator<hw>::r214; using ngen::BinaryCodeGenerator<hw>::r215; \
using ngen::BinaryCodeGenerator<hw>::r216; using ngen::BinaryCodeGenerator<hw>::r217; using ngen::BinaryCodeGenerator<hw>::r218; using ngen::BinaryCodeGenerator<hw>::r219; \
using ngen::BinaryCodeGenerator<hw>::r220; using ngen::BinaryCodeGenerator<hw>::r221; using ngen::BinaryCodeGenerator<hw>::r222; using ngen::BinaryCodeGenerator<hw>::r223; \
using ngen::BinaryCodeGenerator<hw>::r224; using ngen::BinaryCodeGenerator<hw>::r225; using ngen::BinaryCodeGenerator<hw>::r226; using ngen::BinaryCodeGenerator<hw>::r227; \
using ngen::BinaryCodeGenerator<hw>::r228; using ngen::BinaryCodeGenerator<hw>::r229; using ngen::BinaryCodeGenerator<hw>::r230; using ngen::BinaryCodeGenerator<hw>::r231; \
using ngen::BinaryCodeGenerator<hw>::r232; using ngen::BinaryCodeGenerator<hw>::r233; using ngen::BinaryCodeGenerator<hw>::r234; using ngen::BinaryCodeGenerator<hw>::r235; \
using ngen::BinaryCodeGenerator<hw>::r236; using ngen::BinaryCodeGenerator<hw>::r237; using ngen::BinaryCodeGenerator<hw>::r238; using ngen::BinaryCodeGenerator<hw>::r239; \
using ngen::BinaryCodeGenerator<hw>::r240; using ngen::BinaryCodeGenerator<hw>::r241; using ngen::BinaryCodeGenerator<hw>::r242; using ngen::BinaryCodeGenerator<hw>::r243; \
using ngen::BinaryCodeGenerator<hw>::r244; using ngen::BinaryCodeGenerator<hw>::r245; using ngen::BinaryCodeGenerator<hw>::r246; using ngen::BinaryCodeGenerator<hw>::r247; \
using ngen::BinaryCodeGenerator<hw>::r248; using ngen::BinaryCodeGenerator<hw>::r249; using ngen::BinaryCodeGenerator<hw>::r250; using ngen::BinaryCodeGenerator<hw>::r251; \
using ngen::BinaryCodeGenerator<hw>::r252; using ngen::BinaryCodeGenerator<hw>::r253; using ngen::BinaryCodeGenerator<hw>::r254; using ngen::BinaryCodeGenerator<hw>::r255;
#define NGEN_FORWARD_REGISTERS_EXTRA2 \
using ngen::BinaryCodeGenerator<hw>::D8; using ngen::BinaryCodeGenerator<hw>::D16; using ngen::BinaryCodeGenerator<hw>::D32; using ngen::BinaryCodeGenerator<hw>::D64; \
using ngen::BinaryCodeGenerator<hw>::D8U32; using ngen::BinaryCodeGenerator<hw>::D16U32; \
using ngen::BinaryCodeGenerator<hw>::D8T; using ngen::BinaryCodeGenerator<hw>::D16T; using ngen::BinaryCodeGenerator<hw>::D32T; using ngen::BinaryCodeGenerator<hw>::D64T; \
using ngen::BinaryCodeGenerator<hw>::D8U32T; using ngen::BinaryCodeGenerator<hw>::D16U32T; \
using ngen::BinaryCodeGenerator<hw>::V1; using ngen::BinaryCodeGenerator<hw>::V2; using ngen::BinaryCodeGenerator<hw>::V3; using ngen::BinaryCodeGenerator<hw>::V4; \
using ngen::BinaryCodeGenerator<hw>::V8; using ngen::BinaryCodeGenerator<hw>::V16; using ngen::BinaryCodeGenerator<hw>::V32; using ngen::BinaryCodeGenerator<hw>::V64; \
using ngen::BinaryCodeGenerator<hw>::V1T; using ngen::BinaryCodeGenerator<hw>::V2T; using ngen::BinaryCodeGenerator<hw>::V3T; using ngen::BinaryCodeGenerator<hw>::V4T; \
using ngen::BinaryCodeGenerator<hw>::V8T; using ngen::BinaryCodeGenerator<hw>::V16T; using ngen::BinaryCodeGenerator<hw>::V32T; using ngen::BinaryCodeGenerator<hw>::V64T; \
using ngen::BinaryCodeGenerator<hw>::transpose; \
using ngen::BinaryCodeGenerator<hw>::L1UC_L3UC; using ngen::BinaryCodeGenerator<hw>::L1UC_L3C; using ngen::BinaryCodeGenerator<hw>::L1C_L3UC; using ngen::BinaryCodeGenerator<hw>::L1C_L3C; \
using ngen::BinaryCodeGenerator<hw>::L1S_L3UC; using ngen::BinaryCodeGenerator<hw>::L1S_L3C; using ngen::BinaryCodeGenerator<hw>::L1IAR_L3C; using ngen::BinaryCodeGenerator<hw>::L1UC_L3WB; \
using ngen::BinaryCodeGenerator<hw>::L1WT_L3UC; using ngen::BinaryCodeGenerator<hw>::L1WT_L3WB; using ngen::BinaryCodeGenerator<hw>::L1S_L3WB; using ngen::BinaryCodeGenerator<hw>::L1WB_L3WB;
#define NGEN_FORWARD_REGISTERS_EXTRA3 \
using ngen::BinaryCodeGenerator<hw>::any; using ngen::BinaryCodeGenerator<hw>::all; \
using ngen::BinaryCodeGenerator<hw>::f2; using ngen::BinaryCodeGenerator<hw>::f3; \
using ngen::BinaryCodeGenerator<hw>::sb16; using ngen::BinaryCodeGenerator<hw>::sb17; using ngen::BinaryCodeGenerator<hw>::sb18; using ngen::BinaryCodeGenerator<hw>::sb19; \
using ngen::BinaryCodeGenerator<hw>::sb20; using ngen::BinaryCodeGenerator<hw>::sb21; using ngen::BinaryCodeGenerator<hw>::sb22; using ngen::BinaryCodeGenerator<hw>::sb23; \
using ngen::BinaryCodeGenerator<hw>::sb24; using ngen::BinaryCodeGenerator<hw>::sb25; using ngen::BinaryCodeGenerator<hw>::sb26; using ngen::BinaryCodeGenerator<hw>::sb27; \
using ngen::BinaryCodeGenerator<hw>::sb28; using ngen::BinaryCodeGenerator<hw>::sb29; using ngen::BinaryCodeGenerator<hw>::sb30; using ngen::BinaryCodeGenerator<hw>::sb31; \
using ngen::BinaryCodeGenerator<hw>::NoAccSBSet; \
using ngen::BinaryCodeGenerator<hw>::vnni;
#define NGEN_FORWARD_REGISTERS NGEN_FORWARD_REGISTERS_BASE NGEN_FORWARD_REGISTERS_EXTRA1 NGEN_FORWARD_REGISTERS_EXTRA2 NGEN_FORWARD_REGISTERS_EXTRA3
#endif
1718 | |
1719 | template <HW hw> |
1720 | inline void BinaryCodeGenerator<hw>::unsupported() |
1721 | { |
1722 | #ifdef NGEN_SAFE |
1723 | throw unsupported_instruction(); |
1724 | #endif |
1725 | } |
1726 | |
1727 | template <HW hw> |
1728 | typename BinaryCodeGenerator<hw>::InstructionStream *BinaryCodeGenerator<hw>::popStream() |
1729 | { |
1730 | #ifdef NGEN_SAFE |
1731 | if (streamStack.size() <= 1) throw stream_stack_underflow(); |
1732 | #endif |
1733 | |
1734 | InstructionStream *result = streamStack.back(); |
1735 | streamStack.pop_back(); |
1736 | return result; |
1737 | } |
1738 | |
1739 | template <HW hw> |
1740 | static inline Instruction12 encodeSyncInsertion(autoswsb::SyncInsertion &si) |
1741 | { |
1742 | Instruction12 i; |
1743 | |
1744 | i.common.opcode = static_cast<int>(Opcode::sync); |
1745 | i.common.swsb = (hw >= HW::XeHPC) ? SWSBInfoXeHPC(si.swsb, Opcode::sync).raw() |
1746 | : SWSBInfo12(si.swsb, Opcode::sync).raw(); |
1747 | i.common.maskCtrl = true; |
1748 | i.binary.cmod = static_cast<int>(si.fc); |
1749 | |
1750 | if (si.mask) { |
1751 | i.binary.src0Type = getTypecode12(DataType::ud); |
1752 | i.binary.src0Imm = true; |
1753 | i.imm32.value = si.mask; |
1754 | } |
1755 | i.binary.dst = 1; |
1756 | |
1757 | return i; |
1758 | } |
1759 | |
1760 | template <HW hw> |
1761 | std::vector<uint8_t> BinaryCodeGenerator<hw>::getCode() |
1762 | { |
1763 | #ifdef NGEN_SAFE |
1764 | if (streamStack.size() > 1) throw unfinished_stream_exception(); |
1765 | #endif |
1766 | rootStream.fixLabels(labelManager); |
1767 | |
1768 | Program program(rootStream); |
1769 | autoswsb::BasicBlockList analysis = autoswsb::autoSWSB(hw, program); |
1770 | std::vector<uint8_t> result; |
1771 | |
1772 | if (analysis.empty()) { |
1773 | result.resize(rootStream.length()); |
1774 | std::memmove(result.data(), rootStream.code.data(), rootStream.length()); |
1775 | } else { |
1776 | std::multimap<int32_t, autoswsb::SyncInsertion*> syncs; |
1777 | |
1778 | for (auto &bb : analysis) |
1779 | for (auto &sync : bb.syncs) |
1780 | syncs.insert(std::make_pair(sync.inum, &sync)); |
1781 | |
1782 | result.resize(rootStream.length() + syncs.size() * sizeof(Instruction12)); |
1783 | |
1784 | auto *psrc = reinterpret_cast<const Instruction12 *>(rootStream.code.data()); |
1785 | auto *pdst = reinterpret_cast<Instruction12 *>(result.data()); |
1786 | auto nextSync = syncs.begin(); |
1787 | |
1788 | for (uint32_t isrc = 0; isrc < program.size(); isrc++, psrc++) { |
1789 | if (psrc->opcode() == Opcode::wrdep) |
1790 | continue; |
1791 | while ((nextSync != syncs.end()) && (nextSync->second->inum == isrc)) |
1792 | *pdst++ = encodeSyncInsertion<hw>(*(nextSync++)->second); |
1793 | *pdst++ = *psrc; |
1794 | } |
1795 | |
1796 | result.resize(reinterpret_cast<uint8_t *>(pdst) - result.data()); |
1797 | } |
1798 | |
1799 | return result; |
1800 | } |
1801 | |
1802 | template <HW hw> |
1803 | template <bool forceWE, typename D, typename S0, HW hw_> |
1804 | typename std::enable_if<hwLT(hw_, HW::Gen12LP)>::type |
1805 | BinaryCodeGenerator<hw>::opX(Opcode op, DataType defaultType, const InstructionModifier &mod, D dst, S0 src0) |
1806 | { |
1807 | Instruction8 i{}; |
1808 | InstructionModifier emod = mod | defaultModifier; |
1809 | if (forceWE) |
1810 | emod |= NoMask; |
1811 | |
1812 | dst.fixup(hw, emod.getExecSize(), defaultType, true, 1); |
1813 | src0.fixup(hw, emod.getExecSize(), defaultType, false, 1); |
1814 | |
1815 | encodeCommon8(i, op, emod); |
1816 | i.common.accessMode = std::is_base_of<Align16Operand, D>::value; |
1817 | |
1818 | i.binary.dst = encodeBinaryOperand8<true>(dst).bits; |
1819 | i.binary.src0 = encodeBinaryOperand8<false>(src0).bits; |
1820 | |
1821 | if (dst.isIndirect()) i.binary.dstAddrImm9 = dst.getOffset() >> 9; |
1822 | if (src0.isIndirect()) i.binary.src0AddrImm9 = src0.getOffset() >> 9; |
1823 | |
1824 | i.binary.dstType = getTypecode<hw>(dst.getType()); |
1825 | i.binary.src0Type = getTypecode<hw>(src0.getType()); |
1826 | |
1827 | i.binary.dstRegFile = getRegFile(dst); |
1828 | i.binary.src0RegFile = getRegFile(src0); |
1829 | |
1830 | db(i); |
1831 | } |
1832 | |
1833 | template <HW hw> |
1834 | template <bool forceWE, typename D, typename S0, HW hw_> |
1835 | typename std::enable_if<hwGE(hw_, HW::Gen12LP)>::type |
1836 | BinaryCodeGenerator<hw>::opX(Opcode op, DataType defaultType, const InstructionModifier &mod, D dst, S0 src0) |
1837 | { |
1838 | typename EncodingTag12Dispatch<hw>::tag tag; |
1839 | Instruction12 i{}; |
1840 | |
1841 | InstructionModifier emod = mod | defaultModifier; |
1842 | if (forceWE) |
1843 | emod |= NoMask; |
1844 | |
1845 | dst.fixup(hw, emod.getExecSize(), defaultType, true, 1); |
1846 | src0.fixup(hw, emod.getExecSize(), defaultType, false, 1); |
1847 | |
1848 | encodeCommon12(i, op, emod, dst, tag); |
1849 | |
1850 | i.binary.dst = encodeBinaryOperand12<true>(dst, tag).bits; |
1851 | i.binary.src0 = encodeBinaryOperand12<false>(src0, tag).bits; |
1852 | |
1853 | i.binary.dstAddrMode = dst.isIndirect(); |
1854 | i.binary.dstType = getTypecode12(dst.getType()); |
1855 | i.binary.src0Type = getTypecode12(src0.getType()); |
1856 | |
1857 | i.binary.src0Mods = src0.getMods(); |
1858 | |
1859 | i.binary.cmod = static_cast<int>(mod.getCMod()); |
1860 | |
1861 | db(i); |
1862 | } |
1863 | |
1864 | template <HW hw> |
1865 | template <bool forceWE, typename D, HW hw_> |
1866 | typename std::enable_if<hwLT(hw_, HW::Gen12LP)>::type |
1867 | BinaryCodeGenerator<hw>::opX(Opcode op, DataType defaultType, const InstructionModifier &mod, D dst, const Immediate &src0) |
1868 | { |
1869 | Instruction8 i{}; |
1870 | InstructionModifier emod = mod | defaultModifier; |
1871 | if (forceWE) |
1872 | emod |= NoMask; |
1873 | |
1874 | dst.fixup(hw, emod.getExecSize(), defaultType, true, 1); |
1875 | src0.fixup(hw, emod.getExecSize(), defaultType, false, 1); |
1876 | |
1877 | encodeCommon8(i, op, emod); |
1878 | i.common.accessMode = std::is_base_of<Align16Operand, D>::value; |
1879 | |
1880 | i.binary.dst = encodeBinaryOperand8<true>(dst).bits; |
1881 | |
1882 | i.binary.dstType = getTypecode<hw>(dst.getType()); |
1883 | i.binary.src0Type = getImmediateTypecode<hw>(src0.getType()); |
1884 | |
1885 | i.binary.dstRegFile = getRegFile(dst); |
1886 | i.binary.src0RegFile = getRegFile(src0); |
1887 | |
1888 | if (dst.isIndirect()) i.binary.dstAddrImm9 = dst.getOffset() >> 9; |
1889 | |
1890 | if (getBytes(src0.getType()) == 8) |
1891 | i.imm64.value = static_cast<uint64_t>(src0); |
1892 | else |
1893 | i.imm32.value = static_cast<uint64_t>(src0); |
1894 | |
1895 | db(i); |
1896 | } |
1897 | |
1898 | template <HW hw> |
1899 | template <bool forceWE, typename D, HW hw_> |
1900 | typename std::enable_if<hwGE(hw_, HW::Gen12LP)>::type |
1901 | BinaryCodeGenerator<hw>::opX(Opcode op, DataType defaultType, const InstructionModifier &mod, D dst, const Immediate &src0) |
1902 | { |
1903 | typename EncodingTag12Dispatch<hw>::tag tag; |
1904 | Instruction12 i{}; |
1905 | |
1906 | InstructionModifier emod = mod | defaultModifier; |
1907 | if (forceWE) |
1908 | emod |= NoMask; |
1909 | |
1910 | dst.fixup(hw, emod.getExecSize(), defaultType, true, 1); |
1911 | src0.fixup(hw, emod.getExecSize(), defaultType, false, 1); |
1912 | |
1913 | encodeCommon12(i, op, emod, dst, tag); |
1914 | |
1915 | i.binary.dst = encodeBinaryOperand12<true>(dst, tag).bits; |
1916 | |
1917 | i.binary.dstAddrMode = dst.isIndirect(); |
1918 | |
1919 | i.binary.dstType = getTypecode12(dst.getType()); |
1920 | i.binary.src0Type = getTypecode12(src0.getType()); |
1921 | |
1922 | i.binary.src0Imm = true; |
1923 | |
1924 | i.binary.cmod = static_cast<int>(mod.getCMod()); |
1925 | |
1926 | auto val = static_cast<uint64_t>(src0); |
1927 | i.imm32.value = val; |
1928 | if (getBytes(src0.getType()) == 8) { |
1929 | #ifdef NGEN_SAFE |
1930 | if (mod.getCMod() != ConditionModifier::none) throw invalid_modifiers_exception(); |
1931 | #endif |
1932 | i.imm64.high = val >> 32; |
1933 | } |
1934 | |
1935 | db(i); |
1936 | } |
1937 | |
1938 | template <HW hw> |
1939 | template <bool forceWE, typename D, typename S0, typename S1, HW hw_> |
1940 | typename std::enable_if<hwLT(hw_, HW::Gen12LP)>::type |
1941 | BinaryCodeGenerator<hw>::opX(Opcode op, DataType defaultType, const InstructionModifier &mod, D dst, S0 src0, S1 src1) |
1942 | { |
1943 | Instruction8 i{}; |
1944 | |
1945 | InstructionModifier emod = mod | defaultModifier; |
1946 | if (forceWE) |
1947 | emod |= NoMask; |
1948 | |
1949 | dst.fixup(hw, emod.getExecSize(), defaultType, true, 2); |
1950 | src0.fixup(hw, emod.getExecSize(), defaultType, false, 2); |
1951 | src1.fixup(hw, emod.getExecSize(), defaultType, false, 2); |
1952 | |
1953 | encodeCommon8(i, op, emod); |
1954 | i.common.accessMode = std::is_base_of<Align16Operand, D>::value; |
1955 | |
1956 | i.binary.dst = encodeBinaryOperand8<true>(dst).bits; |
1957 | i.binary.src0 = encodeBinaryOperand8<false>(src0).bits; |
1958 | i.binary.src1 = encodeBinaryOperand8<false>(src1).bits; |
1959 | |
1960 | if (dst.isIndirect()) i.binary.dstAddrImm9 = dst.getOffset() >> 9; |
1961 | if (src0.isIndirect()) i.binary.src0AddrImm9 = src0.getOffset() >> 9; |
1962 | if (src1.isIndirect()) i.binary.src1AddrImm9 = src1.getOffset() >> 9; |
1963 | |
1964 | i.binary.dstType = getTypecode<hw>(dst.getType()); |
1965 | i.binary.src0Type = getTypecode<hw>(src0.getType()); |
1966 | i.binary.src1Type = getTypecode<hw>(src1.getType()); |
1967 | |
1968 | i.binary.dstRegFile = getRegFile(dst); |
1969 | i.binary.src0RegFile = getRegFile(src0); |
1970 | i.binary.src1RegFile = getRegFile(src1); |
1971 | |
1972 | #ifdef NGEN_SAFE |
1973 | if (src1.isARF() && op != Opcode::illegal && op != Opcode::movi) throw grf_expected_exception(); |
1974 | #endif |
1975 | |
1976 | db(i); |
1977 | } |
1978 | |
1979 | template <HW hw> |
1980 | template <bool forceWE, typename D, typename S0, typename S1, HW hw_> |
1981 | typename std::enable_if<hwGE(hw_, HW::Gen12LP)>::type |
1982 | BinaryCodeGenerator<hw>::opX(Opcode op, DataType defaultType, const InstructionModifier &mod, D dst, S0 src0, S1 src1) |
1983 | { |
1984 | typename EncodingTag12Dispatch<hw>::tag tag; |
1985 | Instruction12 i{}; |
1986 | |
1987 | InstructionModifier emod = mod | defaultModifier; |
1988 | if (forceWE) |
1989 | emod |= NoMask; |
1990 | |
1991 | dst.fixup(hw, emod.getExecSize(), defaultType, true, 2); |
1992 | src0.fixup(hw, emod.getExecSize(), defaultType, false, 2); |
1993 | src1.fixup(hw, emod.getExecSize(), defaultType, false, 2); |
1994 | |
1995 | encodeCommon12(i, op, emod, dst, tag); |
1996 | |
1997 | i.binary.dst = encodeBinaryOperand12<true>(dst, tag).bits; |
1998 | i.binary.src0 = encodeBinaryOperand12<false>(src0, tag).bits; |
1999 | i.binary.src1 = encodeBinaryOperand12<false>(src1, tag).bits; |
2000 | |
2001 | i.binary.dstAddrMode = dst.isIndirect(); |
2002 | i.binary.dstType = getTypecode12(dst.getType()); |
2003 | i.binary.src0Type = getTypecode12(src0.getType()); |
2004 | i.binary.src1Type = getTypecode12(src1.getType()); |
2005 | |
2006 | i.binary.src0Mods = src0.getMods(); |
2007 | i.binary.src1Mods = src1.getMods(); |
2008 | |
2009 | i.binary.cmod = static_cast<int>(mod.getCMod()); |
2010 | |
2011 | db(i); |
2012 | } |
2013 | |
2014 | template <HW hw> |
2015 | template <bool forceWE, typename D, typename S0, HW hw_> |
2016 | typename std::enable_if<hwLT(hw_, HW::Gen12LP)>::type |
2017 | BinaryCodeGenerator<hw>::opX(Opcode op, DataType defaultType, const InstructionModifier &mod, D dst, S0 src0, const Immediate &src1) |
2018 | { |
2019 | Instruction8 i{}; |
2020 | InstructionModifier emod = mod | defaultModifier; |
2021 | if (forceWE) |
2022 | emod |= NoMask; |
2023 | |
2024 | dst.fixup(hw, emod.getExecSize(), defaultType, true, 2); |
2025 | src0.fixup(hw, emod.getExecSize(), defaultType, false, 2); |
2026 | src1.fixup(hw, emod.getExecSize(), defaultType, false, 2); |
2027 | |
2028 | encodeCommon8(i, op, emod); |
2029 | i.common.accessMode = std::is_base_of<Align16Operand, D>::value; |
2030 | |
2031 | i.binary.dst = encodeBinaryOperand8<true>(dst).bits; |
2032 | i.binary.src0 = encodeBinaryOperand8<false>(src0).bits; |
2033 | |
2034 | if (dst.isIndirect()) i.binary.dstAddrImm9 = dst.getOffset() >> 9; |
2035 | if (src0.isIndirect()) i.binary.src0AddrImm9 = src0.getOffset() >> 9; |
2036 | |
2037 | i.binary.dstType = getTypecode<hw>(dst.getType()); |
2038 | i.binary.src0Type = getTypecode<hw>(src0.getType()); |
2039 | i.binary.src1Type = getImmediateTypecode<hw>(src1.getType()); |
2040 | |
2041 | i.binary.dstRegFile = getRegFile(dst); |
2042 | i.binary.src0RegFile = getRegFile(src0); |
2043 | i.binary.src1RegFile = getRegFile(src1); |
2044 | |
2045 | i.imm32.value = static_cast<uint64_t>(src1); |
2046 | |
2047 | db(i); |
2048 | } |
2049 | |
2050 | template <HW hw> |
2051 | template <bool forceWE, typename D, typename S0, HW hw_> |
2052 | typename std::enable_if<hwGE(hw_, HW::Gen12LP)>::type |
2053 | BinaryCodeGenerator<hw>::opX(Opcode op, DataType defaultType, const InstructionModifier &mod, D dst, S0 src0, const Immediate &src1) |
2054 | { |
2055 | typename EncodingTag12Dispatch<hw>::tag tag; |
2056 | Instruction12 i{}; |
2057 | |
2058 | InstructionModifier emod = mod | defaultModifier; |
2059 | if (forceWE) |
2060 | emod |= NoMask; |
2061 | |
2062 | dst.fixup(hw, emod.getExecSize(), defaultType, true, 2); |
2063 | src0.fixup(hw, emod.getExecSize(), defaultType, false, 2); |
2064 | src1.fixup(hw, emod.getExecSize(), defaultType, false, 2); |
2065 | |
2066 | encodeCommon12(i, op, emod, dst, tag); |
2067 | |
2068 | i.binary.dst = encodeBinaryOperand12<true>(dst, tag).bits; |
2069 | i.binary.src0 = encodeBinaryOperand12<false>(src0, tag).bits; |
2070 | i.binary.src1 = static_cast<uint64_t>(src1); |
2071 | |
2072 | i.binary.dstAddrMode = dst.isIndirect(); |
2073 | i.binary.dstType = getTypecode12(dst.getType()); |
2074 | i.binary.src0Type = getTypecode12(src0.getType()); |
2075 | i.binary.src1Type = getTypecode12(src1.getType()); |
2076 | |
2077 | i.binary.src0Mods = src0.getMods(); |
2078 | |
2079 | i.binary.cmod = static_cast<int>(mod.getCMod()); |
2080 | |
2081 | i.binary.src1Imm = true; |
2082 | i.imm32.value = static_cast<uint64_t>(src1); |
2083 | |
2084 | db(i); |
2085 | } |
2086 | |
2087 | template <HW hw> |
2088 | template <HW hw_> |
2089 | typename std::enable_if<hwLE(hw_, HW::Gen9)>::type |
2090 | BinaryCodeGenerator<hw>::opX(Opcode op, DataType defaultType, const InstructionModifier &mod, RegData dst, RegData src0, RegData src1, RegData src2) |
2091 | { |
2092 | opX(op, defaultType, mod, emulateAlign16Dst(dst), emulateAlign16Src(src0), |
2093 | emulateAlign16Src(src1), emulateAlign16Src(src2)); |
2094 | } |
2095 | |
2096 | |
2097 | template <HW hw> |
2098 | template <HW hw_> |
2099 | typename std::enable_if<hwLT(hw_, HW::Gen12LP)>::type |
2100 | BinaryCodeGenerator<hw>::opX(Opcode op, DataType defaultType, const InstructionModifier &mod, Align16Operand dst, Align16Operand src0, Align16Operand src1, Align16Operand src2) |
2101 | { |
2102 | #ifdef NGEN_SAFE |
2103 | if (dst.getReg().isARF()) throw grf_expected_exception(); |
2104 | if (src0.getReg().isARF()) throw grf_expected_exception(); |
2105 | if (src1.getReg().isARF()) throw grf_expected_exception(); |
2106 | if (src2.getReg().isARF()) throw grf_expected_exception(); |
2107 | #endif |
2108 | |
2109 | Instruction8 i{}; |
2110 | InstructionModifier emod = mod | defaultModifier | Align16; |
2111 | |
2112 | dst.getReg().fixup(hw, emod.getExecSize(), defaultType, true, 3); |
2113 | src0.getReg().fixup(hw, emod.getExecSize(), defaultType, false, 3); |
2114 | src1.getReg().fixup(hw, emod.getExecSize(), defaultType, false, 3); |
2115 | src2.getReg().fixup(hw, emod.getExecSize(), defaultType, false, 3); |
2116 | |
2117 | encodeCommon8(i, op, emod); |
2118 | |
2119 | i.ternary16.dstChanEn = dst.getChanEn(); |
2120 | i.ternary16.dstRegNum = dst.getReg().getBase(); |
2121 | i.ternary16.dstSubregNum2_4 = dst.getReg().getByteOffset() >> 2; |
2122 | i.ternary16.dstType = getTernary16Typecode8(dst.getReg().getType()); |
2123 | |
2124 | i.ternary16.srcType = getTernary16Typecode8(src0.getReg().getType()); |
2125 | |
2126 | bool isFOrHF = (src0.getReg().getType() == DataType::f |
2127 | || src0.getReg().getType() == DataType::hf); |
2128 | |
2129 | i.ternary16.src1Type = isFOrHF && (src1.getReg().getType() == DataType::hf); |
2130 | i.ternary16.src2Type = isFOrHF && (src1.getReg().getType() == DataType::hf); |
2131 | |
2132 | encodeTernaryCommon8(i, src0, src1, src2); |
2133 | |
2134 | db(i); |
2135 | } |
2136 | |
2137 | template <HW hw> |
2138 | template <typename D, typename S0, typename S1, typename S2, HW hw_> |
2139 | typename std::enable_if<hwLT(hw_, HW::Gen12LP)>::type |
2140 | BinaryCodeGenerator<hw>::opX(Opcode op, DataType defaultType, const InstructionModifier &mod, D dst, S0 src0, S1 src1, S2 src2) |
2141 | { |
2142 | if (hw < HW::Gen10) |
2143 | unsupported(); |
2144 | |
2145 | #ifdef NGEN_SAFE |
2146 | if (src0.isARF()) throw grf_expected_exception(); |
2147 | if (src2.isARF()) throw grf_expected_exception(); |
2148 | #endif |
2149 | |
2150 | Instruction8 i{}; |
2151 | InstructionModifier emod = mod | defaultModifier; |
2152 | |
2153 | dst.fixup(hw, emod.getExecSize(), defaultType, true, 3); |
2154 | src0.fixup(hw, emod.getExecSize(), defaultType, false, 3); |
2155 | src1.fixup(hw, emod.getExecSize(), defaultType, false, 3); |
2156 | src2.fixup(hw, emod.getExecSize(), defaultType, false, 3); |
2157 | |
2158 | encodeCommon8(i, op, emod); |
2159 | |
2160 | i.ternary1.src0RegFile = std::is_base_of<Immediate, S0>::value; |
2161 | i.ternary1.src1RegFile = src1.isARF(); |
2162 | i.ternary1.src2RegFile = std::is_base_of<Immediate, S2>::value; |
2163 | |
2164 | encodeTernaryCommon8(i, src0, src1, src2); |
2165 | encodeTernary1Dst10(i, dst); |
2166 | |
2167 | db(i); |
2168 | } |
2169 | |
2170 | template <HW hw> |
2171 | template <typename D, typename S0,typename S1, typename S2, HW hw_> |
2172 | typename std::enable_if<hwGE(hw_, HW::Gen12LP)>::type |
2173 | BinaryCodeGenerator<hw>::opX(Opcode op, DataType defaultType, const InstructionModifier &mod, D dst, S0 src0, S1 src1, S2 src2) |
2174 | { |
2175 | typename EncodingTag12Dispatch<hw>::tag tag; |
2176 | Instruction12 i{}; |
2177 | InstructionModifier emod = mod | defaultModifier; |
2178 | |
2179 | dst.fixup(hw, emod.getExecSize(), defaultType, true, 3); |
2180 | src0.fixup(hw, emod.getExecSize(), defaultType, false, 3); |
2181 | src1.fixup(hw, emod.getExecSize(), defaultType, false, 3); |
2182 | src2.fixup(hw, emod.getExecSize(), defaultType, false, 3); |
2183 | |
2184 | encodeCommon12(i, op, emod, dst, tag); |
2185 | |
2186 | i.ternary.dst = encodeTernaryOperand12<true>(dst, tag).bits; |
2187 | encodeTernarySrc0(i, src0, tag); |
2188 | encodeTernarySrc1(i, src1, tag); |
2189 | encodeTernarySrc2(i, src2, tag); |
2190 | encodeTernaryTypes(i, dst, src0, src1, src2); |
2191 | |
2192 | i.ternary.cmod = static_cast<int>(mod.getCMod()); |
2193 | |
2194 | db(i); |
2195 | } |
2196 | |
2197 | template <HW hw> |
2198 | template <typename DS0> |
2199 | void BinaryCodeGenerator<hw>::opMath(Opcode op, DataType defaultType, const InstructionModifier &mod, MathFunction fc, DS0 dst, DS0 src0) |
2200 | { |
2201 | InstructionModifier mmod = mod; |
2202 | |
2203 | mmod.setCMod(static_cast<ConditionModifier>(fc)); |
2204 | opX(op, defaultType, mmod, dst, src0); |
2205 | } |
2206 | |
2207 | template <HW hw> |
2208 | template <typename DS0, typename S1> |
2209 | void BinaryCodeGenerator<hw>::opMath(Opcode op, DataType defaultType, const InstructionModifier &mod, MathFunction fc, DS0 dst, DS0 src0, S1 src1) |
2210 | { |
2211 | InstructionModifier mmod = mod; |
2212 | |
2213 | mmod.setCMod(static_cast<ConditionModifier>(fc)); |
2214 | opX(op, defaultType, mmod, dst, src0, src1); |
2215 | } |
2216 | |
2217 | template <HW hw> |
2218 | template <typename D, typename S0, typename S2> |
2219 | void BinaryCodeGenerator<hw>::opBfn(Opcode op, DataType defaultType, const InstructionModifier &mod, int bfnCtrl, D dst, S0 src0, RegData src1, S2 src2) |
2220 | { |
2221 | if (hw < HW::XeHP) |
2222 | unsupported(); |
2223 | |
2224 | typename EncodingTag12Dispatch<hw>::tag tag; |
2225 | Instruction12 i{}; |
2226 | InstructionModifier emod = mod | defaultModifier; |
2227 | |
2228 | dst.fixup(hw, emod.getExecSize(), defaultType, true, 3); |
2229 | src0.fixup(hw, emod.getExecSize(), defaultType, false, 3); |
2230 | src1.fixup(hw, emod.getExecSize(), defaultType, false, 3); |
2231 | src2.fixup(hw, emod.getExecSize(), defaultType, false, 3); |
2232 | |
2233 | encodeCommon12(i, op, emod, dst, tag); |
2234 | |
2235 | i.ternary.dst = encodeTernaryOperand12<true>(dst, tag).bits; |
2236 | encodeTernarySrc0(i, src0, tag); |
2237 | encodeTernarySrc1(i, src1, tag); |
2238 | encodeTernarySrc2(i, src2, tag); |
2239 | encodeTernaryTypes(i, dst, src0, src1, src2); |
2240 | |
2241 | i.ternary.cmod = static_cast<int>(mod.getCMod()); |
2242 | |
2243 | i.bfn.bfnCtrl03 = (bfnCtrl >> 0); |
2244 | i.bfn.bfnCtrl47 = (bfnCtrl >> 4); |
2245 | |
2246 | db(i); |
2247 | } |
2248 | |
2249 | template <HW hw> |
2250 | void BinaryCodeGenerator<hw>::opDpas(Opcode op, DataType defaultType, const InstructionModifier &mod, int sdepth, int rcount, RegData dst, RegData src0, RegData src1, RegData src2) |
2251 | { |
2252 | if (hw < HW::XeHP) |
2253 | unsupported(); |
2254 | |
2255 | typename EncodingTag12Dispatch<hw>::tag tag; |
2256 | Instruction12 i{}; |
2257 | InstructionModifier emod = mod | defaultModifier; |
2258 | |
2259 | dst.fixup(hw, emod.getExecSize(), defaultType, true, 3); |
2260 | src0.fixup(hw, emod.getExecSize(), defaultType, false, 3); |
2261 | src1.fixup(hw, emod.getExecSize(), defaultType, false, 3); |
2262 | src2.fixup(hw, emod.getExecSize(), defaultType, false, 3); |
2263 | |
2264 | encodeCommon12(i, op, emod, dst, tag); |
2265 | |
2266 | i.ternary.dst = encodeTernaryOperand12<true, false>(dst, tag).bits; |
2267 | i.ternary.src0 = encodeTernaryOperand12<false, false>(src0, tag).bits; |
2268 | i.ternary.src1 = encodeTernaryOperand12<false, false>(src1, tag).bits; |
2269 | i.ternary.src2 = encodeTernaryOperand12<false, false>(src2, tag).bits; |
2270 | |
2271 | encodeTernaryTypes(i, dst, src0, src1, src2); |
2272 | |
2273 | i.dpas.rcount = rcount - 1; |
2274 | i.dpas.sdepth = utils::log2(sdepth); |
2275 | |
2276 | // i.dpas.src1SubBytePrecision = 0; // TODO: 0 -> (none), 1 -> u4/s4, 2 -> u2/s2 |
2277 | // i.dpas.src2SubBytePrecision = 0; |
2278 | |
2279 | i.ternary.cmod = static_cast<int>(mod.getCMod()); |
2280 | |
2281 | db(i); |
2282 | } |
2283 | |
2284 | template <HW hw> |
2285 | template <typename D, HW hw_> |
2286 | typename std::enable_if<hwLT(hw_, HW::Gen12LP)>::type |
2287 | BinaryCodeGenerator<hw>::opSend(Opcode op, const InstructionModifier &mod, SharedFunction sfid, const RegData &dst, const RegData &src0, const RegData &src1, uint32_t exdesc, D desc) |
2288 | { |
2289 | exdesc |= uint32_t(static_cast<uint8_t>(sfid)); |
2290 | opSends(static_cast<Opcode>(static_cast<uint8_t>(op) | 2), mod, dst, src0, src1, exdesc, desc); |
2291 | } |
2292 | |
2293 | template <HW hw> |
2294 | template <typename D, HW hw_> |
2295 | typename std::enable_if<hwLT(hw_, HW::Gen12LP)>::type |
2296 | BinaryCodeGenerator<hw>::opSend(Opcode op, const InstructionModifier &mod, SharedFunction sfid, const RegData &dst, const RegData &src0, const RegData &src1, const RegData &exdesc, D desc) |
2297 | { |
2298 | opSends(static_cast<Opcode>(static_cast<uint8_t>(op) | 2), mod, dst, src0, src1, exdesc, desc); |
2299 | } |
2300 | |
2301 | template <HW hw> |
2302 | template <typename ED, typename D, HW hw_> |
2303 | typename std::enable_if<hwGE(hw_, HW::Gen12LP)>::type |
2304 | BinaryCodeGenerator<hw>::opSend(Opcode op, const InstructionModifier &mod, SharedFunction sfid, const RegData &dst, const RegData &src0, const RegData &src1, ED exdesc, D desc) |
2305 | { |
2306 | typename EncodingTag12Dispatch<hw>::tag tag; |
2307 | Instruction12 i{}; |
2308 | InstructionModifier emod = mod | defaultModifier; |
2309 | |
2310 | encodeCommon12(i, op, emod, dst, tag); |
2311 | |
2312 | i.send.fusionCtrl = emod.isSerialized(); |
2313 | |
2314 | i.send.dstReg = dst.getBase(); |
2315 | i.send.src0Reg = src0.getBase(); |
2316 | i.send.src1Reg = src1.getBase(); |
2317 | |
2318 | i.send.dstRegFile = getRegFile(dst); |
2319 | i.send.src0RegFile = getRegFile(src0); |
2320 | i.send.src1RegFile = getRegFile(src1); |
2321 | |
2322 | i.send.sfid = static_cast<int>(sfid) & 0xF; |
2323 | |
2324 | encodeSendDesc(i, desc); |
2325 | encodeSendExDesc(i, exdesc); |
2326 | |
2327 | db(i); |
2328 | } |
2329 | |
2330 | template <HW hw> |
2331 | template <HW hw_> |
2332 | typename std::enable_if<hwLT(hw_, HW::Gen12LP)>::type |
2333 | BinaryCodeGenerator<hw>::opSend(Opcode op, const InstructionModifier &mod, const RegData &dst, const RegData &src0, uint32_t exdesc, uint32_t desc) |
2334 | { |
2335 | Instruction8 i{}; |
2336 | InstructionModifier emod = mod | defaultModifier; |
2337 | |
2338 | encodeCommon8(i, op, emod); |
2339 | |
2340 | i.binary.dst = encodeBinaryOperand8<true>(dst).bits; |
2341 | i.binary.src0 = encodeBinaryOperand8<false>(src0).bits; |
2342 | |
2343 | i.sendsGen9.dstRegFile = getRegFile(dst); |
2344 | i.binary.src0RegFile = getRegFile(src0); |
2345 | i.binary.src1RegFile = RegFileIMM; |
2346 | |
2347 | i.binary.dstType = getTypecode<hw>(dst.getType()); |
2348 | |
2349 | i.sendsGen9.sfid = exdesc & 0xF; |
2350 | i.sendGen8.zero = 0; |
2351 | i.sendGen8.exDesc16_19 = (exdesc >> 16) & 0xF; |
2352 | i.sendGen8.exDesc20_23 = (exdesc >> 20) & 0xF; |
2353 | i.sendGen8.exDesc24_27 = (exdesc >> 24) & 0xF; |
2354 | i.sendGen8.exDesc28_31 = (exdesc >> 28) & 0xF; |
2355 | i.sendsGen9.desc = desc; |
2356 | |
2357 | i.sendsGen9.eot = (exdesc >> 5) & 1; |
2358 | if (dst.isIndirect()) i.sendsGen9.dstAddrImm9 = dst.getOffset() >> 9; |
2359 | |
2360 | db(i); |
2361 | } |
2362 | |
2363 | template <HW hw> |
2364 | template <HW hw_> |
2365 | typename std::enable_if<hwLT(hw_, HW::Gen12LP)>::type |
2366 | BinaryCodeGenerator<hw>::opSend(Opcode op, const InstructionModifier &mod, const RegData &dst, const RegData &src0, uint32_t exdesc, const RegData &desc) |
2367 | { |
2368 | #ifdef NGEN_SAFE |
2369 | // Only a0.0:ud is allowed for desc. |
2370 | if (!desc.isARF() || desc.getARFType() != ARFType::a || desc.getARFBase() != 0 || desc.getOffset() != 0) |
2371 | throw invalid_arf_exception(); |
2372 | #endif |
2373 | Instruction8 i{}; |
2374 | InstructionModifier emod = mod | defaultModifier; |
2375 | |
2376 | encodeCommon8(i, op, emod); |
2377 | |
2378 | i.binary.dst = encodeBinaryOperand8<true>(dst).bits; |
2379 | i.binary.src0 = encodeBinaryOperand8<false>(src0).bits; |
2380 | i.binary.src1 = encodeBinaryOperand8<false>(desc).bits; |
2381 | |
2382 | i.sendsGen9.dstRegFile = getRegFile(dst); |
2383 | i.binary.src0RegFile = getRegFile(src0); |
2384 | i.binary.src1RegFile = getRegFile(desc); |
2385 | i.binary.src1Type = getTypecode<hw>(desc.getType()); |
2386 | |
2387 | i.sendsGen9.sfid = exdesc & 0xF; |
2388 | i.sendGen8.zero = 0; |
2389 | i.sendGen8.exDesc16_19 = (exdesc >> 16) & 0xF; |
2390 | i.sendGen8.exDesc20_23 = (exdesc >> 20) & 0xF; |
2391 | i.sendGen8.exDesc24_27 = (exdesc >> 24) & 0xF; |
2392 | i.sendGen8.exDesc28_31 = (exdesc >> 28) & 0xF; |
2393 | |
2394 | i.sendsGen9.eot = (exdesc >> 5) & 1; |
2395 | if (dst.isIndirect()) i.sendsGen9.dstAddrImm9 = dst.getOffset() >> 9; |
2396 | |
2397 | db(i); |
2398 | } |
2399 | |
2400 | template <HW hw> |
2401 | template <typename D, HW hw_> |
2402 | typename std::enable_if<hwGE(hw_, HW::Gen12LP)>::type |
2403 | BinaryCodeGenerator<hw>::opSend(Opcode op, const InstructionModifier &mod, const RegData &dst, const RegData &src0, uint32_t exdesc, D desc) |
2404 | { |
2405 | opSends(op, mod, dst, src0, null, exdesc, desc); |
2406 | } |
2407 | |
2408 | template <HW hw> |
2409 | template <typename ED, typename D, HW hw_> |
2410 | typename std::enable_if<hwLT(hw_, HW::Gen12LP)>::type |
2411 | BinaryCodeGenerator<hw>::opSends(Opcode op, const InstructionModifier &mod, const RegData &dst, const RegData &src0, const RegData &src1, ED exdesc, D desc) |
2412 | { |
2413 | Instruction8 i{}; |
2414 | InstructionModifier emod = mod | defaultModifier; |
2415 | |
2416 | encodeCommon8(i, op, emod); |
2417 | |
2418 | i.binary.dst = encodeBinaryOperand8<true>(dst).bits; |
2419 | i.binary.src0 = encodeBinaryOperand8<false>(src0).bits; |
2420 | |
2421 | i.binary.src0RegFile = 0; // ? |
2422 | i.sendsGen9.dstRegFile = getRegFile(dst); |
2423 | i.sendsGen9.src1RegFile = getRegFile(src1); |
2424 | i.sendsGen9.src1RegNum = src1.getBase(); |
2425 | |
2426 | if (dst.isIndirect()) i.sendsGen9.dstAddrImm9 = dst.getOffset() >> 9; |
2427 | if (src0.isIndirect()) i.sendsGen9.src0AddrImm9 = src0.getOffset() >> 9; |
2428 | |
2429 | encodeSendsDesc(i, desc); |
2430 | encodeSendsExDesc(i, exdesc); |
2431 | |
2432 | db(i); |
2433 | } |
2434 | |
2435 | template <HW hw> |
2436 | template <typename D, HW hw_> |
2437 | typename std::enable_if<hwGE(hw_, HW::Gen12LP)>::type |
2438 | BinaryCodeGenerator<hw>::opSends(Opcode op, const InstructionModifier &mod, const RegData &dst, const RegData &src0, const RegData &src1, RegData exdesc, D desc) |
2439 | { |
2440 | #ifdef NGEN_SAFE |
2441 | throw sfid_needed_exception(); |
2442 | #endif |
2443 | } |
2444 | |
2445 | template <HW hw> |
2446 | template <typename D, HW hw_> |
2447 | typename std::enable_if<hwGE(hw_, HW::Gen12LP)>::type |
2448 | BinaryCodeGenerator<hw>::opSends(Opcode op, const InstructionModifier &mod, const RegData &dst, const RegData &src0, const RegData &src1, uint32_t exdesc, D desc) |
2449 | { |
2450 | Opcode mop = static_cast<Opcode>(static_cast<int>(op) & ~2); |
2451 | opSend(mop, mod, static_cast<SharedFunction>(exdesc & 0x1F), dst, src0, src1, exdesc, desc); |
2452 | } |
2453 | |
2454 | template <HW hw> |
2455 | template <HW hw_> |
2456 | typename std::enable_if<hwLT(hw_, HW::Gen12LP)>::type |
2457 | BinaryCodeGenerator<hw>::opBranch(Opcode op, const InstructionModifier &mod, const RegData &dst, int32_t jip, int32_t uip) |
2458 | { |
2459 | Instruction8 i{}; |
2460 | InstructionModifier emod = mod | defaultModifier; |
2461 | |
2462 | encodeCommon8(i, op, emod); |
2463 | |
2464 | i.binary.dst = encodeBinaryOperand8<true>(dst).bits; |
2465 | i.binary.dstRegFile = getRegFile(dst); |
2466 | i.binary.dstType = getTypecode<hw>(dst.getType()); |
2467 | i.binary.src0RegFile = getRegFile(Immediate()); |
2468 | i.binary.src0Type = getTypecode<hw>(DataType::d); |
2469 | i.branches.jip = jip; |
2470 | i.branches.uip = uip; |
2471 | |
2472 | db(i); |
2473 | } |
2474 | |
2475 | template <HW hw> |
2476 | template <HW hw_> |
2477 | typename std::enable_if<hwGE(hw_, HW::Gen12LP)>::type |
2478 | BinaryCodeGenerator<hw>::opBranch(Opcode op, const InstructionModifier &mod, const RegData &dst, int32_t jip, int32_t uip) |
2479 | { |
2480 | typename EncodingTag12Dispatch<hw>::tag tag; |
2481 | Instruction12 i{}; |
2482 | InstructionModifier emod = mod | defaultModifier; |
2483 | |
2484 | encodeCommon12(i, op, emod, dst, tag); |
2485 | |
2486 | i.branches.branchCtrl = emod.getBranchCtrl(); |
2487 | |
2488 | i.binary.dst = encodeBinaryOperand12<true, false>(dst, tag).bits; |
2489 | |
2490 | i.binary.src0Imm = true; |
2491 | i.binary.src1Imm = true; |
2492 | |
2493 | i.branches.jip = jip; |
2494 | i.branches.uip = uip; |
2495 | |
2496 | db(i); |
2497 | } |
2498 | |
2499 | template <HW hw> |
2500 | template <bool forceWE, HW hw_> |
2501 | typename std::enable_if<hwLT(hw_, HW::Gen12LP)>::type |
2502 | BinaryCodeGenerator<hw>::opBranch(Opcode op, const InstructionModifier &mod, const RegData &dst, int32_t jip) |
2503 | { |
2504 | Instruction8 i{}; |
2505 | InstructionModifier emod = mod | defaultModifier; |
2506 | if (forceWE) |
2507 | emod |= NoMask; |
2508 | |
2509 | encodeCommon8(i, op, emod); |
2510 | |
2511 | i.binary.dst = encodeBinaryOperand8<true>(dst).bits; |
2512 | i.binary.dstRegFile = getRegFile(dst); |
2513 | i.binary.dstType = getTypecode<hw>(dst.getType()); |
2514 | i.binary.src1RegFile = RegFileIMM; |
2515 | i.binary.src1Type = getTypecode<hw>(DataType::d); |
2516 | i.branches.jip = jip; |
2517 | |
2518 | db(i); |
2519 | } |
2520 | |
2521 | template <HW hw> |
2522 | template <bool forceWE, HW hw_> |
2523 | typename std::enable_if<hwGE(hw_, HW::Gen12LP)>::type |
2524 | BinaryCodeGenerator<hw>::opBranch(Opcode op, const InstructionModifier &mod, const RegData &dst, int32_t jip) |
2525 | { |
2526 | typename EncodingTag12Dispatch<hw>::tag tag; |
2527 | Instruction12 i{}; |
2528 | InstructionModifier emod = mod | defaultModifier; |
2529 | if (forceWE) |
2530 | emod |= NoMask; |
2531 | |
2532 | encodeCommon12(i, op, emod, dst, tag); |
2533 | |
2534 | i.branches.branchCtrl = emod.getBranchCtrl(); |
2535 | |
2536 | i.binary.dst = encodeBinaryOperand12<true, false>(dst, tag).bits; |
2537 | i.binary.src0Imm = true; |
2538 | i.branches.jip = jip; |
2539 | |
2540 | db(i); |
2541 | } |
2542 | |
2543 | template <HW hw> |
2544 | template <bool forceWE, bool small12, HW hw_> |
2545 | typename std::enable_if<hwLT(hw_, HW::Gen12LP)>::type |
2546 | BinaryCodeGenerator<hw>::opBranch(Opcode op, const InstructionModifier &mod, const RegData &dst, const RegData &src0) |
2547 | { |
2548 | Instruction8 i{}; |
2549 | InstructionModifier emod = mod | defaultModifier; |
2550 | if (forceWE) |
2551 | emod |= NoMask; |
2552 | |
2553 | encodeCommon8(i, op, emod); |
2554 | |
2555 | i.binary.dst = encodeBinaryOperand8<true>(dst).bits; |
2556 | i.binary.dstRegFile = getRegFile(dst); |
2557 | i.binary.dstType = getTypecode<hw>(DataType::d); |
2558 | i.binary.src0RegFile = getRegFile(src0); |
2559 | i.binary.src0Type = getTypecode<hw>(DataType::d); |
2560 | i.binary.src0 = encodeBinaryOperand8<false>(src0).bits; |
2561 | |
2562 | db(i); |
2563 | } |
2564 | |
2565 | template <HW hw> |
2566 | template <bool forceWE, bool small12, HW hw_> |
2567 | typename std::enable_if<hwGE(hw_, HW::Gen12LP)>::type |
2568 | BinaryCodeGenerator<hw>::opBranch(Opcode op, const InstructionModifier &mod, const RegData &dst, const RegData &src0) |
2569 | { |
2570 | typename EncodingTag12Dispatch<hw>::tag tag; |
2571 | Instruction12 i{}; |
2572 | InstructionModifier emod = mod | defaultModifier; |
2573 | if (forceWE) |
2574 | emod |= NoMask; |
2575 | |
2576 | encodeCommon12(i, op, emod, dst, tag); |
2577 | |
2578 | i.branches.branchCtrl = emod.getBranchCtrl(); |
2579 | |
2580 | i.binary.dst = encodeBinaryOperand12<true, false>(dst, tag).bits; |
2581 | i.binary.src0 = encodeBinaryOperand12<false, false>(src0, tag).bits; |
2582 | if (small12) |
2583 | i.binary.src0 &= 0xFFFF; |
2584 | |
2585 | db(i); |
2586 | } |
2587 | |
2588 | template <HW hw> |
2589 | void BinaryCodeGenerator<hw>::opBranch(Opcode op, const InstructionModifier &mod, const RegData &dst, Label &jip, Label &uip) |
2590 | { |
2591 | addFixup(LabelFixup(jip.getID(labelManager), LabelFixup::JIPOffset)); |
2592 | addFixup(LabelFixup(uip.getID(labelManager), LabelFixup::UIPOffset)); |
2593 | opBranch(op, mod, dst, 0, 0); |
2594 | } |
2595 | |
2596 | template <HW hw> |
2597 | template <bool forceWE> |
2598 | void BinaryCodeGenerator<hw>::opBranch(Opcode op, const InstructionModifier &mod, const RegData &dst, Label &jip) |
2599 | { |
2600 | addFixup(LabelFixup(jip.getID(labelManager), LabelFixup::JIPOffset)); |
2601 | opBranch<forceWE>(op, mod, dst, 0); |
2602 | } |
2603 | |
2604 | template <HW hw> |
2605 | void BinaryCodeGenerator<hw>::opCall(Opcode op, const InstructionModifier &mod, const RegData &dst, Label &jip) |
2606 | { |
2607 | addFixup(LabelFixup(jip.getID(labelManager), LabelFixup::JIPOffset)); |
2608 | if (isGen12) |
2609 | opBranch<true>(op, mod, dst, 0); |
2610 | else |
2611 | opX<true>(op, DataType::d, mod, dst, null.ud(0)(0, 1, 0), Immediate::d(0)); |
2612 | } |
2613 | |
2614 | template <HW hw> |
2615 | template <HW hw_> |
2616 | typename std::enable_if<hwLT(hw_, HW::Gen12LP)>::type |
2617 | BinaryCodeGenerator<hw>::opJmpi(Opcode op, const InstructionModifier &mod, const RegData &dst, RegData src0, uint32_t jip) |
2618 | { |
2619 | Instruction8 i{}; |
2620 | InstructionModifier emod = mod | defaultModifier | NoMask; |
2621 | |
2622 | encodeCommon8(i, op, emod); |
2623 | |
2624 | src0.fixup(hw, emod.getExecSize(), DataType::d, false, 2); |
2625 | |
2626 | i.binary.dst = encodeBinaryOperand8<true>(dst).bits; |
2627 | i.binary.src0 = encodeBinaryOperand8<false>(src0).bits; |
2628 | i.binary.src0RegFile = getRegFile(src0); |
2629 | i.binary.src1RegFile = RegFileIMM; |
2630 | i.binary.src1Type = getTypecode<hw>(DataType::d); |
2631 | |
2632 | i.branches.jip = jip; |
2633 | |
2634 | db(i); |
2635 | } |
2636 | |
2637 | template <HW hw> |
2638 | template <HW hw_> |
2639 | typename std::enable_if<hwGE(hw_, HW::Gen12LP)>::type |
2640 | BinaryCodeGenerator<hw>::opJmpi(Opcode op, const InstructionModifier &mod, const RegData &dst, RegData src0, uint32_t jip) |
2641 | { |
2642 | opBranch<true>(op, mod, dst, jip); |
2643 | } |
2644 | |
2645 | template <HW hw> |
2646 | void BinaryCodeGenerator<hw>::opJmpi(Opcode op, const InstructionModifier &mod, const RegData &dst, const RegData &src0, Label &jip) |
2647 | { |
2648 | if (hw >= HW::Gen12LP) |
2649 | addFixup(LabelFixup(jip.getID(labelManager), LabelFixup::JIPOffset)); |
2650 | opJmpi(op, mod, dst, src0, 0); |
2651 | if (hw < HW::Gen12LP) |
2652 | addFixup(LabelFixup(jip.getID(labelManager), LabelFixup::JIPOffsetJMPI)); |
2653 | } |
2654 | |
2655 | template <HW hw> |
2656 | void BinaryCodeGenerator<hw>::opSync(Opcode op, SyncFunction fc, const InstructionModifier &mod) |
2657 | { |
2658 | if (hw < HW::Gen12LP) |
2659 | unsupported(); |
2660 | |
2661 | typename EncodingTag12Dispatch<hw>::tag tag; |
2662 | Instruction12 i{}; |
2663 | InstructionModifier emod = mod | defaultModifier; |
2664 | |
2665 | encodeCommon12(i, op, emod, null, tag); |
2666 | |
2667 | i.binary.dst = 0x1; |
2668 | i.binary.cmod = static_cast<int>(fc); |
2669 | |
2670 | db(i); |
2671 | } |
2672 | |
2673 | template <HW hw> |
2674 | void BinaryCodeGenerator<hw>::opSync(Opcode op, SyncFunction fc, const InstructionModifier &mod, RegData src0) |
2675 | { |
2676 | typename EncodingTag12Dispatch<hw>::tag tag; |
2677 | if (hw < HW::Gen12LP) |
2678 | unsupported(); |
2679 | |
2680 | Instruction12 i{}; |
2681 | InstructionModifier emod = mod | defaultModifier; |
2682 | |
2683 | encodeCommon12(i, op, emod, null, tag); |
2684 | |
2685 | i.binary.dst = 0x1; |
2686 | if (!src0.isNull()) { |
2687 | src0.setRegion(0, 1, 0); |
2688 | i.binary.src0 = encodeBinaryOperand12<false>(src0, tag).bits; |
2689 | i.binary.src0Type = getTypecode12(src0.getType()); |
2690 | } |
2691 | i.binary.cmod = static_cast<int>(fc); |
2692 | |
2693 | db(i); |
2694 | } |
2695 | |
2696 | template <HW hw> |
2697 | void BinaryCodeGenerator<hw>::opSync(Opcode op, SyncFunction fc, const InstructionModifier &mod, const Immediate &src0) |
2698 | { |
2699 | if (hw < HW::Gen12LP) |
2700 | unsupported(); |
2701 | |
2702 | typename EncodingTag12Dispatch<hw>::tag tag; |
2703 | Instruction12 i{}; |
2704 | InstructionModifier emod = mod | defaultModifier; |
2705 | |
2706 | encodeCommon12(i, op, emod, null, tag); |
2707 | |
2708 | i.binary.dst = 0x1; |
2709 | i.binary.src0Type = getTypecode12(src0.getType()); |
2710 | i.binary.src0Imm = true; |
2711 | i.binary.cmod = static_cast<int>(fc); |
2712 | |
2713 | i.imm32.value = static_cast<uint64_t>(src0); |
2714 | |
2715 | db(i); |
2716 | } |
2717 | |
2718 | template <HW hw> |
2719 | void BinaryCodeGenerator<hw>::opNop(Opcode op) |
2720 | { |
2721 | Instruction8 i{}; |
2722 | |
2723 | i.qword[0] = static_cast<int>(op); |
2724 | i.qword[1] = 0; |
2725 | |
2726 | db(i); |
2727 | } |
2728 | |
2729 | } /* namespace ngen */ |
2730 | |
2731 | #endif /* header guard */ |
2732 | |