1/*******************************************************************************
2* Copyright 2019-2022 Intel Corporation
3*
4* Licensed under the Apache License, Version 2.0 (the "License");
5* you may not use this file except in compliance with the License.
6* You may obtain a copy of the License at
7*
8* http://www.apache.org/licenses/LICENSE-2.0
9*
10* Unless required by applicable law or agreed to in writing, software
11* distributed under the License is distributed on an "AS IS" BASIS,
12* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13* See the License for the specific language governing permissions and
14* limitations under the License.
15*******************************************************************************/
16
17// nGEN: a C++ library for runtime Gen assembly generation.
18//
19// Macros that control nGEN's interface:
20// NGEN_SAFE if defined, enables run-time safety checks. Exceptions will be thrown if checks fail.
21// NGEN_SHORT_NAMES if defined, enables some short names (r[...] for indirect addressing, W for NoMask)
22// NGEN_GLOBAL_REGS if defined, register names and instruction modifiers (r7, cr0, Switch, etc.) are
23// global variables in the ngen namespace. Otherwise, they are members of the code
24// generator classes
25// NGEN_CPP11 if defined, ngen is C++11-compatible (C++17 not required)
26
27#ifndef NGEN_HPP
28#define NGEN_HPP
29
30#include "ngen_config.hpp"
31
#include <array>
#include <cstring>
#include <memory>
#include <type_traits>
#include <vector>
36
37#include "ngen_core.hpp"
38#include "ngen_auto_swsb.hpp"
39
40namespace ngen {
41
42// Forward declarations.
43template <HW hw> class BinaryCodeGenerator;
44template <HW hw> class ELFCodeGenerator;
45
46// MSVC v140 workaround for enum comparison in template arguments.
47static constexpr bool hwLT(HW hw1, HW hw2) { return hw1 < hw2; }
48static constexpr bool hwLE(HW hw1, HW hw2) { return hw1 <= hw2; }
49static constexpr bool hwGE(HW hw1, HW hw2) { return hw1 >= hw2; }
50static constexpr bool hwGT(HW hw1, HW hw2) { return hw1 > hw2; }
51
52// -----------------------------------------------------------------------
53
// Register-file selector values returned by the getRegFile() overloads.
// NOTE(review): the numeric values (and the skipped value 2) presumably mirror
// the hardware instruction encoding -- confirm against the ISA reference.
enum RegFiles : unsigned {
    RegFileARF = 0,     // RegData whose isARF() is true
    RegFileGRF = 1,     // any other RegData
    RegFileIMM = 3,     // Immediate operands
};
59
60inline unsigned getRegFile(const RegData &rd) { return rd.isARF() ? RegFileARF : RegFileGRF; }
61inline unsigned getRegFile(const Align16Operand &o) { return getRegFile(o.getReg()); }
62inline unsigned getRegFile(const ExtendedReg &reg) { return getRegFile(reg.getBase()); }
63inline unsigned getRegFile(const Immediate &imm) { return RegFileIMM; }
64
65// -----------------------------------------------------------------------
66// Binary formats, split between pre-Gen12 and post-Gen12.
67
68#include "ngen_gen8.hpp"
69#include "ngen_gen12.hpp"
70
71// -----------------------------------------------------------------------
72
73
// Describes one jump field that has to be patched once its label is resolved.
class LabelFixup {
public:
    uint32_t labelID;       // label the field refers to
    int32_t anchor = 0;     // byte position the fixup is relative to; starts at 0
    int32_t offset;         // byte offset of the field, relative to anchor

    LabelFixup(uint32_t labelID_, int32_t offset_)
        : labelID{labelID_}, offset{offset_}
    {}

    // Predefined field offsets, to be passed as `offset_`.
    static constexpr int JIPOffset = 12;
    static constexpr int JIPOffsetJMPI = -4;
    static constexpr int UIPOffset = 8;
};
86
87#if defined(NGEN_GLOBAL_REGS) && !defined(NGEN_GLOBAL_REGS_DEFINED)
88#define NGEN_GLOBAL_REGS_DEFINED
89#include "ngen_registers.hpp"
90#endif
91
92template <HW hw>
93class BinaryCodeGenerator
94{
95 friend class ELFCodeGenerator<hw>;
96
97public:
98 static constexpr HW hardware = hw;
99
100protected:
101 class InstructionStream {
102 friend class BinaryCodeGenerator;
103
104 std::vector<LabelFixup> fixups;
105 std::vector<uint32_t> labels;
106 std::vector<uint64_t> code;
107 bool appended = false;
108
109 int length() const { return int(code.size() * sizeof(uint64_t)); }
110
111 void db(const Instruction8 &i) {
112 code.push_back(i.qword[0]);
113 code.push_back(i.qword[1]);
114 }
115
116 void db(const Instruction12 &i) {
117 code.push_back(i.qword[0]);
118 code.push_back(i.qword[1]);
119 }
120
121 void addFixup(LabelFixup fixup) {
122 fixup.anchor = length();
123 fixups.push_back(fixup);
124 }
125
126 void mark(Label &label, LabelManager &man) {
127 uint32_t id = label.getID(man);
128
129 man.setTarget(id, length());
130 labels.push_back(id);
131 }
132
133 void fixLabels(LabelManager &man) {
134 for (const auto &fixup : fixups) {
135 int32_t target = man.getTarget(fixup.labelID);
136 uint8_t *field = ((uint8_t *) code.data()) + fixup.anchor + fixup.offset;
137 *((int32_t *) field) = target - fixup.anchor;
138 }
139 }
140
141 void append(InstructionStream &other, LabelManager &man) {
142 auto offset = length();
143 auto sz = code.size();
144
145 code.resize(sz + other.code.size());
146 std::copy(other.code.begin(), other.code.end(), code.begin() + sz);
147
148 sz = labels.size();
149 labels.resize(sz + other.labels.size());
150 std::copy(other.labels.begin(), other.labels.end(), labels.begin() + sz);
151
152 for (LabelFixup fixup : other.fixups) {
153 fixup.anchor += offset;
154 fixups.push_back(fixup);
155 }
156
157#ifdef NGEN_SAFE
158 if (other.appended && !other.labels.empty())
159 throw multiple_label_exception();
160#endif
161
162 for (uint32_t id : other.labels)
163 man.offsetTarget(id, offset);
164
165 other.appended = true;
166 }
167
168 InstructionStream() {}
169 };
170
171 class Program {
172 friend class BinaryCodeGenerator;
173 using Instruction = typename std::conditional<(hw >= HW::XeHPC), InstructionXeHPC, Instruction12>::type;
174 std::vector<uint64_t> &code;
175
176 Program(InstructionStream &stream) : code(stream.code) {};
177
178 public:
179 size_t size() const { return code.size() >> 1; }
180 Instruction &operator[](size_t index) { return *reinterpret_cast<Instruction *>(&code[index * 2]); }
181 const Instruction &operator[](size_t index) const { return *reinterpret_cast<Instruction *>(&code[index * 2]); }
182 };
183
    // True when the target hardware is Gen12LP or newer.
    static constexpr bool isGen12 = (hw >= HW::Gen12LP);
    // Hardware stepping; set by the constructor and by setStepping().
    int stepping = 0;

    // NOTE(review): presumably label the points at which local IDs / kernel
    // arguments have been loaded -- not used in this part of the file; confirm.
    Label _labelLocalIDsLoaded;
    Label _labelArgsLoaded;
189
190private:
    // Holds the default NoMask / auto-SWSB settings (see setDefaultNoMask / setDefaultAutoSWSB).
    InstructionModifier defaultModifier;

    // Handed to the streams when labels are marked and when streams are appended.
    LabelManager labelManager;
    // Bottom-most stream; pushed onto streamStack by the constructor.
    InstructionStream rootStream;
    // Stack of active streams; back() is the one currently receiving instructions.
    std::vector<InstructionStream*> streamStack;
196
197 void db(const Instruction8 &i) { streamStack.back()->db(i); }
198 void db(const Instruction12 &i) { streamStack.back()->db(i); }
199 void addFixup(LabelFixup fixup) { streamStack.back()->addFixup(fixup); }
200
    // Generic instruction encoders (declarations only; defined elsewhere).
    // Most overloads come in two flavours selected by enable_if on the hardware
    // generation: hwLT(hw_, HW::Gen12LP) for hardware before Gen12LP and
    // hwGE(hw_, HW::Gen12LP) for Gen12LP and later.
    // NOTE(review): forceWE presumably forces write-enable (NoMask) on the
    // emitted instruction -- confirm against the definitions.

    // One source operand: generic operand or Immediate.
    template <bool forceWE = false, typename D, typename S0, HW hw_ = hw>
    typename std::enable_if<hwLT(hw_, HW::Gen12LP)>::type opX(Opcode op, DataType defaultType, const InstructionModifier &mod, D dst, S0 src0);
    template <bool forceWE = false, typename D, typename S0, HW hw_ = hw>
    typename std::enable_if<hwGE(hw_, HW::Gen12LP)>::type opX(Opcode op, DataType defaultType, const InstructionModifier &mod, D dst, S0 src0);
    template <bool forceWE = false, typename D, HW hw_ = hw>
    typename std::enable_if<hwLT(hw_, HW::Gen12LP)>::type opX(Opcode op, DataType defaultType, const InstructionModifier &mod, D dst, const Immediate &src0);
    template <bool forceWE = false, typename D, HW hw_ = hw>
    typename std::enable_if<hwGE(hw_, HW::Gen12LP)>::type opX(Opcode op, DataType defaultType, const InstructionModifier &mod, D dst, const Immediate &src0);

    // Two source operands: generic second source or Immediate second source.
    template <bool forceWE = false, typename D, typename S0, typename S1, HW hw_ = hw>
    typename std::enable_if<hwLT(hw_, HW::Gen12LP)>::type opX(Opcode op, DataType defaultType, const InstructionModifier &mod, D dst, S0 src0, S1 src1);
    template <bool forceWE = false, typename D, typename S0, typename S1, HW hw_ = hw>
    typename std::enable_if<hwGE(hw_, HW::Gen12LP)>::type opX(Opcode op, DataType defaultType, const InstructionModifier &mod, D dst, S0 src0, S1 src1);
    template <bool forceWE = false, typename D, typename S0, HW hw_ = hw>
    typename std::enable_if<hwLT(hw_, HW::Gen12LP)>::type opX(Opcode op, DataType defaultType, const InstructionModifier &mod, D dst, S0 src0, const Immediate &src1);
    template <bool forceWE = false, typename D, typename S0, HW hw_ = hw>
    typename std::enable_if<hwGE(hw_, HW::Gen12LP)>::type opX(Opcode op, DataType defaultType, const InstructionModifier &mod, D dst, S0 src0, const Immediate &src1);

    // Three source operands. The all-RegData overload exists only up to Gen9 and
    // the Align16Operand overload only before Gen12LP; the generic overloads
    // cover both hardware ranges.
    template <HW hw_ = hw>
    typename std::enable_if<hwLE(hw_, HW::Gen9)>::type opX(Opcode op, DataType defaultType, const InstructionModifier &mod, RegData dst, RegData src0, RegData src1, RegData src2);
    template <HW hw_ = hw>
    typename std::enable_if<hwLT(hw_, HW::Gen12LP)>::type opX(Opcode op, DataType defaultType, const InstructionModifier &mod, Align16Operand dst, Align16Operand src0, Align16Operand src1, Align16Operand src2);
    template <typename D, typename S0, typename S1, typename S2, HW hw_ = hw>
    typename std::enable_if<hwLT(hw_, HW::Gen12LP)>::type opX(Opcode op, DataType defaultType, const InstructionModifier &mod, D dst, S0 src0, S1 src1, S2 src2);
    template <typename D, typename S0, typename S1, typename S2, HW hw_ = hw>
    typename std::enable_if<hwGE(hw_, HW::Gen12LP)>::type opX(Opcode op, DataType defaultType, const InstructionModifier &mod, D dst, S0 src0, S1 src1, S2 src2);
227
    // Encoders for math instructions (declarations only). The destination and
    // first source share one operand type (DS0); `fc` selects the math function.
    template <typename DS0>
    void opMath(Opcode op, DataType defaultType, const InstructionModifier &mod, MathFunction fc, DS0 dst, DS0 src0);
    template <typename DS0, typename S1>
    void opMath(Opcode op, DataType defaultType, const InstructionModifier &mod, MathFunction fc, DS0 dst, DS0 src0, S1 src1);

    // Encoder used by the bfn() wrappers; bfnCtrl receives their `ctrl` argument.
    // src1 is always a RegData, while dst/src0/src2 are generic.
    template <typename D, typename S0, typename S2>
    void opBfn(Opcode op, DataType defaultType, const InstructionModifier &mod, int bfnCtrl, D dst, S0 src0, RegData src1, S2 src2);
    // Encoder used by the dpas()/dpasw() wrappers; sdepth and rcount are forwarded from them.
    void opDpas(Opcode op, DataType defaultType, const InstructionModifier &mod, int sdepth, int rcount, RegData dst, RegData src0, RegData src1, RegData src2);
236
    // Encoders for send-type instructions (declarations only). Overloads are
    // selected by hardware generation through enable_if, and by whether the
    // extended descriptor is a uint32_t or a register.

    // Forms taking a SharedFunction and two sources.
    template <typename D, HW hw_ = hw>
    typename std::enable_if<hwLT(hw_, HW::Gen12LP)>::type opSend(Opcode op, const InstructionModifier &mod, SharedFunction sfid, const RegData &dst, const RegData &src0, const RegData &src1, uint32_t exdesc, D desc);
    template <typename D, HW hw_ = hw>
    typename std::enable_if<hwLT(hw_, HW::Gen12LP)>::type opSend(Opcode op, const InstructionModifier &mod, SharedFunction sfid, const RegData &dst, const RegData &src0, const RegData &src1, const RegData &exdesc, D desc);
    template <typename ED, typename D, HW hw_ = hw>
    typename std::enable_if<hwGE(hw_, HW::Gen12LP)>::type opSend(Opcode op, const InstructionModifier &mod, SharedFunction sfid, const RegData &dst, const RegData &src0, const RegData &src1, ED exdesc, D desc);

    // Forms with a single source and no SharedFunction argument.
    template <HW hw_ = hw>
    typename std::enable_if<hwLT(hw_, HW::Gen12LP)>::type opSend(Opcode op, const InstructionModifier &mod, const RegData &dst, const RegData &src0, uint32_t exdesc, uint32_t desc);
    template <HW hw_ = hw>
    typename std::enable_if<hwLT(hw_, HW::Gen12LP)>::type opSend(Opcode op, const InstructionModifier &mod, const RegData &dst, const RegData &src0, uint32_t exdesc, const RegData &desc);
    template <typename D, HW hw_ = hw>
    typename std::enable_if<hwGE(hw_, HW::Gen12LP)>::type opSend(Opcode op, const InstructionModifier &mod, const RegData &dst, const RegData &src0, uint32_t exdesc, D desc);

    // Two-source forms without a SharedFunction argument.
    template <typename ED, typename D, HW hw_ = hw>
    typename std::enable_if<hwLT(hw_, HW::Gen12LP)>::type opSends(Opcode op, const InstructionModifier &mod, const RegData &dst, const RegData &src0, const RegData &src1, ED exdesc, D desc);
    template <typename D, HW hw_ = hw>
    typename std::enable_if<hwGE(hw_, HW::Gen12LP)>::type opSends(Opcode op, const InstructionModifier &mod, const RegData &dst, const RegData &src0, const RegData &src1, uint32_t exdesc, D desc);
    template <typename D, HW hw_ = hw>
    typename std::enable_if<hwGE(hw_, HW::Gen12LP)>::type opSends(Opcode op, const InstructionModifier &mod, const RegData &dst, const RegData &src0, const RegData &src1, RegData exdesc, D desc);
257
    // Encoders for branch-type instructions (declarations only).
    // The enable_if'd overloads take already-resolved targets (integer JIP/UIP
    // values or a register source) and are selected by hardware generation.
    // NOTE(review): forceWE / small12 are encoding options whose meaning is
    // defined with the implementations -- confirm there.
    template <HW hw_ = hw>
    typename std::enable_if<hwLT(hw_, HW::Gen12LP)>::type opBranch(Opcode op, const InstructionModifier &mod, const RegData &dst, int32_t jip, int32_t uip);
    template <HW hw_ = hw>
    typename std::enable_if<hwGE(hw_, HW::Gen12LP)>::type opBranch(Opcode op, const InstructionModifier &mod, const RegData &dst, int32_t jip, int32_t uip);
    template <bool forceWE = false, HW hw_ = hw>
    typename std::enable_if<hwLT(hw_, HW::Gen12LP)>::type opBranch(Opcode op, const InstructionModifier &mod, const RegData &dst, int32_t jip);
    template <bool forceWE = false, HW hw_ = hw>
    typename std::enable_if<hwGE(hw_, HW::Gen12LP)>::type opBranch(Opcode op, const InstructionModifier &mod, const RegData &dst, int32_t jip);
    template <bool forceWE = false, bool small12 = true, HW hw_ = hw>
    typename std::enable_if<hwLT(hw_, HW::Gen12LP)>::type opBranch(Opcode op, const InstructionModifier &mod, const RegData &dst, const RegData &src0);
    template <bool forceWE = false, bool small12 = true, HW hw_ = hw>
    typename std::enable_if<hwGE(hw_, HW::Gen12LP)>::type opBranch(Opcode op, const InstructionModifier &mod, const RegData &dst, const RegData &src0);

    // Label-based forms, independent of the hardware generation at the declaration level.
    void opBranch(Opcode op, const InstructionModifier &mod, const RegData &dst, Label &jip, Label &uip);
    template <bool forceWE = false>
    void opBranch(Opcode op, const InstructionModifier &mod, const RegData &dst, Label &jip);
    void opCall(Opcode op, const InstructionModifier &mod, const RegData &dst, Label &jip);

    // jmpi encoders: integer target (per hardware generation) or label target.
    template <HW hw_ = hw>
    typename std::enable_if<hwLT(hw_, HW::Gen12LP)>::type opJmpi(Opcode op, const InstructionModifier &mod, const RegData &dst, RegData src0, uint32_t jip);
    template <HW hw_ = hw>
    typename std::enable_if<hwGE(hw_, HW::Gen12LP)>::type opJmpi(Opcode op, const InstructionModifier &mod, const RegData &dst, RegData src0, uint32_t jip);
    void opJmpi(Opcode op, const InstructionModifier &mod, const RegData &dst, const RegData &src0, Label &jip);
281
    // Encoders for sync instructions (declarations only): no source, a register
    // source, or an immediate source. `fc` selects the sync function.
    void opSync(Opcode op, SyncFunction fc, const InstructionModifier &mod);
    void opSync(Opcode op, SyncFunction fc, const InstructionModifier &mod, RegData src0);
    void opSync(Opcode op, SyncFunction fc, const InstructionModifier &mod, const Immediate &src0);

    // Encoder for an operand-less instruction.
    void opNop(Opcode op);

    // Called by instruction wrappers when the target hardware is older than
    // the instruction requires.
    // NOTE(review): presumably raises an error -- defined elsewhere; confirm.
    inline void unsupported();
289
290#include "ngen_compiler_fix.hpp"
291
292public:
293 explicit BinaryCodeGenerator(int stepping_ = 0) : stepping{stepping_}, defaultModifier{}, labelManager{}, sync{this}, load{this}, store{this}, atomic{this} {
294 _workaround_();
295 pushStream(rootStream);
296 }
297
298 ~BinaryCodeGenerator() {
299 for (size_t sn = 1; sn < streamStack.size(); sn++)
300 delete streamStack[sn];
301 }
302
    // Returns the result as a byte vector (defined elsewhere).
    // NOTE(review): presumably the finished binary with labels resolved -- confirm.
    std::vector<uint8_t> getCode();
    // Size in bytes of the root stream.
    size_t getRootStreamLength() const { return rootStream.length(); }

    // Accessors for the hardware stepping given at construction.
    int getStepping() const { return stepping; }
    void setStepping(int stepping_) { stepping = stepping_; }
308
309protected:
310 // Configuration.
    // Set / query the write-enable (NoMask) flag of the default modifier.
    void setDefaultNoMask(bool def = true) { defaultModifier.setWrEn(def); }
    // Set / query the automatic-SWSB flag of the default modifier.
    void setDefaultAutoSWSB(bool def = true) { defaultModifier.setAutoSWSB(def); }
    bool getDefaultNoMask() const { return defaultModifier.isWrEn(); }
    bool getDefaultAutoSWSB() const { return defaultModifier.isAutoSWSB(); }
315
316 // Stream handling.
317 void pushStream() { pushStream(new InstructionStream()); }
318 void pushStream(InstructionStream *s) { streamStack.push_back(s); }
319 void pushStream(InstructionStream &s) { pushStream(&s); }
320
321 InstructionStream *popStream();
322
323 void appendStream(InstructionStream *s) { appendStream(*s); }
324 void appendStream(InstructionStream &s) { streamStack.back()->append(s, labelManager); }
325 void appendCurrentStream() { InstructionStream *s = popStream(); appendStream(s); delete s; }
326
327 void discardStream() { delete popStream(); }
328
    // Accepts a comment of any type and ignores it: this generator emits nothing for comments.
    template <typename String>
    void comment(String) {}
331
332 // Registers.
333#ifndef NGEN_GLOBAL_REGS
334#include "ngen_registers.hpp"
335#endif
336
337 // Labels.
338 inline void mark(Label &label) { streamStack.back()->mark(label, labelManager); }
339
340 // Instructions.
341 template <typename DT = void>
342 void add(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const RegData &src1) {
343 opX(Opcode::add, getDataType<DT>(), mod, dst, src0, src1);
344 }
345 template <typename DT = void>
346 void add(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const Immediate &src1) {
347 opX(Opcode::add, getDataType<DT>(), mod, dst, src0, src1);
348 }
349 template <typename DT = void>
350 void addc(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const RegData &src1) {
351 opX(Opcode::addc, getDataType<DT>(), mod | AccWrEn, dst, src0, src1);
352 }
353 template <typename DT = void>
354 void addc(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const Immediate &src1) {
355 opX(Opcode::addc, getDataType<DT>(), mod | AccWrEn, dst, src0, src1);
356 }
357 template <typename DT = void>
358 void add3(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const RegData &src1, const RegData &src2) {
359 if (hw < HW::XeHP) unsupported();
360 opX(Opcode::add3, getDataType<DT>(), mod, dst, src0, src1, src2);
361 }
362 template <typename DT = void>
363 void add3(const InstructionModifier &mod, const RegData &dst, const Immediate &src0, const RegData &src1, const RegData &src2) {
364 if (hw < HW::XeHP) unsupported();
365 opX(Opcode::add3, getDataType<DT>(), mod, dst, src0, src1, src2);
366 }
367 template <typename DT = void>
368 void add3(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const RegData &src1, const Immediate &src2) {
369 if (hw < HW::XeHP) unsupported();
370 opX(Opcode::add3, getDataType<DT>(), mod, dst, src0, src1, src2);
371 }
372 template <typename DT = void>
373 void add3(const InstructionModifier &mod, const RegData &dst, const Immediate &src0, const RegData &src1, const Immediate &src2) {
374 if (hw < HW::XeHP) unsupported();
375 opX(Opcode::add3, getDataType<DT>(), mod, dst, src0, src1, src2);
376 }
377 template <typename DT = void>
378 void and_(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const RegData &src1) {
379 opX(isGen12 ? Opcode::and_gen12 : Opcode::and_, getDataType<DT>(), mod, dst, src0, src1);
380 }
381 template <typename DT = void>
382 void and_(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const Immediate &src1) {
383 opX(isGen12 ? Opcode::and_gen12 : Opcode::and_, getDataType<DT>(), mod, dst, src0, src1);
384 }
385#ifndef NGEN_NO_OP_NAMES
386 template <typename DT = void>
387 void and(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const RegData &src1) {
388 and_<DT>(mod, dst, src0, src1);
389 }
390 template <typename DT = void>
391 void and(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const Immediate &src1) {
392 and_<DT>(mod, dst, src0, src1);
393 }
394#endif
395 template <typename DT = void>
396 void asr(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const RegData &src1) {
397 opX(isGen12 ? Opcode::asr_gen12 : Opcode::asr, getDataType<DT>(), mod, dst, src0, src1);
398 }
399 template <typename DT = void>
400 void asr(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const Immediate &src1) {
401 opX(isGen12 ? Opcode::asr_gen12 : Opcode::asr, getDataType<DT>(), mod, dst, src0, src1);
402 }
403 template <typename DT = void>
404 void avg(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const RegData &src1) {
405 opX(Opcode::avg, getDataType<DT>(), mod, dst, src0, src1);
406 }
407 template <typename DT = void>
408 void avg(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const Immediate &src1) {
409 opX(Opcode::avg, getDataType<DT>(), mod, dst, src0, src1);
410 }
411 template <typename DT = void>
412 void bfe(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const RegData &src1, const RegData &src2) {
413 opX(isGen12 ? Opcode::bfe_gen12 : Opcode::bfe, getDataType<DT>(), mod, dst, src0, src1, src2);
414 }
415 template <typename DT = void>
416 void bfe(const InstructionModifier &mod, const RegData &dst, const Immediate &src0, const RegData &src1, const RegData &src2) {
417 opX(isGen12 ? Opcode::bfe_gen12 : Opcode::bfe, getDataType<DT>(), mod, dst, src0, src1, src2);
418 }
419 template <typename DT = void>
420 void bfe(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const RegData &src1, const Immediate &src2) {
421 opX(isGen12 ? Opcode::bfe_gen12 : Opcode::bfe, getDataType<DT>(), mod, dst, src0, src1, src2);
422 }
423 template <typename DT = void>
424 void bfe(const InstructionModifier &mod, const RegData &dst, const Immediate &src0, const RegData &src1, const Immediate &src2) {
425 opX(isGen12 ? Opcode::bfe_gen12 : Opcode::bfe, getDataType<DT>(), mod, dst, src0, src1, src2);
426 }
427 template <typename DT = void>
428 void bfi1(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const RegData &src1) {
429 opX(isGen12 ? Opcode::bfi1_gen12 : Opcode::bfi1, getDataType<DT>(), mod, dst, src0, src1);
430 }
431 template <typename DT = void>
432 void bfi1(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const Immediate &src1) {
433 opX(isGen12 ? Opcode::bfi1_gen12 : Opcode::bfi1, getDataType<DT>(), mod, dst, src0, src1);
434 }
435 template <typename DT = void>
436 void bfi2(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const RegData &src1, const RegData &src2) {
437 opX(isGen12 ? Opcode::bfi2_gen12 : Opcode::bfi2, getDataType<DT>(), mod, dst, src0, src1, src2);
438 }
439 template <typename DT = void>
440 void bfi2(const InstructionModifier &mod, const RegData &dst, const Immediate &src0, const RegData &src1, const RegData &src2) {
441 opX(isGen12 ? Opcode::bfi2_gen12 : Opcode::bfi2, getDataType<DT>(), mod, dst, src0, src1, src2);
442 }
443 template <typename DT = void>
444 void bfi2(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const RegData &src1, const Immediate &src2) {
445 opX(isGen12 ? Opcode::bfi2_gen12 : Opcode::bfi2, getDataType<DT>(), mod, dst, src0, src1, src2);
446 }
447 template <typename DT = void>
448 void bfi2(const InstructionModifier &mod, const RegData &dst, const Immediate &src0, const RegData &src1, const Immediate &src2) {
449 opX(isGen12 ? Opcode::bfi2_gen12 : Opcode::bfi2, getDataType<DT>(), mod, dst, src0, src1, src2);
450 }
451 template <typename DT = void>
452 void bfn(const InstructionModifier &mod, uint8_t ctrl, const RegData &dst, const RegData &src0, const RegData &src1, const RegData &src2) {
453 if (hw < HW::XeHP) unsupported();
454 opBfn(Opcode::bfn, getDataType<DT>(), mod, ctrl, dst, src0, src1, src2);
455 }
456 template <typename DT = void>
457 void bfn(const InstructionModifier &mod, uint8_t ctrl, const RegData &dst, const Immediate &src0, const RegData &src1, const RegData &src2) {
458 if (hw < HW::XeHP) unsupported();
459 opBfn(Opcode::bfn, getDataType<DT>(), mod, ctrl, dst, src0, src1, src2);
460 }
461 template <typename DT = void>
462 void bfn(const InstructionModifier &mod, uint8_t ctrl, const RegData &dst, const RegData &src0, const RegData &src1, const Immediate &src2) {
463 if (hw < HW::XeHP) unsupported();
464 opBfn(Opcode::bfn, getDataType<DT>(), mod, ctrl, dst, src0, src1, src2);
465 }
466 template <typename DT = void>
467 void bfn(const InstructionModifier &mod, uint8_t ctrl, const RegData &dst, const Immediate &src0, const RegData &src1, const Immediate &src2) {
468 if (hw < HW::XeHP) unsupported();
469 opBfn(Opcode::bfn, getDataType<DT>(), mod, ctrl, dst, src0, src1, src2);
470 }
471 template <typename DT = void>
472 void bfrev(const InstructionModifier &mod, const RegData &dst, const RegData &src0) {
473 opX(isGen12 ? Opcode::bfrev_gen12 : Opcode::bfrev, getDataType<DT>(), mod, dst, src0);
474 }
475 template <typename DT = void>
476 void bfrev(const InstructionModifier &mod, const RegData &dst, const Immediate &src0) {
477 opX(isGen12 ? Opcode::bfrev_gen12 : Opcode::bfrev, getDataType<DT>(), mod, dst, src0);
478 }
479 void brc(const InstructionModifier &mod, Label &jip, Label &uip) {
480 opBranch(Opcode::brc, mod, isGen12 ? null.ud() : ip.d(), jip, uip);
481 }
482 void brc(const InstructionModifier &mod, RegData src0) {
483 src0.setRegion(2, 2, 1);
484 opBranch<true, true>(Opcode::brc, mod, isGen12 ? null.ud() : ip.d(), src0);
485 }
486 void brd(const InstructionModifier &mod, Label &jip) {
487 opBranch(Opcode::brd, mod, isGen12 ? null.ud() : ip.d(), jip);
488 }
489 void brd(const InstructionModifier &mod, RegData src0) {
490 src0.setRegion(2, 2, 1);
491 opBranch<true, true>(Opcode::brd, mod, isGen12 ? null.ud() : ip.d(), src0);
492 }
493 void break_(const InstructionModifier &mod, Label &jip, Label &uip) {
494 opBranch(Opcode::break_, mod, null, jip, uip);
495 }
496 void call(const InstructionModifier &mod, const RegData &dst, Label &jip) {
497 opCall(Opcode::call, mod, dst, jip);
498 }
499 void call(const InstructionModifier &mod, const RegData &dst, RegData jip) {
500 if (isGen12)
501 opBranch<true, true>(Opcode::call, mod, dst, jip);
502 else {
503 jip.setRegion(0, 1, 0);
504 opX<true>(Opcode::call, DataType::d, mod, dst, null.ud(0)(0, 1, 0), jip);
505 }
506 }
507 void calla(const InstructionModifier &mod, const RegData &dst, int32_t jip) {
508 if (isGen12)
509 opBranch<true>(Opcode::calla, mod, dst, jip);
510 else
511 opX<true>(Opcode::calla, DataType::d, mod, dst, (hw <= HW::Gen9) ? null.ud(0)(2,2,1) : null.ud(0)(0,1,0), Immediate::d(jip));
512 }
513 void calla(const InstructionModifier &mod, const RegData &dst, RegData jip) {
514 if (isGen12)
515 opBranch<true, true>(Opcode::calla, mod, dst, jip);
516 else {
517 jip.setRegion(0, 1, 0);
518 opX<true>(Opcode::calla, DataType::d, mod, dst, null.ud(0)(0, 1, 0), jip);
519 }
520 }
521 template <typename DT = void>
522 void cbit(const InstructionModifier &mod, const RegData &dst, const RegData &src0) {
523 opX(Opcode::cbit, getDataType<DT>(), mod, dst, src0);
524 }
525 template <typename DT = void>
526 void cbit(const InstructionModifier &mod, const RegData &dst, const Immediate &src0) {
527 opX(Opcode::cbit, getDataType<DT>(), mod, dst, src0);
528 }
529 template <typename DT = void>
530 void cmp(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const RegData &src1) {
531 opX(isGen12 ? Opcode::cmp_gen12 : Opcode::cmp, getDataType<DT>(), mod, dst, src0, src1);
532 }
533 template <typename DT = void>
534 void cmp(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const Immediate &src1) {
535 opX(isGen12 ? Opcode::cmp_gen12 : Opcode::cmp, getDataType<DT>(), mod, dst, src0, src1);
536 }
537 template <typename DT = void>
538 void cmpn(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const RegData &src1) {
539 opX(isGen12 ? Opcode::cmpn_gen12 : Opcode::cmpn, getDataType<DT>(), mod, dst, src0, src1);
540 }
541 template <typename DT = void>
542 void csel(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const RegData &src1, const RegData &src2) {
543 opX(isGen12 ? Opcode::csel_gen12 : Opcode::csel, getDataType<DT>(), mod, dst, src0, src1, src2);
544 }
545 template <typename DT = void>
546 void csel(const InstructionModifier &mod, const RegData &dst, const Immediate &src0, const RegData &src1, const RegData &src2) {
547 opX(isGen12 ? Opcode::csel_gen12 : Opcode::csel, getDataType<DT>(), mod, dst, src0, src1, src2);
548 }
549 template <typename DT = void>
550 void csel(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const RegData &src1, const Immediate &src2) {
551 opX(isGen12 ? Opcode::csel_gen12 : Opcode::csel, getDataType<DT>(), mod, dst, src0, src1, src2);
552 }
553 template <typename DT = void>
554 void csel(const InstructionModifier &mod, const RegData &dst, const Immediate &src0, const RegData &src1, const Immediate &src2) {
555 opX(isGen12 ? Opcode::csel_gen12 : Opcode::csel, getDataType<DT>(), mod, dst, src0, src1, src2);
556 }
557 void cont(const InstructionModifier &mod, Label &jip, Label &uip) {
558 opBranch(Opcode::cont, mod, null, jip, uip);
559 }
560 template <typename DT = void>
561 void dp2(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const RegData &src1) {
562 opX(Opcode::dp2, getDataType<DT>(), mod, dst, src0, src1);
563 }
564 template <typename DT = void>
565 void dp2(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const Immediate &src1) {
566 opX(Opcode::dp2, getDataType<DT>(), mod, dst, src0, src1);
567 }
568 template <typename DT = void>
569 void dp3(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const RegData &src1) {
570 opX(Opcode::dp3, getDataType<DT>(), mod, dst, src0, src1);
571 }
572 template <typename DT = void>
573 void dp3(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const Immediate &src1) {
574 opX(Opcode::dp3, getDataType<DT>(), mod, dst, src0, src1);
575 }
576 template <typename DT = void>
577 void dp4(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const RegData &src1) {
578 opX(Opcode::dp4, getDataType<DT>(), mod, dst, src0, src1);
579 }
580 template <typename DT = void>
581 void dp4(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const Immediate &src1) {
582 opX(Opcode::dp4, getDataType<DT>(), mod, dst, src0, src1);
583 }
584 template <typename DT = void>
585 void dp4a(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const RegData &src1, const RegData &src2) {
586 if (hw < HW::Gen12LP) unsupported();
587 opX(Opcode::dp4a, getDataType<DT>(), mod, dst, src0, src1, src2);
588 }
589 template <typename DT = void>
590 void dp4a(const InstructionModifier &mod, const RegData &dst, const Immediate &src0, const RegData &src1, const RegData &src2) {
591 if (hw < HW::Gen12LP) unsupported();
592 opX(Opcode::dp4a, getDataType<DT>(), mod, dst, src0, src1, src2);
593 }
594 template <typename DT = void>
595 void dp4a(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const RegData &src1, const Immediate &src2) {
596 if (hw < HW::Gen12LP) unsupported();
597 opX(Opcode::dp4a, getDataType<DT>(), mod, dst, src0, src1, src2);
598 }
599 template <typename DT = void>
600 void dp4a(const InstructionModifier &mod, const RegData &dst, const Immediate &src0, const RegData &src1, const Immediate &src2) {
601 if (hw < HW::Gen12LP) unsupported();
602 opX(Opcode::dp4a, getDataType<DT>(), mod, dst, src0, src1, src2);
603 }
604 template <typename DT = void>
605 void dpas(const InstructionModifier &mod, uint8_t sdepth, uint8_t rcount, const RegData &dst, const RegData &src0, const RegData &src1, const RegData &src2) {
606 opDpas(Opcode::dpas, getDataType<DT>(), mod, sdepth, rcount, dst, src0, src1, src2);
607 }
608 template <typename DT = void>
609 void dpasw(const InstructionModifier &mod, uint8_t sdepth, uint8_t rcount, const RegData &dst, const RegData &src0, const RegData &src1, const RegData &src2) {
610 opDpas(Opcode::dpasw, getDataType<DT>(), mod, sdepth, rcount, dst, src0, src1, src2);
611 }
612 template <typename DT = void>
613 void dph(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const RegData &src1) {
614 opX(Opcode::dph, getDataType<DT>(), mod, dst, src0, src1);
615 }
616 template <typename DT = void>
617 void dph(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const Immediate &src1) {
618 opX(Opcode::dph, getDataType<DT>(), mod, dst, src0, src1);
619 }
620 void else_(InstructionModifier mod, Label &jip, Label &uip, bool branchCtrl = false) {
621 mod.setBranchCtrl(branchCtrl);
622 opBranch(Opcode::else_, mod, null, jip, uip);
623 }
624 void else_(InstructionModifier mod, Label &jip) {
625 else_(mod, jip, jip);
626 }
627 void endif(const InstructionModifier &mod, Label &jip) {
628 opBranch(Opcode::endif, mod, null, jip);
629 }
630 void endif(const InstructionModifier &mod) {
631 opBranch(Opcode::endif, mod, null, sizeof(Instruction8));
632 }
633 template <typename DT = void>
634 void fbh(const InstructionModifier &mod, const RegData &dst, const RegData &src0) {
635 opX(Opcode::fbh, getDataType<DT>(), mod, dst, src0);
636 }
637 template <typename DT = void>
638 void fbh(const InstructionModifier &mod, const RegData &dst, const Immediate &src0) {
639 opX(Opcode::fbh, getDataType<DT>(), mod, dst, src0);
640 }
641 template <typename DT = void>
642 void fbl(const InstructionModifier &mod, const RegData &dst, const RegData &src0) {
643 opX(Opcode::fbl, getDataType<DT>(), mod, dst, src0);
644 }
645 template <typename DT = void>
646 void fbl(const InstructionModifier &mod, const RegData &dst, const Immediate &src0) {
647 opX(Opcode::fbl, getDataType<DT>(), mod, dst, src0);
648 }
649 template <typename DT = void>
650 void frc(const InstructionModifier &mod, const RegData &dst, const RegData &src0) {
651 opX(Opcode::frc, getDataType<DT>(), mod, dst, src0);
652 }
653 void goto_(InstructionModifier mod, Label &jip, Label &uip, bool branchCtrl = false) {
654 mod.setBranchCtrl(branchCtrl);
655 opBranch(Opcode::goto_, mod, null, jip, uip);
656 }
657 void goto_(const InstructionModifier &mod, Label &jip) {
658 goto_(mod, jip, jip);
659 }
660 void halt(const InstructionModifier &mod, Label &jip, Label &uip) {
661 opBranch(Opcode::halt, mod, null, jip, uip);
662 }
663 void halt(const InstructionModifier &mod, Label &jip) {
664 halt(mod, jip, jip);
665 }
666 void if_(InstructionModifier mod, Label &jip, Label &uip, bool branchCtrl = false) {
667 mod.setBranchCtrl(branchCtrl);
668 opBranch(Opcode::if_, mod, null, jip, uip);
669 }
670 void if_(const InstructionModifier &mod, Label &jip) {
671 if_(mod, jip, jip);
672 }
673 void illegal() {
674 opX(Opcode::illegal, DataType::invalid, InstructionModifier(), null, null, null);
675 }
676 void join(InstructionModifier mod, Label &jip) {
677 opBranch(Opcode::join, mod, null, jip);
678 }
679 void join(InstructionModifier mod) {
680 opBranch(Opcode::join, mod, null, sizeof(Instruction8));
681 }
682 void jmpi(const InstructionModifier &mod, Label &jip) {
683 auto dst = isGen12 ? ARF(null) : ARF(ip);
684 opJmpi(Opcode::jmpi, mod, dst, dst, jip);
685 }
686 void jmpi(const InstructionModifier &mod, const RegData &jip) {
687#ifdef NGEN_SAFE
688 if (!isGen12 && jip.getType() != DataType::d && jip.getType() != DataType::invalid)
689 throw invalid_type_exception();
690#endif
691 if (isGen12)
692 opBranch<true, false>(Opcode::jmpi, mod, null, jip);
693 else
694 opX(Opcode::jmpi, DataType::d, mod, ip, ip, jip);
695 }
696 template <typename DT = void>
697 void line(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const RegData &src1) {
698 if (hw >= HW::Gen11) unsupported();
699 opX(Opcode::line, getDataType<DT>(), mod, dst, src0, src1);
700 }
701 template <typename DT = void>
702 void line(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const Immediate &src1) {
703 if (hw >= HW::Gen11) unsupported();
704 opX(Opcode::line, getDataType<DT>(), mod, dst, src0, src1);
705 }
706 template <typename DT = void>
707 void lrp(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const RegData &src1, const RegData &src2) {
708 opX(Opcode::lrp, getDataType<DT>(), mod, dst, src0, src1, src2);
709 }
710 template <typename DT = void>
711 void lzd(const InstructionModifier &mod, const RegData &dst, const RegData &src0) {
712 opX(Opcode::lzd, getDataType<DT>(), mod, dst, src0);
713 }
714 template <typename DT = void>
715 void lzd(const InstructionModifier &mod, const RegData &dst, const Immediate &src0) {
716 opX(Opcode::lzd, getDataType<DT>(), mod, dst, src0);
717 }
718 template <typename DT = void>
719 void mac(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const RegData &src1) {
720 opX(Opcode::mac, getDataType<DT>(), mod, dst, src0, src1);
721 }
722 template <typename DT = void>
723 void mac(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const Immediate &src1) {
724 opX(Opcode::mac, getDataType<DT>(), mod, dst, src0, src1);
725 }
726 template <typename DT = void>
727 void mach(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const RegData &src1) {
728 opX(Opcode::mach, getDataType<DT>(), (hw >= HW::XeHPC) ? mod : (mod | AccWrEn), dst, src0, src1);
729 }
730 template <typename DT = void>
731 void mach(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const Immediate &src1) {
732 opX(Opcode::mach, getDataType<DT>(), (hw >= HW::XeHPC) ? mod : (mod | AccWrEn), dst, src0, src1);
733 }
734 template <typename DT = void>
735 void macl(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const RegData &src1) {
736#ifdef NGEN_SAFE
737 if (hw < HW::Gen10) unsupported();
738#endif
739 opX((hw >= HW::XeHPC) ? Opcode::macl : Opcode::mach, getDataType<DT>(), mod, dst, src0, src1);
740 }
741 template <typename DT = void>
742 void macl(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const Immediate &src1) {
743#ifdef NGEN_SAFE
744 if (hw < HW::Gen10) unsupported();
745#endif
746 opX((hw >= HW::XeHPC) ? Opcode::macl : Opcode::mach, getDataType<DT>(), mod, dst, src0, src1);
747 }
748 template <typename DT = void>
749 void mad(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const RegData &src1, const RegData &src2) {
750 opX(Opcode::mad, getDataType<DT>(), mod, dst, src0, src1, src2);
751 }
752 template <typename DT = void>
753 void mad(const InstructionModifier &mod, const RegData &dst, const Immediate &src0, const RegData &src1, const RegData &src2) {
754 opX(Opcode::mad, getDataType<DT>(), mod, dst, src0, src1, src2);
755 }
756 template <typename DT = void>
757 void mad(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const RegData &src1, const Immediate &src2) {
758 opX(Opcode::mad, getDataType<DT>(), mod, dst, src0, src1, src2);
759 }
760 template <typename DT = void>
761 void mad(const InstructionModifier &mod, const RegData &dst, const Immediate &src0, const RegData &src1, const Immediate &src2) {
762 opX(Opcode::mad, getDataType<DT>(), mod, dst, src0, src1, src2);
763 }
764 template <typename DT = void, HW hw_ = hw>
765 typename std::enable_if<hwLE(hw_, HW::Gen9)>::type
766 madm(const InstructionModifier &mod, const ExtendedReg &dst, const ExtendedReg &src0, const ExtendedReg &src1, const ExtendedReg &src2) {
767 opX(Opcode::madm, getDataType<DT>(), mod, extToAlign16(dst), extToAlign16(src0), extToAlign16(src1), extToAlign16(src2));
768 }
769 template <typename DT = void, HW hw_ = hw>
770 typename std::enable_if<hwGT(hw_, HW::Gen9)>::type
771 madm(const InstructionModifier &mod, const ExtendedReg &dst, ExtendedReg src0, ExtendedReg src1, const ExtendedReg &src2) {
772 src0.getBase().setRegion(4,4,1);
773 src1.getBase().setRegion(4,4,1);
774 opX(Opcode::madm, getDataType<DT>(), mod, dst, src0, src1, src2);
775 }
776 template <typename DT = void>
777 void math(const InstructionModifier &mod, MathFunction fc, const RegData &dst, const RegData &src0) {
778#ifdef NGEN_SAFE
779 if (mathArgCount(fc) != 1) throw invalid_operand_count_exception();
780#endif
781 opMath(Opcode::math, getDataType<DT>(), mod, fc, dst, src0);
782 }
783 template <typename DT = void>
784 void math(const InstructionModifier &mod, MathFunction fc, const RegData &dst, const RegData &src0, const RegData &src1) {
785#ifdef NGEN_SAFE
786 if (mathArgCount(fc) != 2) throw invalid_operand_count_exception();
787#endif
788 opMath(Opcode::math, getDataType<DT>(), mod, fc, dst, src0, src1);
789 }
790 template <typename DT = void>
791 void math(const InstructionModifier &mod, MathFunction fc, const RegData &dst, const RegData &src0, const Immediate &src1) {
792#ifdef NGEN_SAFE
793 if (fc == MathFunction::invm || fc == MathFunction::rsqtm) throw invalid_operand_exception();
794#endif
795 opMath(Opcode::math, getDataType<DT>(), mod, fc, dst, src0, src1.forceInt32());
796 }
797 template <typename DT = void, HW hw_ = hw>
798 typename std::enable_if<hwLT(hw_, HW::Gen11)>::type
799 math(const InstructionModifier &mod, MathFunction fc, const ExtendedReg &dst, const ExtendedReg &src0) {
800#ifdef NGEN_SAFE
801 if (fc != MathFunction::rsqtm) throw invalid_operand_exception();
802#endif
803 opMath(Opcode::math, getDataType<DT>(), mod, fc, extToAlign16(dst), extToAlign16(src0));
804 }
805 template <typename DT = void, HW hw_ = hw>
806 typename std::enable_if<hwGE(hw_, HW::Gen11)>::type
807 math(const InstructionModifier &mod, MathFunction fc, const ExtendedReg &dst, ExtendedReg src0) {
808#ifdef NGEN_SAFE
809 if (fc != MathFunction::rsqtm) throw invalid_operand_exception();
810#endif
811 if (hw == HW::Gen11)
812 src0.getBase().setRegion(2,2,1);
813 else
814 src0.getBase().setRegion(1,1,0);
815 opMath(Opcode::math, getDataType<DT>(), mod, fc, dst, src0);
816 }
817 template <typename DT = void, HW hw_ = hw>
818 typename std::enable_if<hwLT(hw_, HW::Gen11)>::type
819 math(const InstructionModifier &mod, MathFunction fc, const ExtendedReg &dst, const ExtendedReg &src0, const ExtendedReg &src1) {
820#ifdef NGEN_SAFE
821 if (fc != MathFunction::invm) throw invalid_operand_exception();
822#endif
823 opMath(Opcode::math, getDataType<DT>(), mod, fc, extToAlign16(dst), extToAlign16(src0), extToAlign16(src1));
824 }
825 template <typename DT = void, HW hw_ = hw>
826 typename std::enable_if<hwGE(hw_, HW::Gen11)>::type
827 math(const InstructionModifier &mod, MathFunction fc, const ExtendedReg &dst, ExtendedReg src0, ExtendedReg src1) {
828#ifdef NGEN_SAFE
829 if (fc != MathFunction::invm) throw invalid_operand_exception();
830#endif
831 if (hw == HW::Gen11) {
832 src0.getBase().setRegion(2,2,1);
833 src1.getBase().setRegion(2,2,1);
834 } else {
835 src0.getBase().setRegion(1,1,0);
836 src1.getBase().setRegion(1,1,0);
837 }
838 opMath(Opcode::math, getDataType<DT>(), mod, fc, dst, src0, src1);
839 }
840 template <typename DT = void>
841 void mov(const InstructionModifier &mod, const RegData &dst, const RegData &src0) {
842 opX(isGen12 ? Opcode::mov_gen12 : Opcode::mov, getDataType<DT>(), mod, dst, src0);
843 }
844 template <typename DT = void>
845 void mov(const InstructionModifier &mod, const RegData &dst, const Immediate &src0) {
846 opX(isGen12 ? Opcode::mov_gen12 : Opcode::mov, getDataType<DT>(), mod, dst, src0);
847 }
848 template <typename DT = void>
849 void movi(const InstructionModifier &mod, const RegData &dst, const RegData &src0) {
850 if (hardware >= HW::Gen10)
851 movi<DT>(mod, dst, src0, null.ud(0)(1,1,0));
852 else
853 opX(isGen12 ? Opcode::movi_gen12 : Opcode::movi, getDataType<DT>(), mod, dst, src0);
854 }
855 template <typename DT = void>
856 void movi(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const Immediate &src1) {
857#ifdef NGEN_SAFE
858 if (hardware < HW::Gen10) throw unsupported_instruction();
859#endif
860 opX(isGen12 ? Opcode::movi_gen12 : Opcode::movi, getDataType<DT>(), mod, dst, src0, src1);
861 }
862 template <typename DT = void>
863 void movi(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const RegData &src1) {
864#ifdef NGEN_SAFE
865 if (hardware < HW::Gen10) throw unsupported_instruction();
866#endif
867 opX(isGen12 ? Opcode::movi_gen12 : Opcode::movi, getDataType<DT>(), mod, dst, src0, src1);
868 }
869 template <typename DT = void>
870 void mul(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const RegData &src1) {
871 opX(Opcode::mul, getDataType<DT>(), mod, dst, src0, src1);
872 }
873 template <typename DT = void>
874 void mul(const InstructionModifier &mod, const RegData &dst, const RegData &src0, Immediate src1) {
875 if (dst.getBytes() == 8)
876 src1 = src1.forceInt32();
877 opX(Opcode::mul, getDataType<DT>(), mod, dst, src0, src1);
878 }
879 void nop() {
880 opNop(isGen12 ? Opcode::nop_gen12 : Opcode::nop);
881 }
882 void nop(const InstructionModifier &mod) {
883 opX(isGen12 ? Opcode::nop_gen12 : Opcode::nop, DataType::invalid, mod, null, null, null);
884 }
885 template <typename DT = void>
886 void not_(const InstructionModifier &mod, const RegData &dst, const RegData &src0) {
887 opX(isGen12 ? Opcode::not_gen12 : Opcode::not_, getDataType<DT>(), mod, dst, src0);
888 }
889 template <typename DT = void>
890 void not_(const InstructionModifier &mod, const RegData &dst, const Immediate &src0) {
891 opX(isGen12 ? Opcode::not_gen12 : Opcode::not_, getDataType<DT>(), mod, dst, src0);
892 }
893#ifndef NGEN_NO_OP_NAMES
894 template <typename DT = void>
895 void not(const InstructionModifier &mod, const RegData &dst, const RegData &src0) {
896 not_<DT>(mod, dst, src0);
897 }
898 template <typename DT = void>
899 void not(const InstructionModifier &mod, const RegData &dst, const Immediate &src0) {
900 not_<DT>(mod, dst, src0);
901 }
902#endif
903 template <typename DT = void>
904 void or_(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const RegData &src1) {
905 opX(isGen12 ? Opcode::or_gen12 : Opcode::or_, getDataType<DT>(), mod, dst, src0, src1);
906 }
907 template <typename DT = void>
908 void or_(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const Immediate &src1) {
909 opX(isGen12 ? Opcode::or_gen12 : Opcode::or_, getDataType<DT>(), mod, dst, src0, src1);
910 }
911#ifndef NGEN_NO_OP_NAMES
912 template <typename DT = void>
913 void or(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const RegData &src1) {
914 or_<DT>(mod, dst, src0, src1);
915 }
916 template <typename DT = void>
917 void or(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const Immediate &src1) {
918 or_<DT>(mod, dst, src0, src1);
919 }
920#endif
921 template <typename DT = void>
922 void pln(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const RegData &src1) {
923 if (hw >= HW::Gen11) unsupported();
924 opX(Opcode::pln, getDataType<DT>(), mod, dst, src0, src1);
925 }
926 void ret(const InstructionModifier &mod, RegData src0) {
927 src0.setRegion(2,2,1);
928 if (isGen12)
929 opBranch<true, true>(Opcode::ret, mod, null, src0);
930 else
931 opX<true>(Opcode::ret, DataType::ud, mod, null, src0);
932 }
933 template <typename DT = void>
934 void rndd(const InstructionModifier &mod, const RegData &dst, const RegData &src0) {
935 opX(Opcode::rndd, getDataType<DT>(), mod, dst, src0);
936 }
937 template <typename DT = void>
938 void rndd(const InstructionModifier &mod, const RegData &dst, const Immediate &src0) {
939 opX(Opcode::rndd, getDataType<DT>(), mod, dst, src0);
940 }
941 template <typename DT = void>
942 void rnde(const InstructionModifier &mod, const RegData &dst, const RegData &src0) {
943 opX(Opcode::rnde, getDataType<DT>(), mod, dst, src0);
944 }
945 template <typename DT = void>
946 void rnde(const InstructionModifier &mod, const RegData &dst, const Immediate &src0) {
947 opX(Opcode::rnde, getDataType<DT>(), mod, dst, src0);
948 }
949 template <typename DT = void>
950 void rndu(const InstructionModifier &mod, const RegData &dst, const RegData &src0) {
951 opX(Opcode::rndu, getDataType<DT>(), mod, dst, src0);
952 }
953 template <typename DT = void>
954 void rndu(const InstructionModifier &mod, const RegData &dst, const Immediate &src0) {
955 opX(Opcode::rndu, getDataType<DT>(), mod, dst, src0);
956 }
957 template <typename DT = void>
958 void rndz(const InstructionModifier &mod, const RegData &dst, const RegData &src0) {
959 opX(Opcode::rndz, getDataType<DT>(), mod, dst, src0);
960 }
961 template <typename DT = void>
962 void rndz(const InstructionModifier &mod, const RegData &dst, const Immediate &src0) {
963 opX(Opcode::rndz, getDataType<DT>(), mod, dst, src0);
964 }
965 template <typename DT = void>
966 void rol(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const RegData &src1) {
967 opX(isGen12 ? Opcode::rol_gen12 : Opcode::rol, getDataType<DT>(), mod, dst, src0, src1);
968 }
969 template <typename DT = void>
970 void rol(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const Immediate &src1) {
971 opX(isGen12 ? Opcode::rol_gen12 : Opcode::rol, getDataType<DT>(), mod, dst, src0, src1);
972 }
973 template <typename DT = void>
974 void ror(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const RegData &src1) {
975 opX(isGen12 ? Opcode::ror_gen12 : Opcode::ror, getDataType<DT>(), mod, dst, src0, src1);
976 }
977 template <typename DT = void>
978 void ror(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const Immediate &src1) {
979 opX(isGen12 ? Opcode::ror_gen12 : Opcode::ror, getDataType<DT>(), mod, dst, src0, src1);
980 }
981 template <typename DT = void>
982 void sad2(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const RegData &src1) {
983 if (hw >= HW::Gen12LP) unsupported();
984 opX(Opcode::sad2, getDataType<DT>(), mod, dst, src0, src1);
985 }
986 template <typename DT = void>
987 void sad2(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const Immediate &src1) {
988 if (hw >= HW::Gen12LP) unsupported();
989 opX(Opcode::sad2, getDataType<DT>(), mod, dst, src0, src1);
990 }
991 template <typename DT = void>
992 void sada2(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const RegData &src1) {
993 if (hw >= HW::Gen12LP) unsupported();
994 opX(Opcode::sada2, getDataType<DT>(), mod, dst, src0, src1);
995 }
996 template <typename DT = void>
997 void sada2(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const Immediate &src1) {
998 if (hw >= HW::Gen12LP) unsupported();
999 opX(Opcode::sada2, getDataType<DT>(), mod, dst, src0, src1);
1000 }
1001 template <typename DT = void>
1002 void sel(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const RegData &src1) {
1003 opX(isGen12 ? Opcode::sel_gen12 : Opcode::sel, getDataType<DT>(), mod, dst, src0, src1);
1004 }
1005 template <typename DT = void>
1006 void sel(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const Immediate &src1) {
1007 opX(isGen12 ? Opcode::sel_gen12 : Opcode::sel, getDataType<DT>(), mod, dst, src0, src1);
1008 }
1009
1010 /* Gen12-style sends */
1011 void send(const InstructionModifier &mod, SharedFunction sf, const RegData &dst, const RegData &src0, const RegData &src1, uint32_t exdesc, uint32_t desc) {
1012 opSend(Opcode::send, mod, sf, dst, src0, src1, exdesc, desc);
1013 }
1014 void send(const InstructionModifier &mod, SharedFunction sf, const RegData &dst, const RegData &src0, const RegData &src1, const RegData &exdesc, uint32_t desc) {
1015 opSend(Opcode::send, mod, sf, dst, src0, src1, exdesc, desc);
1016 }
1017 void send(const InstructionModifier &mod, SharedFunction sf, const RegData &dst, const RegData &src0, const RegData &src1, uint32_t exdesc, const RegData &desc) {
1018 opSend(Opcode::send, mod, sf, dst, src0, src1, exdesc, desc);
1019 }
1020 void send(const InstructionModifier &mod, SharedFunction sf, const RegData &dst, const RegData &src0, const RegData &src1, const RegData &exdesc, const RegData &desc) {
1021 opSend(Opcode::send, mod, sf, dst, src0, src1, exdesc, desc);
1022 }
1023 void sendc(const InstructionModifier &mod, SharedFunction sf, const RegData &dst, const RegData &src0, const RegData &src1, uint32_t exdesc, uint32_t desc) {
1024 opSend(Opcode::sendc, mod, sf, dst, src0, src1, exdesc, desc);
1025 }
1026 void sendc(const InstructionModifier &mod, SharedFunction sf, const RegData &dst, const RegData &src0, const RegData &src1, const RegData &exdesc, uint32_t desc) {
1027 opSend(Opcode::sendc, mod, sf, dst, src0, src1, exdesc, desc);
1028 }
1029 void sendc(const InstructionModifier &mod, SharedFunction sf, const RegData &dst, const RegData &src0, const RegData &src1, uint32_t exdesc, const RegData &desc) {
1030 opSend(Opcode::sendc, mod, sf, dst, src0, src1, exdesc, desc);
1031 }
1032 void sendc(const InstructionModifier &mod, SharedFunction sf, const RegData &dst, const RegData &src0, const RegData &src1, const RegData &exdesc, const RegData &desc) {
1033 opSend(Opcode::sendc, mod, sf, dst, src0, src1, exdesc, desc);
1034 }
1035 /* Pre-Gen12-style sends; also supported on Gen12. */
1036 void send(const InstructionModifier &mod, const RegData &dst, const RegData &src0, uint32_t exdesc, uint32_t desc) {
1037 opSend(Opcode::send, mod, dst, src0, exdesc, desc);
1038 }
1039 void send(const InstructionModifier &mod, const RegData &dst, const RegData &src0, uint32_t exdesc, const RegData &desc) {
1040 opSend(Opcode::send, mod, dst, src0, exdesc, desc);
1041 }
1042 void sendc(const InstructionModifier &mod, const RegData &dst, const RegData &src0, uint32_t exdesc, uint32_t desc) {
1043 opSend(Opcode::sendc, mod, dst, src0, exdesc, desc);
1044 }
1045 void sendc(const InstructionModifier &mod, const RegData &dst, const RegData &src0, uint32_t exdesc, const RegData &desc) {
1046 opSend(Opcode::sendc, mod, dst, src0, exdesc, desc);
1047 }
1048 void sends(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const RegData &src1, uint32_t exdesc, uint32_t desc) {
1049 opSends(Opcode::sends, mod, dst, src0, src1, exdesc, desc);
1050 }
1051 void sends(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const RegData &src1, uint32_t exdesc, const RegData &desc) {
1052 opSends(Opcode::sends, mod, dst, src0, src1, exdesc, desc);
1053 }
1054 void sends(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const RegData &src1, const RegData &exdesc, uint32_t desc) {
1055 opSends(Opcode::sends, mod, dst, src0, src1, exdesc, desc);
1056 }
1057 void sends(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const RegData &src1, const RegData &exdesc, const RegData &desc) {
1058 opSends(Opcode::sends, mod, dst, src0, src1, exdesc, desc);
1059 }
1060 void sendsc(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const RegData &src1, uint32_t exdesc, uint32_t desc) {
1061 opSends(Opcode::sendsc, mod, dst, src0, src1, exdesc, desc);
1062 }
1063 void sendsc(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const RegData &src1, uint32_t exdesc, const RegData &desc) {
1064 opSends(Opcode::sendsc, mod, dst, src0, src1, exdesc, desc);
1065 }
1066 void sendsc(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const RegData &src1, const RegData &exdesc, uint32_t desc) {
1067 opSends(Opcode::sendsc, mod, dst, src0, src1, exdesc, desc);
1068 }
1069 void sendsc(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const RegData &src1, const RegData &exdesc, const RegData &desc) {
1070 opSends(Opcode::sendsc, mod, dst, src0, src1, exdesc, desc);
1071 }
1072
1073 template <typename DT = void>
1074 void shl(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const RegData &src1) {
1075 opX(isGen12 ? Opcode::shl_gen12 : Opcode::shl, getDataType<DT>(), mod, dst, src0, src1);
1076 }
1077 template <typename DT = void>
1078 void shl(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const Immediate &src1) {
1079 opX(isGen12 ? Opcode::shl_gen12 : Opcode::shl, getDataType<DT>(), mod, dst, src0, src1);
1080 }
1081 template <typename DT = void>
1082 void shr(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const RegData &src1) {
1083 opX(isGen12 ? Opcode::shr_gen12 : Opcode::shr, getDataType<DT>(), mod, dst, src0, src1);
1084 }
1085 template <typename DT = void>
1086 void shr(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const Immediate &src1) {
1087 opX(isGen12 ? Opcode::shr_gen12 : Opcode::shr, getDataType<DT>(), mod, dst, src0, src1);
1088 }
1089 template <typename DT = void>
1090 void smov(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const Immediate &src1) {
1091 opX(isGen12 ? Opcode::smov_gen12 : Opcode::smov, getDataType<DT>(), mod, dst, src0, src1);
1092 }
1093 template <typename DT = void>
1094 void srnd(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const RegData &src1) {
1095 opX(Opcode::srnd, getDataType<DT>(), mod, dst, src0, src1);
1096 }
1097 template <typename DT = void>
1098 void srnd(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const Immediate &src1) {
1099 opX(Opcode::srnd, getDataType<DT>(), mod, dst, src0, src1);
1100 }
1101 template <typename DT = void>
1102 void subb(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const RegData &src1) {
1103 opX(Opcode::subb, getDataType<DT>(), mod | AccWrEn, dst, src0, src1);
1104 }
1105 template <typename DT = void>
1106 void subb(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const Immediate &src1) {
1107 opX(Opcode::subb, getDataType<DT>(), mod | AccWrEn, dst, src0, src1);
1108 }
1109 void wait(const InstructionModifier &mod, const RegData &nreg) {
1110#ifdef NGEN_SAFE
1111 if (!nreg.isARF() || nreg.getARFType() != ARFType::n) throw invalid_arf_exception();
1112#endif
1113 opX(Opcode::wait, DataType::invalid, mod, nreg, nreg);
1114 }
1115 void while_(const InstructionModifier &mod, Label &jip) {
1116 opBranch(Opcode::while_, mod, null, jip);
1117 }
1118 template <typename DT = void>
1119 void xor_(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const RegData &src1) {
1120 opX(isGen12 ? Opcode::xor_gen12 : Opcode::xor_, getDataType<DT>(), mod, dst, src0, src1);
1121 }
1122 template <typename DT = void>
1123 void xor_(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const Immediate &src1) {
1124 opX(isGen12 ? Opcode::xor_gen12 : Opcode::xor_, getDataType<DT>(), mod, dst, src0, src1);
1125 }
1126#ifndef NGEN_NO_OP_NAMES
1127 template <typename DT = void>
1128 void xor(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const RegData &src1) {
1129 xor_<DT>(mod, dst, src0, src1);
1130 }
1131 template <typename DT = void>
1132 void xor(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const Immediate &src1) {
1133 xor_<DT>(mod, dst, src0, src1);
1134 }
1135#endif
1136
1137private:
1138 struct Sync {
1139 BinaryCodeGenerator<hw> &parent;
1140
1141 Sync(BinaryCodeGenerator<hw> *parent_) : parent(*parent_) {}
1142
1143 void operator()(SyncFunction fc, const InstructionModifier &mod = InstructionModifier()) {
1144 parent.opSync(Opcode::sync, fc, mod);
1145 }
1146 void operator()(SyncFunction fc, const RegData &src0) {
1147 this->operator()(fc, InstructionModifier(), src0);
1148 }
1149 void operator()(SyncFunction fc, const InstructionModifier &mod, const RegData &src0) {
1150 parent.opSync(Opcode::sync, fc, mod, src0);
1151 }
1152 void operator()(SyncFunction fc, int src0) {
1153 this->operator()(fc, InstructionModifier(), src0);
1154 }
1155 void operator()(SyncFunction fc, const InstructionModifier &mod, uint32_t src0) {
1156 parent.opSync(Opcode::sync, fc, mod, Immediate::ud(src0));
1157 }
1158 void allrd() {
1159 allrd(null.ud(0)(0, 1, 1));
1160 }
1161 void allrd(const InstructionModifier &mod) {
1162 allrd(mod, null.ud(0)(0, 1, 1));
1163 }
1164 void allrd(const RegData &src0) {
1165 allrd(InstructionModifier(), src0);
1166 }
1167 void allrd(const InstructionModifier &mod, const RegData &src0) {
1168 this->operator()(SyncFunction::allrd, mod, src0);
1169 }
1170 void allrd(uint32_t src0) {
1171 allrd(InstructionModifier(), src0);
1172 }
1173 void allrd(const InstructionModifier &mod, uint32_t src0) {
1174 this->operator()(SyncFunction::allrd, mod, src0);
1175 }
1176 void allwr() {
1177 allwr(null);
1178 }
1179 void allwr(const InstructionModifier &mod) {
1180 allwr(mod, null);
1181 }
1182 void allwr(const RegData &src0) {
1183 allwr(InstructionModifier(), src0);
1184 }
1185 void allwr(const InstructionModifier &mod, const RegData &src0) {
1186 this->operator()(SyncFunction::allwr, mod, src0);
1187 }
1188 void allwr(uint32_t src0) {
1189 allwr(InstructionModifier(), src0);
1190 }
1191 void allwr(const InstructionModifier &mod, uint32_t src0) {
1192 this->operator()(SyncFunction::allwr, mod, src0);
1193 }
1194 void bar(const InstructionModifier &mod = InstructionModifier()) {
1195 this->operator()(SyncFunction::bar, mod);
1196 }
1197 void bar(const InstructionModifier &mod, uint32_t src0) {
1198 this->operator()(SyncFunction::bar, mod, src0);
1199 }
1200 void bar(const InstructionModifier &mod, const RegData &src0) {
1201 this->operator()(SyncFunction::bar, mod, src0);
1202 }
1203 void bar(uint32_t src0) {
1204 this->operator()(SyncFunction::bar, InstructionModifier(), src0);
1205 }
1206 void bar(const RegData &src0) {
1207 this->operator()(SyncFunction::bar, InstructionModifier(), src0);
1208 }
1209 void host(const InstructionModifier &mod = InstructionModifier()) {
1210 this->operator()(SyncFunction::host, mod);
1211 }
1212 void nop(const InstructionModifier &mod = InstructionModifier()) {
1213 this->operator()(SyncFunction::nop, mod);
1214 }
1215 };
1216public:
1217 Sync sync;
1218
1219
1220private:
1221 struct Load {
1222 BinaryCodeGenerator<hw> &parent;
1223
1224 Load(BinaryCodeGenerator<hw> *parent_) : parent(*parent_) {}
1225
1226 template <typename DataSpec>
1227 void operator()(const InstructionModifier &mod, const RegData &dst, const DataSpec &spec, AddressBase base, const RegData &addr)
1228 {
1229 MessageDescriptor desc;
1230 ExtendedMessageDescriptor exdesc;
1231
1232 encodeLoadDescriptors(hw, desc, exdesc, mod, dst, spec, base, addr);
1233 parent.send(mod, dst, addr, exdesc.all, desc.all);
1234 }
1235
1236 template <typename DataSpec>
1237 void operator()(const InstructionModifier &mod, const RegData &dst, const DataSpec &spec, AddressBase base, const GRFDisp &addr)
1238 {
1239 MessageDescriptor desc;
1240 ExtendedMessageDescriptor exdesc;
1241
1242 encodeLoadDescriptors(hw, desc, exdesc, mod, dst, spec, base, addr);
1243 parent.send(mod, dst, addr.getBase(), exdesc.all, desc.all);
1244 }
1245
1246 template <typename DataSpec>
1247 void operator()(SharedFunction sfid, const InstructionModifier &mod, const RegData &dst, const DataSpec &spec, AddressBase base, const GRFDisp &addr)
1248 {
1249 MessageDescriptor desc;
1250 ExtendedMessageDescriptor exdesc;
1251
1252 exdesc.parts.sfid = static_cast<unsigned>(sfid);
1253 encodeLoadDescriptors(hw, desc, exdesc, mod, dst, spec, base, addr);
1254 exdesc.parts.sfid = static_cast<unsigned>(sfid);
1255 parent.send(mod, dst, addr.getBase(), exdesc.all, desc.all);
1256 }
1257
1258 void ugm(const InstructionModifier &mod, const RegData &dst, DataSpecLSC spec, AddressBase base, const GRFDisp &addr)
1259 {
1260 this->operator()(SharedFunction::ugm, mod, dst, spec, base, addr);
1261 }
1262 void ugml(const InstructionModifier &mod, const RegData &dst, DataSpecLSC spec, AddressBase base, const GRFDisp &addr)
1263 {
1264 this->operator()(SharedFunction::ugml, mod, dst, spec, base, addr);
1265 }
1266 void tgm(const InstructionModifier &mod, const RegData &dst, DataSpecLSC spec, AddressBase base, const GRFDisp &addr)
1267 {
1268 this->operator()(SharedFunction::tgm, mod, dst, spec, base, addr);
1269 }
1270 void slm(const InstructionModifier &mod, const RegData &dst, DataSpecLSC spec, AddressBase base, const GRFDisp &addr)
1271 {
1272 this->operator()(SharedFunction::slm, mod, dst, spec, base, addr);
1273 }
1274 };
1275
1276 struct Store {
1277 BinaryCodeGenerator<hw> &parent;
1278
1279 Store(BinaryCodeGenerator<hw> *parent_) : parent(*parent_) {}
1280
1281 template <typename DataSpec>
1282 void operator()(const InstructionModifier &mod, const DataSpec &spec, AddressBase base, const RegData &addr, const RegData &data)
1283 {
1284 MessageDescriptor desc;
1285 ExtendedMessageDescriptor exdesc;
1286
1287 encodeStoreDescriptors(hw, desc, exdesc, mod, spec, base, addr);
1288 parent.sends(mod, NullRegister(), addr, data, exdesc.all, desc.all);
1289 }
1290
1291 template <typename DataSpec>
1292 void operator()(const InstructionModifier &mod, const DataSpec &spec, AddressBase base, const GRFDisp &addr, const RegData &data)
1293 {
1294 MessageDescriptor desc;
1295 ExtendedMessageDescriptor exdesc;
1296
1297 encodeStoreDescriptors(hw, desc, exdesc, mod, spec, base, addr);
1298 parent.sends(mod, NullRegister(), addr.getBase(), data, exdesc.all, desc.all);
1299 }
1300
1301 template <typename DataSpec>
1302 void operator()(SharedFunction sfid, const InstructionModifier &mod, const DataSpec &spec, AddressBase base, const GRFDisp &addr, const RegData &data)
1303 {
1304 MessageDescriptor desc;
1305 ExtendedMessageDescriptor exdesc;
1306
1307 exdesc.parts.sfid = static_cast<unsigned>(sfid);
1308 encodeStoreDescriptors(hw, desc, exdesc, mod, spec, base, addr);
1309 exdesc.parts.sfid = static_cast<unsigned>(sfid);
1310 parent.sends(mod, NullRegister(), addr.getBase(), data, exdesc.all, desc.all);
1311 }
1312
1313 void ugm(const InstructionModifier &mod, DataSpecLSC spec, AddressBase base, const GRFDisp &addr, const RegData &data)
1314 {
1315 this->operator()(SharedFunction::ugm, mod, spec, base, addr, data);
1316 }
1317 void ugml(const InstructionModifier &mod, DataSpecLSC spec, AddressBase base, const GRFDisp &addr, const RegData &data)
1318 {
1319 this->operator()(SharedFunction::ugml, mod, spec, base, addr, data);
1320 }
1321 void tgm(const InstructionModifier &mod, DataSpecLSC spec, AddressBase base, const GRFDisp &addr, const RegData &data)
1322 {
1323 this->operator()(SharedFunction::tgm, mod, spec, base, addr, data);
1324 }
1325 void slm(const InstructionModifier &mod, DataSpecLSC spec, AddressBase base, const GRFDisp &addr, const RegData &data)
1326 {
1327 this->operator()(SharedFunction::slm, mod, spec, base, addr, data);
1328 }
1329 };
1330
1331 struct Atomic_ {
1332 BinaryCodeGenerator<hw> &parent;
1333
1334 Atomic_(BinaryCodeGenerator<hw> *parent_) : parent(*parent_) {}
1335
1336 template <typename DataSpec>
1337 void operator()(AtomicOp op, const InstructionModifier &mod, const RegData &dst, const DataSpec &spec, AddressBase base, const RegData &addr, const RegData &data = NullRegister())
1338 {
1339 MessageDescriptor desc;
1340 ExtendedMessageDescriptor exdesc;
1341
1342 encodeAtomicDescriptors(hw, desc, exdesc, op, mod, dst, spec, base, addr);
1343 if (data.isNull())
1344 parent.send(mod, dst, addr, exdesc.all, desc.all);
1345 else
1346 parent.sends(mod, dst, addr, data, exdesc.all, desc.all);
1347 }
1348 template <typename DataSpec>
1349 void operator()(AtomicOp op, const InstructionModifier &mod, const DataSpec &spec, AddressBase base, const RegData &addr, const RegData &data = NullRegister())
1350 {
1351 (*this)(op, mod, NullRegister(), spec, base, addr, data);
1352 }
1353
1354 template <typename DataSpec>
1355 void operator()(AtomicOp op, const InstructionModifier &mod, const RegData &dst, const DataSpec &spec, AddressBase base, const GRFDisp &addr, const RegData &data = NullRegister())
1356 {
1357 MessageDescriptor desc;
1358 ExtendedMessageDescriptor exdesc;
1359
1360 encodeAtomicDescriptors(hw, desc, exdesc, op, mod, dst, spec, base, addr);
1361 parent.sends(mod, dst, addr.getBase(), data, exdesc.all, desc.all);
1362 }
1363 template <typename DataSpec>
1364 void operator()(AtomicOp op, const InstructionModifier &mod, const DataSpec &spec, AddressBase base, const GRFDisp &addr, const RegData &data = NullRegister())
1365 {
1366 (*this)(op, mod, NullRegister(), spec, base, addr, data);
1367 }
1368 template <typename DataSpec>
1369 void operator()(SharedFunction sfid, AtomicOp op, const InstructionModifier &mod, const RegData &dst, const DataSpec &spec, AddressBase base, const GRFDisp &addr, const RegData &data)
1370 {
1371 MessageDescriptor desc;
1372 ExtendedMessageDescriptor exdesc;
1373
1374 exdesc.parts.sfid = static_cast<unsigned>(sfid);
1375 encodeAtomicDescriptors(hw, desc, exdesc, op, mod, dst, spec, base, addr);
1376 exdesc.parts.sfid = static_cast<unsigned>(sfid);
1377 parent.sends(mod, dst, addr.getBase(), data, exdesc.all, desc.all);
1378 }
1379
1380 void ugm(AtomicOp op, const InstructionModifier &mod, const RegData &dst, DataSpecLSC spec, AddressBase base, const GRFDisp &addr, const RegData &data = NullRegister())
1381 {
1382 this->operator()(SharedFunction::ugm, op, mod, dst, spec, base, addr, data);
1383 }
1384 void ugm(AtomicOp op, const InstructionModifier &mod, DataSpecLSC spec, AddressBase base, const GRFDisp &addr, const RegData &data = NullRegister())
1385 {
1386 this->operator()(SharedFunction::ugm, op, mod, NullRegister(), spec, base, addr, data);
1387 }
1388 void ugml(AtomicOp op, const InstructionModifier &mod, const RegData &dst, DataSpecLSC spec, AddressBase base, const GRFDisp &addr, const RegData &data = NullRegister())
1389 {
1390 this->operator()(SharedFunction::ugml, op, mod, dst, spec, base, addr, data);
1391 }
1392 void ugml(AtomicOp op, const InstructionModifier &mod, DataSpecLSC spec, AddressBase base, const GRFDisp &addr, const RegData &data = NullRegister())
1393 {
1394 this->operator()(SharedFunction::ugml, op, mod, NullRegister(), spec, base, addr, data);
1395 }
1396 void tgm(AtomicOp op, const InstructionModifier &mod, const RegData &dst, DataSpecLSC spec, AddressBase base, const GRFDisp &addr, const RegData &data = NullRegister())
1397 {
1398 this->operator()(SharedFunction::tgm, op, mod, dst, spec, base, addr, data);
1399 }
1400 void tgm(AtomicOp op, const InstructionModifier &mod, DataSpecLSC spec, AddressBase base, const GRFDisp &addr, const RegData &data = NullRegister())
1401 {
1402 this->operator()(SharedFunction::tgm, op, mod, NullRegister(), spec, base, addr, data);
1403 }
1404 void slm(AtomicOp op, const InstructionModifier &mod, const RegData &dst, DataSpecLSC spec, AddressBase base, const GRFDisp &addr, const RegData &data = NullRegister())
1405 {
1406 this->operator()(SharedFunction::slm, op, mod, dst, spec, base, addr, data);
1407 }
1408 void slm(AtomicOp op, const InstructionModifier &mod, DataSpecLSC spec, AddressBase base, const GRFDisp &addr, const RegData &data = NullRegister())
1409 {
1410 this->operator()(SharedFunction::slm, op, mod, NullRegister(), spec, base, addr, data);
1411 }
1412 };
public:
    // Callable message helpers: load(...), store(...) and atomic(...) encode the
    // message descriptors and emit the resulting send/sends on this generator.
    Load load;
    Store store;
    Atomic_ atomic;
1417
1418 void wrdep(const GRFRange &r) {
1419 int len = r.getLen();
1420 for (int o = 0; o < len; o += 32) {
1421 int thisLen = std::min(len - o, 32);
1422 opX(Opcode::wrdep, DataType::ud, InstructionModifier::createAutoSWSB(), null, r[o], r[o + thisLen - 1]);
1423 }
1424 }
1425 void wrdep(const GRF &r) {
1426 wrdep(r-r);
1427 }
1428
1429#include "ngen_pseudo.hpp"
1430};
1431
// Helper for NGEN_FORWARD: declares a forwarder `fn` whose optional leading
// template argument DT is passed on to BinaryCodeGenerator<hw>::fn<DT>.
#define NGEN_FORWARD_TYPED_FN(hw, fn) \
template <typename DT = void, typename... Targs> void fn(Targs&&... args) { ngen::BinaryCodeGenerator<hw>::template fn<DT>(std::forward<Targs>(args)...); }

// Helper for NGEN_FORWARD: declares a forwarder `fn` (returning void) to
// BinaryCodeGenerator<hw>::fn, without a data-type template argument.
#define NGEN_FORWARD_UNTYPED_FN(hw, fn) \
template <typename... Targs> void fn(Targs&&... args) { ngen::BinaryCodeGenerator<hw>::fn(std::forward<Targs>(args)...); }

// NGEN_FORWARD(hw): re-exports the BinaryCodeGenerator<hw> interface
// (instructions, pseudo-instructions, stream management, settings) by declaring
// perfect-forwarding member templates and using-declarations. The
// using-declarations name members of BinaryCodeGenerator<hw>, so the macro is
// meant to be expanded in the body of a class derived from it.
//
// Note: the trailing NGEN_FORWARD_EXTRA / _OP_NAMES / _MIN_MAX / _REGISTERS
// macros are expanded without arguments. Where their bodies mention `hw`, that
// token is NOT replaced by this macro's parameter; it refers to whatever `hw`
// names at the point of use.
#define NGEN_FORWARD(hw) \
using InstructionStream = typename ngen::BinaryCodeGenerator<hw>::InstructionStream; \
using ngen::BinaryCodeGenerator<hw>::isGen12; \
NGEN_FORWARD_TYPED_FN(hw, add) \
NGEN_FORWARD_TYPED_FN(hw, addc) \
NGEN_FORWARD_TYPED_FN(hw, and_) \
NGEN_FORWARD_TYPED_FN(hw, asr) \
NGEN_FORWARD_TYPED_FN(hw, avg) \
NGEN_FORWARD_TYPED_FN(hw, bfe) \
NGEN_FORWARD_TYPED_FN(hw, bfi1) \
NGEN_FORWARD_TYPED_FN(hw, bfi2) \
NGEN_FORWARD_TYPED_FN(hw, bfrev) \
NGEN_FORWARD_TYPED_FN(hw, cbit) \
NGEN_FORWARD_TYPED_FN(hw, cmp) \
NGEN_FORWARD_TYPED_FN(hw, cmpn) \
NGEN_FORWARD_TYPED_FN(hw, csel) \
NGEN_FORWARD_TYPED_FN(hw, dp2) \
NGEN_FORWARD_TYPED_FN(hw, dp3) \
NGEN_FORWARD_TYPED_FN(hw, dp4) \
NGEN_FORWARD_TYPED_FN(hw, dph) \
NGEN_FORWARD_TYPED_FN(hw, fbh) \
NGEN_FORWARD_TYPED_FN(hw, fbl) \
NGEN_FORWARD_TYPED_FN(hw, frc) \
NGEN_FORWARD_TYPED_FN(hw, line) \
NGEN_FORWARD_TYPED_FN(hw, lrp) \
NGEN_FORWARD_TYPED_FN(hw, lzd) \
NGEN_FORWARD_TYPED_FN(hw, mac) \
NGEN_FORWARD_TYPED_FN(hw, macl) \
NGEN_FORWARD_TYPED_FN(hw, mach) \
NGEN_FORWARD_TYPED_FN(hw, mad) \
NGEN_FORWARD_TYPED_FN(hw, madm) \
NGEN_FORWARD_TYPED_FN(hw, math) \
NGEN_FORWARD_TYPED_FN(hw, mov) \
NGEN_FORWARD_TYPED_FN(hw, movi) \
NGEN_FORWARD_TYPED_FN(hw, mul) \
NGEN_FORWARD_TYPED_FN(hw, not_) \
NGEN_FORWARD_TYPED_FN(hw, or_) \
NGEN_FORWARD_TYPED_FN(hw, pln) \
NGEN_FORWARD_TYPED_FN(hw, rndd) \
NGEN_FORWARD_TYPED_FN(hw, rnde) \
NGEN_FORWARD_TYPED_FN(hw, rndu) \
NGEN_FORWARD_TYPED_FN(hw, rndz) \
NGEN_FORWARD_TYPED_FN(hw, rol) \
NGEN_FORWARD_TYPED_FN(hw, ror) \
NGEN_FORWARD_TYPED_FN(hw, sad2) \
NGEN_FORWARD_TYPED_FN(hw, sada2) \
NGEN_FORWARD_TYPED_FN(hw, sel) \
NGEN_FORWARD_TYPED_FN(hw, shl) \
NGEN_FORWARD_TYPED_FN(hw, shr) \
NGEN_FORWARD_TYPED_FN(hw, smov) \
NGEN_FORWARD_TYPED_FN(hw, subb) \
NGEN_FORWARD_TYPED_FN(hw, xor_) \
NGEN_FORWARD_UNTYPED_FN(hw, brc) \
NGEN_FORWARD_UNTYPED_FN(hw, brd) \
NGEN_FORWARD_UNTYPED_FN(hw, break_) \
NGEN_FORWARD_UNTYPED_FN(hw, call) \
NGEN_FORWARD_UNTYPED_FN(hw, calla) \
NGEN_FORWARD_UNTYPED_FN(hw, cont) \
NGEN_FORWARD_UNTYPED_FN(hw, else_) \
NGEN_FORWARD_UNTYPED_FN(hw, endif) \
NGEN_FORWARD_UNTYPED_FN(hw, goto_) \
NGEN_FORWARD_UNTYPED_FN(hw, halt) \
NGEN_FORWARD_UNTYPED_FN(hw, if_) \
NGEN_FORWARD_UNTYPED_FN(hw, illegal) \
NGEN_FORWARD_UNTYPED_FN(hw, join) \
NGEN_FORWARD_UNTYPED_FN(hw, jmpi) \
NGEN_FORWARD_UNTYPED_FN(hw, nop) \
NGEN_FORWARD_UNTYPED_FN(hw, ret) \
NGEN_FORWARD_UNTYPED_FN(hw, send) \
NGEN_FORWARD_UNTYPED_FN(hw, sendc) \
NGEN_FORWARD_UNTYPED_FN(hw, sends) \
NGEN_FORWARD_UNTYPED_FN(hw, sendsc) \
using ngen::BinaryCodeGenerator<hw>::sync; \
NGEN_FORWARD_UNTYPED_FN(hw, wait) \
NGEN_FORWARD_UNTYPED_FN(hw, while_) \
NGEN_FORWARD_UNTYPED_FN(hw, wrdep) \
NGEN_FORWARD_TYPED_FN(hw, min_) \
NGEN_FORWARD_TYPED_FN(hw, max_) \
NGEN_FORWARD_TYPED_FN(hw, bfi) \
NGEN_FORWARD_TYPED_FN(hw, cos) \
NGEN_FORWARD_TYPED_FN(hw, exp) \
NGEN_FORWARD_TYPED_FN(hw, fdiv) \
NGEN_FORWARD_TYPED_FN(hw, idiv) \
NGEN_FORWARD_TYPED_FN(hw, inv) \
NGEN_FORWARD_TYPED_FN(hw, invm) \
NGEN_FORWARD_TYPED_FN(hw, iqot) \
NGEN_FORWARD_TYPED_FN(hw, irem) \
NGEN_FORWARD_TYPED_FN(hw, log) \
NGEN_FORWARD_TYPED_FN(hw, pow) \
NGEN_FORWARD_TYPED_FN(hw, rsqt) \
NGEN_FORWARD_TYPED_FN(hw, rsqtm) \
NGEN_FORWARD_TYPED_FN(hw, sin) \
NGEN_FORWARD_TYPED_FN(hw, sqt) \
NGEN_FORWARD_TYPED_FN(hw, fdiv_ieee) \
NGEN_FORWARD_TYPED_FN(hw, inv_ieee) \
NGEN_FORWARD_TYPED_FN(hw, sqt_ieee) \
NGEN_FORWARD_UNTYPED_FN(hw, threadend) \
NGEN_FORWARD_UNTYPED_FN(hw, barrierheader) \
NGEN_FORWARD_UNTYPED_FN(hw, barriermsg) \
NGEN_FORWARD_UNTYPED_FN(hw, barriersignal) \
NGEN_FORWARD_UNTYPED_FN(hw, barrierwait) \
NGEN_FORWARD_UNTYPED_FN(hw, barrier) \
using ngen::BinaryCodeGenerator<hw>::load; \
using ngen::BinaryCodeGenerator<hw>::store; \
using ngen::BinaryCodeGenerator<hw>::atomic; \
NGEN_FORWARD_UNTYPED_FN(hw, memfence) \
NGEN_FORWARD_UNTYPED_FN(hw, slmfence) \
NGEN_FORWARD_UNTYPED_FN(hw, loadlid) \
NGEN_FORWARD_UNTYPED_FN(hw, loadargs) \
template <typename... Targs> void epilogue(int GRFCount, bool hasSLM, const ngen::RegData &r0_info) { ngen::BinaryCodeGenerator<hw>::epilogue(GRFCount, hasSLM, r0_info); } \
NGEN_FORWARD_UNTYPED_FN(hw, pushStream) \
template <typename... Targs> InstructionStream *popStream(Targs&&... args) { return ngen::BinaryCodeGenerator<hw>::popStream(std::forward<Targs>(args)...); } \
NGEN_FORWARD_UNTYPED_FN(hw, appendStream) \
NGEN_FORWARD_UNTYPED_FN(hw, appendCurrentStream) \
NGEN_FORWARD_UNTYPED_FN(hw, discardStream) \
NGEN_FORWARD_UNTYPED_FN(hw, mark) \
NGEN_FORWARD_UNTYPED_FN(hw, comment) \
NGEN_FORWARD_UNTYPED_FN(hw, setDefaultNoMask) \
NGEN_FORWARD_UNTYPED_FN(hw, setDefaultAutoSWSB) \
bool getDefaultNoMask() { return ngen::BinaryCodeGenerator<hw>::getDefaultNoMask(); } \
bool getDefaultAutoSWSB() { return ngen::BinaryCodeGenerator<hw>::getDefaultAutoSWSB(); } \
using ngen::BinaryCodeGenerator<hw>::stepping; \
int getStepping() { return ngen::BinaryCodeGenerator<hw>::getStepping(); } \
void setStepping(int stepping_) { ngen::BinaryCodeGenerator<hw>::setStepping(stepping_); } \
NGEN_FORWARD_EXTRA \
NGEN_FORWARD_OP_NAMES \
NGEN_FORWARD_MIN_MAX \
NGEN_FORWARD_REGISTERS
1560
// Additional typed-instruction forwarders (add3, bfn, dp4a, dpas, dpasw) that
// NGEN_FORWARD appends to its own list.
// This macro takes no parameter: the `hw` in its body is resolved at the point
// of expansion, so the expanding scope must itself provide a name `hw`.
#define NGEN_FORWARD_EXTRA \
template <typename DT = void, typename... Targs> \
void add3(Targs&&... args) { \
    ngen::BinaryCodeGenerator<hw>::template add3<DT>(std::forward<Targs>(args)...); \
} \
template <typename DT = void, typename... Targs> \
void bfn(Targs&&... args) { \
    ngen::BinaryCodeGenerator<hw>::template bfn<DT>(std::forward<Targs>(args)...); \
} \
template <typename DT = void, typename... Targs> \
void dp4a(Targs&&... args) { \
    ngen::BinaryCodeGenerator<hw>::template dp4a<DT>(std::forward<Targs>(args)...); \
} \
template <typename DT = void, typename... Targs> \
void dpas(Targs&&... args) { \
    ngen::BinaryCodeGenerator<hw>::template dpas<DT>(std::forward<Targs>(args)...); \
} \
template <typename DT = void, typename... Targs> \
void dpasw(Targs&&... args) { \
    ngen::BinaryCodeGenerator<hw>::template dpasw<DT>(std::forward<Targs>(args)...); \
}
1567
// Forwarders named and/not/or/xor that dispatch to and_/not_/or_/xor_.
// Defining NGEN_NO_OP_NAMES makes this macro expand to nothing.
// Like NGEN_FORWARD_EXTRA, the macro is parameterless and relies on a name `hw`
// being available where it is expanded.
#ifdef NGEN_NO_OP_NAMES
#define NGEN_FORWARD_OP_NAMES
#else
#define NGEN_FORWARD_OP_NAMES \
template <typename DT = void, typename... Targs> \
void and(Targs&&... args) { \
    ngen::BinaryCodeGenerator<hw>::template and_<DT>(std::forward<Targs>(args)...); \
} \
template <typename DT = void, typename... Targs> \
void not(Targs&&... args) { \
    ngen::BinaryCodeGenerator<hw>::template not_<DT>(std::forward<Targs>(args)...); \
} \
template <typename DT = void, typename... Targs> \
void or(Targs&&... args) { \
    ngen::BinaryCodeGenerator<hw>::template or_<DT>(std::forward<Targs>(args)...); \
} \
template <typename DT = void, typename... Targs> \
void xor(Targs&&... args) { \
    ngen::BinaryCodeGenerator<hw>::template xor_<DT>(std::forward<Targs>(args)...); \
}
#endif
1577
// Forwarders named min/max that dispatch to BinaryCodeGenerator<hw>::min/max.
// Defining NGEN_WINDOWS_COMPAT makes this macro expand to nothing.
// Like NGEN_FORWARD_EXTRA, the macro is parameterless and relies on a name `hw`
// being available where it is expanded.
#ifdef NGEN_WINDOWS_COMPAT
#define NGEN_FORWARD_MIN_MAX
#else
#define NGEN_FORWARD_MIN_MAX \
template <typename DT = void, typename... Targs> \
void min(Targs&&... args) { \
    ngen::BinaryCodeGenerator<hw>::template min<DT>(std::forward<Targs>(args)...); \
} \
template <typename DT = void, typename... Targs> \
void max(Targs&&... args) { \
    ngen::BinaryCodeGenerator<hw>::template max<DT>(std::forward<Targs>(args)...); \
}
#endif
1585
1586#ifdef NGEN_GLOBAL_REGS
1587#define NGEN_FORWARD_REGISTERS
1588#else
1589#define NGEN_FORWARD_REGISTERS_BASE \
1590using ngen::BinaryCodeGenerator<hw>::indirect; \
1591using ngen::BinaryCodeGenerator<hw>::r0; using ngen::BinaryCodeGenerator<hw>::r1; using ngen::BinaryCodeGenerator<hw>::r2; using ngen::BinaryCodeGenerator<hw>::r3; \
1592using ngen::BinaryCodeGenerator<hw>::r4; using ngen::BinaryCodeGenerator<hw>::r5; using ngen::BinaryCodeGenerator<hw>::r6; using ngen::BinaryCodeGenerator<hw>::r7; \
1593using ngen::BinaryCodeGenerator<hw>::r8; using ngen::BinaryCodeGenerator<hw>::r9; using ngen::BinaryCodeGenerator<hw>::r10; using ngen::BinaryCodeGenerator<hw>::r11; \
1594using ngen::BinaryCodeGenerator<hw>::r12; using ngen::BinaryCodeGenerator<hw>::r13; using ngen::BinaryCodeGenerator<hw>::r14; using ngen::BinaryCodeGenerator<hw>::r15; \
1595using ngen::BinaryCodeGenerator<hw>::r16; using ngen::BinaryCodeGenerator<hw>::r17; using ngen::BinaryCodeGenerator<hw>::r18; using ngen::BinaryCodeGenerator<hw>::r19; \
1596using ngen::BinaryCodeGenerator<hw>::r20; using ngen::BinaryCodeGenerator<hw>::r21; using ngen::BinaryCodeGenerator<hw>::r22; using ngen::BinaryCodeGenerator<hw>::r23; \
1597using ngen::BinaryCodeGenerator<hw>::r24; using ngen::BinaryCodeGenerator<hw>::r25; using ngen::BinaryCodeGenerator<hw>::r26; using ngen::BinaryCodeGenerator<hw>::r27; \
1598using ngen::BinaryCodeGenerator<hw>::r28; using ngen::BinaryCodeGenerator<hw>::r29; using ngen::BinaryCodeGenerator<hw>::r30; using ngen::BinaryCodeGenerator<hw>::r31; \
1599using ngen::BinaryCodeGenerator<hw>::r32; using ngen::BinaryCodeGenerator<hw>::r33; using ngen::BinaryCodeGenerator<hw>::r34; using ngen::BinaryCodeGenerator<hw>::r35; \
1600using ngen::BinaryCodeGenerator<hw>::r36; using ngen::BinaryCodeGenerator<hw>::r37; using ngen::BinaryCodeGenerator<hw>::r38; using ngen::BinaryCodeGenerator<hw>::r39; \
1601using ngen::BinaryCodeGenerator<hw>::r40; using ngen::BinaryCodeGenerator<hw>::r41; using ngen::BinaryCodeGenerator<hw>::r42; using ngen::BinaryCodeGenerator<hw>::r43; \
1602using ngen::BinaryCodeGenerator<hw>::r44; using ngen::BinaryCodeGenerator<hw>::r45; using ngen::BinaryCodeGenerator<hw>::r46; using ngen::BinaryCodeGenerator<hw>::r47; \
1603using ngen::BinaryCodeGenerator<hw>::r48; using ngen::BinaryCodeGenerator<hw>::r49; using ngen::BinaryCodeGenerator<hw>::r50; using ngen::BinaryCodeGenerator<hw>::r51; \
1604using ngen::BinaryCodeGenerator<hw>::r52; using ngen::BinaryCodeGenerator<hw>::r53; using ngen::BinaryCodeGenerator<hw>::r54; using ngen::BinaryCodeGenerator<hw>::r55; \
1605using ngen::BinaryCodeGenerator<hw>::r56; using ngen::BinaryCodeGenerator<hw>::r57; using ngen::BinaryCodeGenerator<hw>::r58; using ngen::BinaryCodeGenerator<hw>::r59; \
1606using ngen::BinaryCodeGenerator<hw>::r60; using ngen::BinaryCodeGenerator<hw>::r61; using ngen::BinaryCodeGenerator<hw>::r62; using ngen::BinaryCodeGenerator<hw>::r63; \
1607using ngen::BinaryCodeGenerator<hw>::r64; using ngen::BinaryCodeGenerator<hw>::r65; using ngen::BinaryCodeGenerator<hw>::r66; using ngen::BinaryCodeGenerator<hw>::r67; \
1608using ngen::BinaryCodeGenerator<hw>::r68; using ngen::BinaryCodeGenerator<hw>::r69; using ngen::BinaryCodeGenerator<hw>::r70; using ngen::BinaryCodeGenerator<hw>::r71; \
1609using ngen::BinaryCodeGenerator<hw>::r72; using ngen::BinaryCodeGenerator<hw>::r73; using ngen::BinaryCodeGenerator<hw>::r74; using ngen::BinaryCodeGenerator<hw>::r75; \
1610using ngen::BinaryCodeGenerator<hw>::r76; using ngen::BinaryCodeGenerator<hw>::r77; using ngen::BinaryCodeGenerator<hw>::r78; using ngen::BinaryCodeGenerator<hw>::r79; \
1611using ngen::BinaryCodeGenerator<hw>::r80; using ngen::BinaryCodeGenerator<hw>::r81; using ngen::BinaryCodeGenerator<hw>::r82; using ngen::BinaryCodeGenerator<hw>::r83; \
1612using ngen::BinaryCodeGenerator<hw>::r84; using ngen::BinaryCodeGenerator<hw>::r85; using ngen::BinaryCodeGenerator<hw>::r86; using ngen::BinaryCodeGenerator<hw>::r87; \
1613using ngen::BinaryCodeGenerator<hw>::r88; using ngen::BinaryCodeGenerator<hw>::r89; using ngen::BinaryCodeGenerator<hw>::r90; using ngen::BinaryCodeGenerator<hw>::r91; \
1614using ngen::BinaryCodeGenerator<hw>::r92; using ngen::BinaryCodeGenerator<hw>::r93; using ngen::BinaryCodeGenerator<hw>::r94; using ngen::BinaryCodeGenerator<hw>::r95; \
1615using ngen::BinaryCodeGenerator<hw>::r96; using ngen::BinaryCodeGenerator<hw>::r97; using ngen::BinaryCodeGenerator<hw>::r98; using ngen::BinaryCodeGenerator<hw>::r99; \
1616using ngen::BinaryCodeGenerator<hw>::r100; using ngen::BinaryCodeGenerator<hw>::r101; using ngen::BinaryCodeGenerator<hw>::r102; using ngen::BinaryCodeGenerator<hw>::r103; \
1617using ngen::BinaryCodeGenerator<hw>::r104; using ngen::BinaryCodeGenerator<hw>::r105; using ngen::BinaryCodeGenerator<hw>::r106; using ngen::BinaryCodeGenerator<hw>::r107; \
1618using ngen::BinaryCodeGenerator<hw>::r108; using ngen::BinaryCodeGenerator<hw>::r109; using ngen::BinaryCodeGenerator<hw>::r110; using ngen::BinaryCodeGenerator<hw>::r111; \
1619using ngen::BinaryCodeGenerator<hw>::r112; using ngen::BinaryCodeGenerator<hw>::r113; using ngen::BinaryCodeGenerator<hw>::r114; using ngen::BinaryCodeGenerator<hw>::r115; \
1620using ngen::BinaryCodeGenerator<hw>::r116; using ngen::BinaryCodeGenerator<hw>::r117; using ngen::BinaryCodeGenerator<hw>::r118; using ngen::BinaryCodeGenerator<hw>::r119; \
1621using ngen::BinaryCodeGenerator<hw>::r120; using ngen::BinaryCodeGenerator<hw>::r121; using ngen::BinaryCodeGenerator<hw>::r122; using ngen::BinaryCodeGenerator<hw>::r123; \
1622using ngen::BinaryCodeGenerator<hw>::r124; using ngen::BinaryCodeGenerator<hw>::r125; using ngen::BinaryCodeGenerator<hw>::r126; using ngen::BinaryCodeGenerator<hw>::r127; \
1623using ngen::BinaryCodeGenerator<hw>::null; \
1624using ngen::BinaryCodeGenerator<hw>::a0; \
1625using ngen::BinaryCodeGenerator<hw>::acc0; using ngen::BinaryCodeGenerator<hw>::acc1; using ngen::BinaryCodeGenerator<hw>::acc2; using ngen::BinaryCodeGenerator<hw>::acc3; \
1626using ngen::BinaryCodeGenerator<hw>::acc4; using ngen::BinaryCodeGenerator<hw>::acc5; using ngen::BinaryCodeGenerator<hw>::acc6; using ngen::BinaryCodeGenerator<hw>::acc7; \
1627using ngen::BinaryCodeGenerator<hw>::acc8; using ngen::BinaryCodeGenerator<hw>::acc9; \
1628using ngen::BinaryCodeGenerator<hw>::mme0; using ngen::BinaryCodeGenerator<hw>::mme1; using ngen::BinaryCodeGenerator<hw>::mme2; using ngen::BinaryCodeGenerator<hw>::mme3; \
1629using ngen::BinaryCodeGenerator<hw>::mme4; using ngen::BinaryCodeGenerator<hw>::mme5; using ngen::BinaryCodeGenerator<hw>::mme6; using ngen::BinaryCodeGenerator<hw>::mme7; \
1630using ngen::BinaryCodeGenerator<hw>::noacc; using ngen::BinaryCodeGenerator<hw>::nomme; \
1631using ngen::BinaryCodeGenerator<hw>::f0; using ngen::BinaryCodeGenerator<hw>::f1; \
1632using ngen::BinaryCodeGenerator<hw>::ce0; using ngen::BinaryCodeGenerator<hw>::sp; using ngen::BinaryCodeGenerator<hw>::sr0; using ngen::BinaryCodeGenerator<hw>::sr1; \
1633using ngen::BinaryCodeGenerator<hw>::cr0; using ngen::BinaryCodeGenerator<hw>::n0; using ngen::BinaryCodeGenerator<hw>::ip; using ngen::BinaryCodeGenerator<hw>::tdr0; \
1634using ngen::BinaryCodeGenerator<hw>::tm0; using ngen::BinaryCodeGenerator<hw>::tm1; using ngen::BinaryCodeGenerator<hw>::tm2; using ngen::BinaryCodeGenerator<hw>::tm3; \
1635using ngen::BinaryCodeGenerator<hw>::tm4; using ngen::BinaryCodeGenerator<hw>::pm0; using ngen::BinaryCodeGenerator<hw>::tp0; using ngen::BinaryCodeGenerator<hw>::dbg0; \
1636using ngen::BinaryCodeGenerator<hw>::fc0; using ngen::BinaryCodeGenerator<hw>::fc1; using ngen::BinaryCodeGenerator<hw>::fc2; using ngen::BinaryCodeGenerator<hw>::fc3; \
1637using ngen::BinaryCodeGenerator<hw>::NoDDClr; using ngen::BinaryCodeGenerator<hw>::NoDDChk; \
1638using ngen::BinaryCodeGenerator<hw>::AccWrEn; using ngen::BinaryCodeGenerator<hw>::NoSrcDepSet; using ngen::BinaryCodeGenerator<hw>::Breakpoint; using ngen::BinaryCodeGenerator<hw>::sat; \
1639using ngen::BinaryCodeGenerator<hw>::NoMask; \
1640using ngen::BinaryCodeGenerator<hw>::Serialize; using ngen::BinaryCodeGenerator<hw>::EOT; \
1641using ngen::BinaryCodeGenerator<hw>::Atomic; using ngen::BinaryCodeGenerator<hw>::Switch; using ngen::BinaryCodeGenerator<hw>::NoPreempt; \
1642using ngen::BinaryCodeGenerator<hw>::anyv; using ngen::BinaryCodeGenerator<hw>::allv; using ngen::BinaryCodeGenerator<hw>::any2h; using ngen::BinaryCodeGenerator<hw>::all2h; \
1643using ngen::BinaryCodeGenerator<hw>::any4h; using ngen::BinaryCodeGenerator<hw>::all4h; using ngen::BinaryCodeGenerator<hw>::any8h; using ngen::BinaryCodeGenerator<hw>::all8h; \
1644using ngen::BinaryCodeGenerator<hw>::any16h; using ngen::BinaryCodeGenerator<hw>::all16h; using ngen::BinaryCodeGenerator<hw>::any32h; using ngen::BinaryCodeGenerator<hw>::all32h; \
1645using ngen::BinaryCodeGenerator<hw>::x_repl; using ngen::BinaryCodeGenerator<hw>::y_repl; using ngen::BinaryCodeGenerator<hw>::z_repl; using ngen::BinaryCodeGenerator<hw>::w_repl; \
1646using ngen::BinaryCodeGenerator<hw>::ze; using ngen::BinaryCodeGenerator<hw>::eq; using ngen::BinaryCodeGenerator<hw>::nz; using ngen::BinaryCodeGenerator<hw>::ne; \
1647using ngen::BinaryCodeGenerator<hw>::gt; using ngen::BinaryCodeGenerator<hw>::ge; using ngen::BinaryCodeGenerator<hw>::lt; using ngen::BinaryCodeGenerator<hw>::le; \
1648using ngen::BinaryCodeGenerator<hw>::ov; using ngen::BinaryCodeGenerator<hw>::un; using ngen::BinaryCodeGenerator<hw>::eo; \
1649using ngen::BinaryCodeGenerator<hw>::M0; using ngen::BinaryCodeGenerator<hw>::M4; using ngen::BinaryCodeGenerator<hw>::M8; using ngen::BinaryCodeGenerator<hw>::M12; \
1650using ngen::BinaryCodeGenerator<hw>::M16; using ngen::BinaryCodeGenerator<hw>::M20; using ngen::BinaryCodeGenerator<hw>::M24; using ngen::BinaryCodeGenerator<hw>::M28; \
1651using ngen::BinaryCodeGenerator<hw>::sb0; using ngen::BinaryCodeGenerator<hw>::sb1; using ngen::BinaryCodeGenerator<hw>::sb2; using ngen::BinaryCodeGenerator<hw>::sb3; \
1652using ngen::BinaryCodeGenerator<hw>::sb4; using ngen::BinaryCodeGenerator<hw>::sb5; using ngen::BinaryCodeGenerator<hw>::sb6; using ngen::BinaryCodeGenerator<hw>::sb7; \
1653using ngen::BinaryCodeGenerator<hw>::sb8; using ngen::BinaryCodeGenerator<hw>::sb9; using ngen::BinaryCodeGenerator<hw>::sb10; using ngen::BinaryCodeGenerator<hw>::sb11; \
1654using ngen::BinaryCodeGenerator<hw>::sb12; using ngen::BinaryCodeGenerator<hw>::sb13; using ngen::BinaryCodeGenerator<hw>::sb14; using ngen::BinaryCodeGenerator<hw>::sb15; \
1655using ngen::BinaryCodeGenerator<hw>::A32; using ngen::BinaryCodeGenerator<hw>::A32NC; using ngen::BinaryCodeGenerator<hw>::A64; using ngen::BinaryCodeGenerator<hw>::A64NC; \
1656using ngen::BinaryCodeGenerator<hw>::SLM; \
1657template <typename... Targs> ngen::InstructionModifier ExecutionOffset(Targs&&... args) { return ngen::BinaryCodeGenerator<hw>::ExecutionOffset(std::forward<Targs>(args)...); } \
1658template <typename... Targs> ngen::AddressBase Surface(Targs&&... args) { return ngen::BinaryCodeGenerator<hw>::Surface(std::forward<Targs>(args)...); } \
1659template <typename... Targs> ngen::AddressBase CC(Targs&&... args) { return ngen::BinaryCodeGenerator<hw>::CC(std::forward<Targs>(args)...); } \
1660template <typename... Targs> ngen::AddressBase SC(Targs&&... args) { return ngen::BinaryCodeGenerator<hw>::SC(std::forward<Targs>(args)...); }
// NGEN_FORWARD_REGISTERS_EXTRA1: using-declarations that re-export the members r128 through r255
// of ngen::BinaryCodeGenerator<hw> into the scope where the macro is expanded.
// Every declaration names BinaryCodeGenerator<hw>, so a name `hw` must be visible at the expansion site.
1661#define NGEN_FORWARD_REGISTERS_EXTRA1 \
1662using ngen::BinaryCodeGenerator<hw>::r128; using ngen::BinaryCodeGenerator<hw>::r129; using ngen::BinaryCodeGenerator<hw>::r130; using ngen::BinaryCodeGenerator<hw>::r131; \
1663using ngen::BinaryCodeGenerator<hw>::r132; using ngen::BinaryCodeGenerator<hw>::r133; using ngen::BinaryCodeGenerator<hw>::r134; using ngen::BinaryCodeGenerator<hw>::r135; \
1664using ngen::BinaryCodeGenerator<hw>::r136; using ngen::BinaryCodeGenerator<hw>::r137; using ngen::BinaryCodeGenerator<hw>::r138; using ngen::BinaryCodeGenerator<hw>::r139; \
1665using ngen::BinaryCodeGenerator<hw>::r140; using ngen::BinaryCodeGenerator<hw>::r141; using ngen::BinaryCodeGenerator<hw>::r142; using ngen::BinaryCodeGenerator<hw>::r143; \
1666using ngen::BinaryCodeGenerator<hw>::r144; using ngen::BinaryCodeGenerator<hw>::r145; using ngen::BinaryCodeGenerator<hw>::r146; using ngen::BinaryCodeGenerator<hw>::r147; \
1667using ngen::BinaryCodeGenerator<hw>::r148; using ngen::BinaryCodeGenerator<hw>::r149; using ngen::BinaryCodeGenerator<hw>::r150; using ngen::BinaryCodeGenerator<hw>::r151; \
1668using ngen::BinaryCodeGenerator<hw>::r152; using ngen::BinaryCodeGenerator<hw>::r153; using ngen::BinaryCodeGenerator<hw>::r154; using ngen::BinaryCodeGenerator<hw>::r155; \
1669using ngen::BinaryCodeGenerator<hw>::r156; using ngen::BinaryCodeGenerator<hw>::r157; using ngen::BinaryCodeGenerator<hw>::r158; using ngen::BinaryCodeGenerator<hw>::r159; \
1670using ngen::BinaryCodeGenerator<hw>::r160; using ngen::BinaryCodeGenerator<hw>::r161; using ngen::BinaryCodeGenerator<hw>::r162; using ngen::BinaryCodeGenerator<hw>::r163; \
1671using ngen::BinaryCodeGenerator<hw>::r164; using ngen::BinaryCodeGenerator<hw>::r165; using ngen::BinaryCodeGenerator<hw>::r166; using ngen::BinaryCodeGenerator<hw>::r167; \
1672using ngen::BinaryCodeGenerator<hw>::r168; using ngen::BinaryCodeGenerator<hw>::r169; using ngen::BinaryCodeGenerator<hw>::r170; using ngen::BinaryCodeGenerator<hw>::r171; \
1673using ngen::BinaryCodeGenerator<hw>::r172; using ngen::BinaryCodeGenerator<hw>::r173; using ngen::BinaryCodeGenerator<hw>::r174; using ngen::BinaryCodeGenerator<hw>::r175; \
1674using ngen::BinaryCodeGenerator<hw>::r176; using ngen::BinaryCodeGenerator<hw>::r177; using ngen::BinaryCodeGenerator<hw>::r178; using ngen::BinaryCodeGenerator<hw>::r179; \
1675using ngen::BinaryCodeGenerator<hw>::r180; using ngen::BinaryCodeGenerator<hw>::r181; using ngen::BinaryCodeGenerator<hw>::r182; using ngen::BinaryCodeGenerator<hw>::r183; \
1676using ngen::BinaryCodeGenerator<hw>::r184; using ngen::BinaryCodeGenerator<hw>::r185; using ngen::BinaryCodeGenerator<hw>::r186; using ngen::BinaryCodeGenerator<hw>::r187; \
1677using ngen::BinaryCodeGenerator<hw>::r188; using ngen::BinaryCodeGenerator<hw>::r189; using ngen::BinaryCodeGenerator<hw>::r190; using ngen::BinaryCodeGenerator<hw>::r191; \
1678using ngen::BinaryCodeGenerator<hw>::r192; using ngen::BinaryCodeGenerator<hw>::r193; using ngen::BinaryCodeGenerator<hw>::r194; using ngen::BinaryCodeGenerator<hw>::r195; \
1679using ngen::BinaryCodeGenerator<hw>::r196; using ngen::BinaryCodeGenerator<hw>::r197; using ngen::BinaryCodeGenerator<hw>::r198; using ngen::BinaryCodeGenerator<hw>::r199; \
1680using ngen::BinaryCodeGenerator<hw>::r200; using ngen::BinaryCodeGenerator<hw>::r201; using ngen::BinaryCodeGenerator<hw>::r202; using ngen::BinaryCodeGenerator<hw>::r203; \
1681using ngen::BinaryCodeGenerator<hw>::r204; using ngen::BinaryCodeGenerator<hw>::r205; using ngen::BinaryCodeGenerator<hw>::r206; using ngen::BinaryCodeGenerator<hw>::r207; \
1682using ngen::BinaryCodeGenerator<hw>::r208; using ngen::BinaryCodeGenerator<hw>::r209; using ngen::BinaryCodeGenerator<hw>::r210; using ngen::BinaryCodeGenerator<hw>::r211; \
1683using ngen::BinaryCodeGenerator<hw>::r212; using ngen::BinaryCodeGenerator<hw>::r213; using ngen::BinaryCodeGenerator<hw>::r214; using ngen::BinaryCodeGenerator<hw>::r215; \
1684using ngen::BinaryCodeGenerator<hw>::r216; using ngen::BinaryCodeGenerator<hw>::r217; using ngen::BinaryCodeGenerator<hw>::r218; using ngen::BinaryCodeGenerator<hw>::r219; \
1685using ngen::BinaryCodeGenerator<hw>::r220; using ngen::BinaryCodeGenerator<hw>::r221; using ngen::BinaryCodeGenerator<hw>::r222; using ngen::BinaryCodeGenerator<hw>::r223; \
1686using ngen::BinaryCodeGenerator<hw>::r224; using ngen::BinaryCodeGenerator<hw>::r225; using ngen::BinaryCodeGenerator<hw>::r226; using ngen::BinaryCodeGenerator<hw>::r227; \
1687using ngen::BinaryCodeGenerator<hw>::r228; using ngen::BinaryCodeGenerator<hw>::r229; using ngen::BinaryCodeGenerator<hw>::r230; using ngen::BinaryCodeGenerator<hw>::r231; \
1688using ngen::BinaryCodeGenerator<hw>::r232; using ngen::BinaryCodeGenerator<hw>::r233; using ngen::BinaryCodeGenerator<hw>::r234; using ngen::BinaryCodeGenerator<hw>::r235; \
1689using ngen::BinaryCodeGenerator<hw>::r236; using ngen::BinaryCodeGenerator<hw>::r237; using ngen::BinaryCodeGenerator<hw>::r238; using ngen::BinaryCodeGenerator<hw>::r239; \
1690using ngen::BinaryCodeGenerator<hw>::r240; using ngen::BinaryCodeGenerator<hw>::r241; using ngen::BinaryCodeGenerator<hw>::r242; using ngen::BinaryCodeGenerator<hw>::r243; \
1691using ngen::BinaryCodeGenerator<hw>::r244; using ngen::BinaryCodeGenerator<hw>::r245; using ngen::BinaryCodeGenerator<hw>::r246; using ngen::BinaryCodeGenerator<hw>::r247; \
1692using ngen::BinaryCodeGenerator<hw>::r248; using ngen::BinaryCodeGenerator<hw>::r249; using ngen::BinaryCodeGenerator<hw>::r250; using ngen::BinaryCodeGenerator<hw>::r251; \
1693using ngen::BinaryCodeGenerator<hw>::r252; using ngen::BinaryCodeGenerator<hw>::r253; using ngen::BinaryCodeGenerator<hw>::r254; using ngen::BinaryCodeGenerator<hw>::r255;
// NGEN_FORWARD_REGISTERS_EXTRA2: using-declarations that re-export, from ngen::BinaryCodeGenerator<hw>,
// the D8/D16/D32/D64, D8U32/D16U32 names and their T-suffixed variants, the V1..V64 names and their
// T-suffixed variants, `transpose`, and the L1*_L3* names.
// NOTE(review): presumably data-size, vector-length and cache-control specifiers -- confirm at their declarations.
// A name `hw` must be visible at the expansion site.
1694#define NGEN_FORWARD_REGISTERS_EXTRA2 \
1695using ngen::BinaryCodeGenerator<hw>::D8; using ngen::BinaryCodeGenerator<hw>::D16; using ngen::BinaryCodeGenerator<hw>::D32; using ngen::BinaryCodeGenerator<hw>::D64; \
1696using ngen::BinaryCodeGenerator<hw>::D8U32; using ngen::BinaryCodeGenerator<hw>::D16U32; \
1697using ngen::BinaryCodeGenerator<hw>::D8T; using ngen::BinaryCodeGenerator<hw>::D16T; using ngen::BinaryCodeGenerator<hw>::D32T; using ngen::BinaryCodeGenerator<hw>::D64T; \
1698using ngen::BinaryCodeGenerator<hw>::D8U32T; using ngen::BinaryCodeGenerator<hw>::D16U32T; \
1699using ngen::BinaryCodeGenerator<hw>::V1; using ngen::BinaryCodeGenerator<hw>::V2; using ngen::BinaryCodeGenerator<hw>::V3; using ngen::BinaryCodeGenerator<hw>::V4; \
1700using ngen::BinaryCodeGenerator<hw>::V8; using ngen::BinaryCodeGenerator<hw>::V16; using ngen::BinaryCodeGenerator<hw>::V32; using ngen::BinaryCodeGenerator<hw>::V64; \
1701using ngen::BinaryCodeGenerator<hw>::V1T; using ngen::BinaryCodeGenerator<hw>::V2T; using ngen::BinaryCodeGenerator<hw>::V3T; using ngen::BinaryCodeGenerator<hw>::V4T; \
1702using ngen::BinaryCodeGenerator<hw>::V8T; using ngen::BinaryCodeGenerator<hw>::V16T; using ngen::BinaryCodeGenerator<hw>::V32T; using ngen::BinaryCodeGenerator<hw>::V64T; \
1703using ngen::BinaryCodeGenerator<hw>::transpose; \
1704using ngen::BinaryCodeGenerator<hw>::L1UC_L3UC; using ngen::BinaryCodeGenerator<hw>::L1UC_L3C; using ngen::BinaryCodeGenerator<hw>::L1C_L3UC; using ngen::BinaryCodeGenerator<hw>::L1C_L3C; \
1705using ngen::BinaryCodeGenerator<hw>::L1S_L3UC; using ngen::BinaryCodeGenerator<hw>::L1S_L3C; using ngen::BinaryCodeGenerator<hw>::L1IAR_L3C; using ngen::BinaryCodeGenerator<hw>::L1UC_L3WB; \
1706using ngen::BinaryCodeGenerator<hw>::L1WT_L3UC; using ngen::BinaryCodeGenerator<hw>::L1WT_L3WB; using ngen::BinaryCodeGenerator<hw>::L1S_L3WB; using ngen::BinaryCodeGenerator<hw>::L1WB_L3WB;
// NGEN_FORWARD_REGISTERS_EXTRA3: using-declarations that re-export, from ngen::BinaryCodeGenerator<hw>,
// the names `any`, `all`, `f2`, `f3`, `sb16` through `sb31`, `NoAccSBSet` and `vnni`.
// A name `hw` must be visible at the expansion site.
1707#define NGEN_FORWARD_REGISTERS_EXTRA3 \
1708using ngen::BinaryCodeGenerator<hw>::any; using ngen::BinaryCodeGenerator<hw>::all; \
1709using ngen::BinaryCodeGenerator<hw>::f2; using ngen::BinaryCodeGenerator<hw>::f3; \
1710using ngen::BinaryCodeGenerator<hw>::sb16; using ngen::BinaryCodeGenerator<hw>::sb17; using ngen::BinaryCodeGenerator<hw>::sb18; using ngen::BinaryCodeGenerator<hw>::sb19; \
1711using ngen::BinaryCodeGenerator<hw>::sb20; using ngen::BinaryCodeGenerator<hw>::sb21; using ngen::BinaryCodeGenerator<hw>::sb22; using ngen::BinaryCodeGenerator<hw>::sb23; \
1712using ngen::BinaryCodeGenerator<hw>::sb24; using ngen::BinaryCodeGenerator<hw>::sb25; using ngen::BinaryCodeGenerator<hw>::sb26; using ngen::BinaryCodeGenerator<hw>::sb27; \
1713using ngen::BinaryCodeGenerator<hw>::sb28; using ngen::BinaryCodeGenerator<hw>::sb29; using ngen::BinaryCodeGenerator<hw>::sb30; using ngen::BinaryCodeGenerator<hw>::sb31; \
1714using ngen::BinaryCodeGenerator<hw>::NoAccSBSet; \
1715using ngen::BinaryCodeGenerator<hw>::vnni;
// NGEN_FORWARD_REGISTERS: convenience macro that expands to the BASE forwarding macro followed by
// the EXTRA1, EXTRA2 and EXTRA3 forwarding macros, in that order.
1716#define NGEN_FORWARD_REGISTERS NGEN_FORWARD_REGISTERS_BASE NGEN_FORWARD_REGISTERS_EXTRA1 NGEN_FORWARD_REGISTERS_EXTRA2 NGEN_FORWARD_REGISTERS_EXTRA3
1717#endif
1718
1719template <HW hw>
1720inline void BinaryCodeGenerator<hw>::unsupported()
1721{
1722#ifdef NGEN_SAFE
1723 throw unsupported_instruction();
1724#endif
1725}
1726
1727template <HW hw>
1728typename BinaryCodeGenerator<hw>::InstructionStream *BinaryCodeGenerator<hw>::popStream()
1729{
1730#ifdef NGEN_SAFE
1731 if (streamStack.size() <= 1) throw stream_stack_underflow();
1732#endif
1733
1734 InstructionStream *result = streamStack.back();
1735 streamStack.pop_back();
1736 return result;
1737}
1738
1739template <HW hw>
1740static inline Instruction12 encodeSyncInsertion(autoswsb::SyncInsertion &si)
1741{
1742 Instruction12 i;
1743
1744 i.common.opcode = static_cast<int>(Opcode::sync);
1745 i.common.swsb = (hw >= HW::XeHPC) ? SWSBInfoXeHPC(si.swsb, Opcode::sync).raw()
1746 : SWSBInfo12(si.swsb, Opcode::sync).raw();
1747 i.common.maskCtrl = true;
1748 i.binary.cmod = static_cast<int>(si.fc);
1749
1750 if (si.mask) {
1751 i.binary.src0Type = getTypecode12(DataType::ud);
1752 i.binary.src0Imm = true;
1753 i.imm32.value = si.mask;
1754 }
1755 i.binary.dst = 1;
1756
1757 return i;
1758}
1759
1760template <HW hw>
1761std::vector<uint8_t> BinaryCodeGenerator<hw>::getCode()
1762{
1763#ifdef NGEN_SAFE
1764 if (streamStack.size() > 1) throw unfinished_stream_exception();
1765#endif
1766 rootStream.fixLabels(labelManager);
1767
1768 Program program(rootStream);
1769 autoswsb::BasicBlockList analysis = autoswsb::autoSWSB(hw, program);
1770 std::vector<uint8_t> result;
1771
1772 if (analysis.empty()) {
1773 result.resize(rootStream.length());
1774 std::memmove(result.data(), rootStream.code.data(), rootStream.length());
1775 } else {
1776 std::multimap<int32_t, autoswsb::SyncInsertion*> syncs;
1777
1778 for (auto &bb : analysis)
1779 for (auto &sync : bb.syncs)
1780 syncs.insert(std::make_pair(sync.inum, &sync));
1781
1782 result.resize(rootStream.length() + syncs.size() * sizeof(Instruction12));
1783
1784 auto *psrc = reinterpret_cast<const Instruction12 *>(rootStream.code.data());
1785 auto *pdst = reinterpret_cast<Instruction12 *>(result.data());
1786 auto nextSync = syncs.begin();
1787
1788 for (uint32_t isrc = 0; isrc < program.size(); isrc++, psrc++) {
1789 if (psrc->opcode() == Opcode::wrdep)
1790 continue;
1791 while ((nextSync != syncs.end()) && (nextSync->second->inum == isrc))
1792 *pdst++ = encodeSyncInsertion<hw>(*(nextSync++)->second);
1793 *pdst++ = *psrc;
1794 }
1795
1796 result.resize(reinterpret_cast<uint8_t *>(pdst) - result.data());
1797 }
1798
1799 return result;
1800}
1801
1802template <HW hw>
1803template <bool forceWE, typename D, typename S0, HW hw_>
1804typename std::enable_if<hwLT(hw_, HW::Gen12LP)>::type
1805BinaryCodeGenerator<hw>::opX(Opcode op, DataType defaultType, const InstructionModifier &mod, D dst, S0 src0)
1806{
1807 Instruction8 i{};
1808 InstructionModifier emod = mod | defaultModifier;
1809 if (forceWE)
1810 emod |= NoMask;
1811
1812 dst.fixup(hw, emod.getExecSize(), defaultType, true, 1);
1813 src0.fixup(hw, emod.getExecSize(), defaultType, false, 1);
1814
1815 encodeCommon8(i, op, emod);
1816 i.common.accessMode = std::is_base_of<Align16Operand, D>::value;
1817
1818 i.binary.dst = encodeBinaryOperand8<true>(dst).bits;
1819 i.binary.src0 = encodeBinaryOperand8<false>(src0).bits;
1820
1821 if (dst.isIndirect()) i.binary.dstAddrImm9 = dst.getOffset() >> 9;
1822 if (src0.isIndirect()) i.binary.src0AddrImm9 = src0.getOffset() >> 9;
1823
1824 i.binary.dstType = getTypecode<hw>(dst.getType());
1825 i.binary.src0Type = getTypecode<hw>(src0.getType());
1826
1827 i.binary.dstRegFile = getRegFile(dst);
1828 i.binary.src0RegFile = getRegFile(src0);
1829
1830 db(i);
1831}
1832
1833template <HW hw>
1834template <bool forceWE, typename D, typename S0, HW hw_>
1835typename std::enable_if<hwGE(hw_, HW::Gen12LP)>::type
1836BinaryCodeGenerator<hw>::opX(Opcode op, DataType defaultType, const InstructionModifier &mod, D dst, S0 src0)
1837{
1838 typename EncodingTag12Dispatch<hw>::tag tag;
1839 Instruction12 i{};
1840
1841 InstructionModifier emod = mod | defaultModifier;
1842 if (forceWE)
1843 emod |= NoMask;
1844
1845 dst.fixup(hw, emod.getExecSize(), defaultType, true, 1);
1846 src0.fixup(hw, emod.getExecSize(), defaultType, false, 1);
1847
1848 encodeCommon12(i, op, emod, dst, tag);
1849
1850 i.binary.dst = encodeBinaryOperand12<true>(dst, tag).bits;
1851 i.binary.src0 = encodeBinaryOperand12<false>(src0, tag).bits;
1852
1853 i.binary.dstAddrMode = dst.isIndirect();
1854 i.binary.dstType = getTypecode12(dst.getType());
1855 i.binary.src0Type = getTypecode12(src0.getType());
1856
1857 i.binary.src0Mods = src0.getMods();
1858
1859 i.binary.cmod = static_cast<int>(mod.getCMod());
1860
1861 db(i);
1862}
1863
1864template <HW hw>
1865template <bool forceWE, typename D, HW hw_>
1866typename std::enable_if<hwLT(hw_, HW::Gen12LP)>::type
1867BinaryCodeGenerator<hw>::opX(Opcode op, DataType defaultType, const InstructionModifier &mod, D dst, const Immediate &src0)
1868{
1869 Instruction8 i{};
1870 InstructionModifier emod = mod | defaultModifier;
1871 if (forceWE)
1872 emod |= NoMask;
1873
1874 dst.fixup(hw, emod.getExecSize(), defaultType, true, 1);
1875 src0.fixup(hw, emod.getExecSize(), defaultType, false, 1);
1876
1877 encodeCommon8(i, op, emod);
1878 i.common.accessMode = std::is_base_of<Align16Operand, D>::value;
1879
1880 i.binary.dst = encodeBinaryOperand8<true>(dst).bits;
1881
1882 i.binary.dstType = getTypecode<hw>(dst.getType());
1883 i.binary.src0Type = getImmediateTypecode<hw>(src0.getType());
1884
1885 i.binary.dstRegFile = getRegFile(dst);
1886 i.binary.src0RegFile = getRegFile(src0);
1887
1888 if (dst.isIndirect()) i.binary.dstAddrImm9 = dst.getOffset() >> 9;
1889
1890 if (getBytes(src0.getType()) == 8)
1891 i.imm64.value = static_cast<uint64_t>(src0);
1892 else
1893 i.imm32.value = static_cast<uint64_t>(src0);
1894
1895 db(i);
1896}
1897
1898template <HW hw>
1899template <bool forceWE, typename D, HW hw_>
1900typename std::enable_if<hwGE(hw_, HW::Gen12LP)>::type
1901BinaryCodeGenerator<hw>::opX(Opcode op, DataType defaultType, const InstructionModifier &mod, D dst, const Immediate &src0)
1902{
1903 typename EncodingTag12Dispatch<hw>::tag tag;
1904 Instruction12 i{};
1905
1906 InstructionModifier emod = mod | defaultModifier;
1907 if (forceWE)
1908 emod |= NoMask;
1909
1910 dst.fixup(hw, emod.getExecSize(), defaultType, true, 1);
1911 src0.fixup(hw, emod.getExecSize(), defaultType, false, 1);
1912
1913 encodeCommon12(i, op, emod, dst, tag);
1914
1915 i.binary.dst = encodeBinaryOperand12<true>(dst, tag).bits;
1916
1917 i.binary.dstAddrMode = dst.isIndirect();
1918
1919 i.binary.dstType = getTypecode12(dst.getType());
1920 i.binary.src0Type = getTypecode12(src0.getType());
1921
1922 i.binary.src0Imm = true;
1923
1924 i.binary.cmod = static_cast<int>(mod.getCMod());
1925
1926 auto val = static_cast<uint64_t>(src0);
1927 i.imm32.value = val;
1928 if (getBytes(src0.getType()) == 8) {
1929#ifdef NGEN_SAFE
1930 if (mod.getCMod() != ConditionModifier::none) throw invalid_modifiers_exception();
1931#endif
1932 i.imm64.high = val >> 32;
1933 }
1934
1935 db(i);
1936}
1937
1938template <HW hw>
1939template <bool forceWE, typename D, typename S0, typename S1, HW hw_>
1940typename std::enable_if<hwLT(hw_, HW::Gen12LP)>::type
1941BinaryCodeGenerator<hw>::opX(Opcode op, DataType defaultType, const InstructionModifier &mod, D dst, S0 src0, S1 src1)
1942{
1943 Instruction8 i{};
1944
1945 InstructionModifier emod = mod | defaultModifier;
1946 if (forceWE)
1947 emod |= NoMask;
1948
1949 dst.fixup(hw, emod.getExecSize(), defaultType, true, 2);
1950 src0.fixup(hw, emod.getExecSize(), defaultType, false, 2);
1951 src1.fixup(hw, emod.getExecSize(), defaultType, false, 2);
1952
1953 encodeCommon8(i, op, emod);
1954 i.common.accessMode = std::is_base_of<Align16Operand, D>::value;
1955
1956 i.binary.dst = encodeBinaryOperand8<true>(dst).bits;
1957 i.binary.src0 = encodeBinaryOperand8<false>(src0).bits;
1958 i.binary.src1 = encodeBinaryOperand8<false>(src1).bits;
1959
1960 if (dst.isIndirect()) i.binary.dstAddrImm9 = dst.getOffset() >> 9;
1961 if (src0.isIndirect()) i.binary.src0AddrImm9 = src0.getOffset() >> 9;
1962 if (src1.isIndirect()) i.binary.src1AddrImm9 = src1.getOffset() >> 9;
1963
1964 i.binary.dstType = getTypecode<hw>(dst.getType());
1965 i.binary.src0Type = getTypecode<hw>(src0.getType());
1966 i.binary.src1Type = getTypecode<hw>(src1.getType());
1967
1968 i.binary.dstRegFile = getRegFile(dst);
1969 i.binary.src0RegFile = getRegFile(src0);
1970 i.binary.src1RegFile = getRegFile(src1);
1971
1972#ifdef NGEN_SAFE
1973 if (src1.isARF() && op != Opcode::illegal && op != Opcode::movi) throw grf_expected_exception();
1974#endif
1975
1976 db(i);
1977}
1978
1979template <HW hw>
1980template <bool forceWE, typename D, typename S0, typename S1, HW hw_>
1981typename std::enable_if<hwGE(hw_, HW::Gen12LP)>::type
1982BinaryCodeGenerator<hw>::opX(Opcode op, DataType defaultType, const InstructionModifier &mod, D dst, S0 src0, S1 src1)
1983{
1984 typename EncodingTag12Dispatch<hw>::tag tag;
1985 Instruction12 i{};
1986
1987 InstructionModifier emod = mod | defaultModifier;
1988 if (forceWE)
1989 emod |= NoMask;
1990
1991 dst.fixup(hw, emod.getExecSize(), defaultType, true, 2);
1992 src0.fixup(hw, emod.getExecSize(), defaultType, false, 2);
1993 src1.fixup(hw, emod.getExecSize(), defaultType, false, 2);
1994
1995 encodeCommon12(i, op, emod, dst, tag);
1996
1997 i.binary.dst = encodeBinaryOperand12<true>(dst, tag).bits;
1998 i.binary.src0 = encodeBinaryOperand12<false>(src0, tag).bits;
1999 i.binary.src1 = encodeBinaryOperand12<false>(src1, tag).bits;
2000
2001 i.binary.dstAddrMode = dst.isIndirect();
2002 i.binary.dstType = getTypecode12(dst.getType());
2003 i.binary.src0Type = getTypecode12(src0.getType());
2004 i.binary.src1Type = getTypecode12(src1.getType());
2005
2006 i.binary.src0Mods = src0.getMods();
2007 i.binary.src1Mods = src1.getMods();
2008
2009 i.binary.cmod = static_cast<int>(mod.getCMod());
2010
2011 db(i);
2012}
2013
2014template <HW hw>
2015template <bool forceWE, typename D, typename S0, HW hw_>
2016typename std::enable_if<hwLT(hw_, HW::Gen12LP)>::type
2017BinaryCodeGenerator<hw>::opX(Opcode op, DataType defaultType, const InstructionModifier &mod, D dst, S0 src0, const Immediate &src1)
2018{
2019 Instruction8 i{};
2020 InstructionModifier emod = mod | defaultModifier;
2021 if (forceWE)
2022 emod |= NoMask;
2023
2024 dst.fixup(hw, emod.getExecSize(), defaultType, true, 2);
2025 src0.fixup(hw, emod.getExecSize(), defaultType, false, 2);
2026 src1.fixup(hw, emod.getExecSize(), defaultType, false, 2);
2027
2028 encodeCommon8(i, op, emod);
2029 i.common.accessMode = std::is_base_of<Align16Operand, D>::value;
2030
2031 i.binary.dst = encodeBinaryOperand8<true>(dst).bits;
2032 i.binary.src0 = encodeBinaryOperand8<false>(src0).bits;
2033
2034 if (dst.isIndirect()) i.binary.dstAddrImm9 = dst.getOffset() >> 9;
2035 if (src0.isIndirect()) i.binary.src0AddrImm9 = src0.getOffset() >> 9;
2036
2037 i.binary.dstType = getTypecode<hw>(dst.getType());
2038 i.binary.src0Type = getTypecode<hw>(src0.getType());
2039 i.binary.src1Type = getImmediateTypecode<hw>(src1.getType());
2040
2041 i.binary.dstRegFile = getRegFile(dst);
2042 i.binary.src0RegFile = getRegFile(src0);
2043 i.binary.src1RegFile = getRegFile(src1);
2044
2045 i.imm32.value = static_cast<uint64_t>(src1);
2046
2047 db(i);
2048}
2049
2050template <HW hw>
2051template <bool forceWE, typename D, typename S0, HW hw_>
2052typename std::enable_if<hwGE(hw_, HW::Gen12LP)>::type
2053BinaryCodeGenerator<hw>::opX(Opcode op, DataType defaultType, const InstructionModifier &mod, D dst, S0 src0, const Immediate &src1)
2054{
2055 typename EncodingTag12Dispatch<hw>::tag tag;
2056 Instruction12 i{};
2057
2058 InstructionModifier emod = mod | defaultModifier;
2059 if (forceWE)
2060 emod |= NoMask;
2061
2062 dst.fixup(hw, emod.getExecSize(), defaultType, true, 2);
2063 src0.fixup(hw, emod.getExecSize(), defaultType, false, 2);
2064 src1.fixup(hw, emod.getExecSize(), defaultType, false, 2);
2065
2066 encodeCommon12(i, op, emod, dst, tag);
2067
2068 i.binary.dst = encodeBinaryOperand12<true>(dst, tag).bits;
2069 i.binary.src0 = encodeBinaryOperand12<false>(src0, tag).bits;
2070 i.binary.src1 = static_cast<uint64_t>(src1);
2071
2072 i.binary.dstAddrMode = dst.isIndirect();
2073 i.binary.dstType = getTypecode12(dst.getType());
2074 i.binary.src0Type = getTypecode12(src0.getType());
2075 i.binary.src1Type = getTypecode12(src1.getType());
2076
2077 i.binary.src0Mods = src0.getMods();
2078
2079 i.binary.cmod = static_cast<int>(mod.getCMod());
2080
2081 i.binary.src1Imm = true;
2082 i.imm32.value = static_cast<uint64_t>(src1);
2083
2084 db(i);
2085}
2086
2087template <HW hw>
2088template <HW hw_>
2089typename std::enable_if<hwLE(hw_, HW::Gen9)>::type
2090BinaryCodeGenerator<hw>::opX(Opcode op, DataType defaultType, const InstructionModifier &mod, RegData dst, RegData src0, RegData src1, RegData src2)
2091{
2092 opX(op, defaultType, mod, emulateAlign16Dst(dst), emulateAlign16Src(src0),
2093 emulateAlign16Src(src1), emulateAlign16Src(src2));
2094}
2095
2096
2097template <HW hw>
2098template <HW hw_>
2099typename std::enable_if<hwLT(hw_, HW::Gen12LP)>::type
2100BinaryCodeGenerator<hw>::opX(Opcode op, DataType defaultType, const InstructionModifier &mod, Align16Operand dst, Align16Operand src0, Align16Operand src1, Align16Operand src2)
2101{
2102#ifdef NGEN_SAFE
2103 if (dst.getReg().isARF()) throw grf_expected_exception();
2104 if (src0.getReg().isARF()) throw grf_expected_exception();
2105 if (src1.getReg().isARF()) throw grf_expected_exception();
2106 if (src2.getReg().isARF()) throw grf_expected_exception();
2107#endif
2108
2109 Instruction8 i{};
2110 InstructionModifier emod = mod | defaultModifier | Align16;
2111
2112 dst.getReg().fixup(hw, emod.getExecSize(), defaultType, true, 3);
2113 src0.getReg().fixup(hw, emod.getExecSize(), defaultType, false, 3);
2114 src1.getReg().fixup(hw, emod.getExecSize(), defaultType, false, 3);
2115 src2.getReg().fixup(hw, emod.getExecSize(), defaultType, false, 3);
2116
2117 encodeCommon8(i, op, emod);
2118
2119 i.ternary16.dstChanEn = dst.getChanEn();
2120 i.ternary16.dstRegNum = dst.getReg().getBase();
2121 i.ternary16.dstSubregNum2_4 = dst.getReg().getByteOffset() >> 2;
2122 i.ternary16.dstType = getTernary16Typecode8(dst.getReg().getType());
2123
2124 i.ternary16.srcType = getTernary16Typecode8(src0.getReg().getType());
2125
2126 bool isFOrHF = (src0.getReg().getType() == DataType::f
2127 || src0.getReg().getType() == DataType::hf);
2128
2129 i.ternary16.src1Type = isFOrHF && (src1.getReg().getType() == DataType::hf);
2130 i.ternary16.src2Type = isFOrHF && (src1.getReg().getType() == DataType::hf);
2131
2132 encodeTernaryCommon8(i, src0, src1, src2);
2133
2134 db(i);
2135}
2136
2137template <HW hw>
2138template <typename D, typename S0, typename S1, typename S2, HW hw_>
2139typename std::enable_if<hwLT(hw_, HW::Gen12LP)>::type
2140BinaryCodeGenerator<hw>::opX(Opcode op, DataType defaultType, const InstructionModifier &mod, D dst, S0 src0, S1 src1, S2 src2)
2141{
2142 if (hw < HW::Gen10)
2143 unsupported();
2144
2145#ifdef NGEN_SAFE
2146 if (src0.isARF()) throw grf_expected_exception();
2147 if (src2.isARF()) throw grf_expected_exception();
2148#endif
2149
2150 Instruction8 i{};
2151 InstructionModifier emod = mod | defaultModifier;
2152
2153 dst.fixup(hw, emod.getExecSize(), defaultType, true, 3);
2154 src0.fixup(hw, emod.getExecSize(), defaultType, false, 3);
2155 src1.fixup(hw, emod.getExecSize(), defaultType, false, 3);
2156 src2.fixup(hw, emod.getExecSize(), defaultType, false, 3);
2157
2158 encodeCommon8(i, op, emod);
2159
2160 i.ternary1.src0RegFile = std::is_base_of<Immediate, S0>::value;
2161 i.ternary1.src1RegFile = src1.isARF();
2162 i.ternary1.src2RegFile = std::is_base_of<Immediate, S2>::value;
2163
2164 encodeTernaryCommon8(i, src0, src1, src2);
2165 encodeTernary1Dst10(i, dst);
2166
2167 db(i);
2168}
2169
2170template <HW hw>
2171template <typename D, typename S0,typename S1, typename S2, HW hw_>
2172typename std::enable_if<hwGE(hw_, HW::Gen12LP)>::type
2173BinaryCodeGenerator<hw>::opX(Opcode op, DataType defaultType, const InstructionModifier &mod, D dst, S0 src0, S1 src1, S2 src2)
2174{
2175 typename EncodingTag12Dispatch<hw>::tag tag;
2176 Instruction12 i{};
2177 InstructionModifier emod = mod | defaultModifier;
2178
2179 dst.fixup(hw, emod.getExecSize(), defaultType, true, 3);
2180 src0.fixup(hw, emod.getExecSize(), defaultType, false, 3);
2181 src1.fixup(hw, emod.getExecSize(), defaultType, false, 3);
2182 src2.fixup(hw, emod.getExecSize(), defaultType, false, 3);
2183
2184 encodeCommon12(i, op, emod, dst, tag);
2185
2186 i.ternary.dst = encodeTernaryOperand12<true>(dst, tag).bits;
2187 encodeTernarySrc0(i, src0, tag);
2188 encodeTernarySrc1(i, src1, tag);
2189 encodeTernarySrc2(i, src2, tag);
2190 encodeTernaryTypes(i, dst, src0, src1, src2);
2191
2192 i.ternary.cmod = static_cast<int>(mod.getCMod());
2193
2194 db(i);
2195}
2196
2197template <HW hw>
2198template <typename DS0>
2199void BinaryCodeGenerator<hw>::opMath(Opcode op, DataType defaultType, const InstructionModifier &mod, MathFunction fc, DS0 dst, DS0 src0)
2200{
2201 InstructionModifier mmod = mod;
2202
2203 mmod.setCMod(static_cast<ConditionModifier>(fc));
2204 opX(op, defaultType, mmod, dst, src0);
2205}
2206
2207template <HW hw>
2208template <typename DS0, typename S1>
2209void BinaryCodeGenerator<hw>::opMath(Opcode op, DataType defaultType, const InstructionModifier &mod, MathFunction fc, DS0 dst, DS0 src0, S1 src1)
2210{
2211 InstructionModifier mmod = mod;
2212
2213 mmod.setCMod(static_cast<ConditionModifier>(fc));
2214 opX(op, defaultType, mmod, dst, src0, src1);
2215}
2216
2217template <HW hw>
2218template <typename D, typename S0, typename S2>
2219void BinaryCodeGenerator<hw>::opBfn(Opcode op, DataType defaultType, const InstructionModifier &mod, int bfnCtrl, D dst, S0 src0, RegData src1, S2 src2)
2220{
2221 if (hw < HW::XeHP)
2222 unsupported();
2223
2224 typename EncodingTag12Dispatch<hw>::tag tag;
2225 Instruction12 i{};
2226 InstructionModifier emod = mod | defaultModifier;
2227
2228 dst.fixup(hw, emod.getExecSize(), defaultType, true, 3);
2229 src0.fixup(hw, emod.getExecSize(), defaultType, false, 3);
2230 src1.fixup(hw, emod.getExecSize(), defaultType, false, 3);
2231 src2.fixup(hw, emod.getExecSize(), defaultType, false, 3);
2232
2233 encodeCommon12(i, op, emod, dst, tag);
2234
2235 i.ternary.dst = encodeTernaryOperand12<true>(dst, tag).bits;
2236 encodeTernarySrc0(i, src0, tag);
2237 encodeTernarySrc1(i, src1, tag);
2238 encodeTernarySrc2(i, src2, tag);
2239 encodeTernaryTypes(i, dst, src0, src1, src2);
2240
2241 i.ternary.cmod = static_cast<int>(mod.getCMod());
2242
2243 i.bfn.bfnCtrl03 = (bfnCtrl >> 0);
2244 i.bfn.bfnCtrl47 = (bfnCtrl >> 4);
2245
2246 db(i);
2247}
2248
2249template <HW hw>
2250void BinaryCodeGenerator<hw>::opDpas(Opcode op, DataType defaultType, const InstructionModifier &mod, int sdepth, int rcount, RegData dst, RegData src0, RegData src1, RegData src2)
2251{
2252 if (hw < HW::XeHP)
2253 unsupported();
2254
2255 typename EncodingTag12Dispatch<hw>::tag tag;
2256 Instruction12 i{};
2257 InstructionModifier emod = mod | defaultModifier;
2258
2259 dst.fixup(hw, emod.getExecSize(), defaultType, true, 3);
2260 src0.fixup(hw, emod.getExecSize(), defaultType, false, 3);
2261 src1.fixup(hw, emod.getExecSize(), defaultType, false, 3);
2262 src2.fixup(hw, emod.getExecSize(), defaultType, false, 3);
2263
2264 encodeCommon12(i, op, emod, dst, tag);
2265
2266 i.ternary.dst = encodeTernaryOperand12<true, false>(dst, tag).bits;
2267 i.ternary.src0 = encodeTernaryOperand12<false, false>(src0, tag).bits;
2268 i.ternary.src1 = encodeTernaryOperand12<false, false>(src1, tag).bits;
2269 i.ternary.src2 = encodeTernaryOperand12<false, false>(src2, tag).bits;
2270
2271 encodeTernaryTypes(i, dst, src0, src1, src2);
2272
2273 i.dpas.rcount = rcount - 1;
2274 i.dpas.sdepth = utils::log2(sdepth);
2275
2276 // i.dpas.src1SubBytePrecision = 0; // TODO: 0 -> (none), 1 -> u4/s4, 2 -> u2/s2
2277 // i.dpas.src2SubBytePrecision = 0;
2278
2279 i.ternary.cmod = static_cast<int>(mod.getCMod());
2280
2281 db(i);
2282}
2283
2284template <HW hw>
2285template <typename D, HW hw_>
2286typename std::enable_if<hwLT(hw_, HW::Gen12LP)>::type
2287BinaryCodeGenerator<hw>::opSend(Opcode op, const InstructionModifier &mod, SharedFunction sfid, const RegData &dst, const RegData &src0, const RegData &src1, uint32_t exdesc, D desc)
2288{
2289 exdesc |= uint32_t(static_cast<uint8_t>(sfid));
2290 opSends(static_cast<Opcode>(static_cast<uint8_t>(op) | 2), mod, dst, src0, src1, exdesc, desc);
2291}
2292
2293template <HW hw>
2294template <typename D, HW hw_>
2295typename std::enable_if<hwLT(hw_, HW::Gen12LP)>::type
2296BinaryCodeGenerator<hw>::opSend(Opcode op, const InstructionModifier &mod, SharedFunction sfid, const RegData &dst, const RegData &src0, const RegData &src1, const RegData &exdesc, D desc)
2297{
2298 opSends(static_cast<Opcode>(static_cast<uint8_t>(op) | 2), mod, dst, src0, src1, exdesc, desc);
2299}
2300
2301template <HW hw>
2302template <typename ED, typename D, HW hw_>
2303typename std::enable_if<hwGE(hw_, HW::Gen12LP)>::type
2304BinaryCodeGenerator<hw>::opSend(Opcode op, const InstructionModifier &mod, SharedFunction sfid, const RegData &dst, const RegData &src0, const RegData &src1, ED exdesc, D desc)
2305{
2306 typename EncodingTag12Dispatch<hw>::tag tag;
2307 Instruction12 i{};
2308 InstructionModifier emod = mod | defaultModifier;
2309
2310 encodeCommon12(i, op, emod, dst, tag);
2311
2312 i.send.fusionCtrl = emod.isSerialized();
2313
2314 i.send.dstReg = dst.getBase();
2315 i.send.src0Reg = src0.getBase();
2316 i.send.src1Reg = src1.getBase();
2317
2318 i.send.dstRegFile = getRegFile(dst);
2319 i.send.src0RegFile = getRegFile(src0);
2320 i.send.src1RegFile = getRegFile(src1);
2321
2322 i.send.sfid = static_cast<int>(sfid) & 0xF;
2323
2324 encodeSendDesc(i, desc);
2325 encodeSendExDesc(i, exdesc);
2326
2327 db(i);
2328}
2329
2330template <HW hw>
2331template <HW hw_>
2332typename std::enable_if<hwLT(hw_, HW::Gen12LP)>::type
2333BinaryCodeGenerator<hw>::opSend(Opcode op, const InstructionModifier &mod, const RegData &dst, const RegData &src0, uint32_t exdesc, uint32_t desc)
2334{
2335 Instruction8 i{};
2336 InstructionModifier emod = mod | defaultModifier;
2337
2338 encodeCommon8(i, op, emod);
2339
2340 i.binary.dst = encodeBinaryOperand8<true>(dst).bits;
2341 i.binary.src0 = encodeBinaryOperand8<false>(src0).bits;
2342
2343 i.sendsGen9.dstRegFile = getRegFile(dst);
2344 i.binary.src0RegFile = getRegFile(src0);
2345 i.binary.src1RegFile = RegFileIMM;
2346
2347 i.binary.dstType = getTypecode<hw>(dst.getType());
2348
2349 i.sendsGen9.sfid = exdesc & 0xF;
2350 i.sendGen8.zero = 0;
2351 i.sendGen8.exDesc16_19 = (exdesc >> 16) & 0xF;
2352 i.sendGen8.exDesc20_23 = (exdesc >> 20) & 0xF;
2353 i.sendGen8.exDesc24_27 = (exdesc >> 24) & 0xF;
2354 i.sendGen8.exDesc28_31 = (exdesc >> 28) & 0xF;
2355 i.sendsGen9.desc = desc;
2356
2357 i.sendsGen9.eot = (exdesc >> 5) & 1;
2358 if (dst.isIndirect()) i.sendsGen9.dstAddrImm9 = dst.getOffset() >> 9;
2359
2360 db(i);
2361}
2362
2363template <HW hw>
2364template <HW hw_>
2365typename std::enable_if<hwLT(hw_, HW::Gen12LP)>::type
2366BinaryCodeGenerator<hw>::opSend(Opcode op, const InstructionModifier &mod, const RegData &dst, const RegData &src0, uint32_t exdesc, const RegData &desc)
2367{
2368#ifdef NGEN_SAFE
2369 // Only a0.0:ud is allowed for desc.
2370 if (!desc.isARF() || desc.getARFType() != ARFType::a || desc.getARFBase() != 0 || desc.getOffset() != 0)
2371 throw invalid_arf_exception();
2372#endif
2373 Instruction8 i{};
2374 InstructionModifier emod = mod | defaultModifier;
2375
2376 encodeCommon8(i, op, emod);
2377
2378 i.binary.dst = encodeBinaryOperand8<true>(dst).bits;
2379 i.binary.src0 = encodeBinaryOperand8<false>(src0).bits;
2380 i.binary.src1 = encodeBinaryOperand8<false>(desc).bits;
2381
2382 i.sendsGen9.dstRegFile = getRegFile(dst);
2383 i.binary.src0RegFile = getRegFile(src0);
2384 i.binary.src1RegFile = getRegFile(desc);
2385 i.binary.src1Type = getTypecode<hw>(desc.getType());
2386
2387 i.sendsGen9.sfid = exdesc & 0xF;
2388 i.sendGen8.zero = 0;
2389 i.sendGen8.exDesc16_19 = (exdesc >> 16) & 0xF;
2390 i.sendGen8.exDesc20_23 = (exdesc >> 20) & 0xF;
2391 i.sendGen8.exDesc24_27 = (exdesc >> 24) & 0xF;
2392 i.sendGen8.exDesc28_31 = (exdesc >> 28) & 0xF;
2393
2394 i.sendsGen9.eot = (exdesc >> 5) & 1;
2395 if (dst.isIndirect()) i.sendsGen9.dstAddrImm9 = dst.getOffset() >> 9;
2396
2397 db(i);
2398}
2399
2400template <HW hw>
2401template <typename D, HW hw_>
2402typename std::enable_if<hwGE(hw_, HW::Gen12LP)>::type
2403BinaryCodeGenerator<hw>::opSend(Opcode op, const InstructionModifier &mod, const RegData &dst, const RegData &src0, uint32_t exdesc, D desc)
2404{
2405 opSends(op, mod, dst, src0, null, exdesc, desc);
2406}
2407
2408template <HW hw>
2409template <typename ED, typename D, HW hw_>
2410typename std::enable_if<hwLT(hw_, HW::Gen12LP)>::type
2411BinaryCodeGenerator<hw>::opSends(Opcode op, const InstructionModifier &mod, const RegData &dst, const RegData &src0, const RegData &src1, ED exdesc, D desc)
2412{
2413 Instruction8 i{};
2414 InstructionModifier emod = mod | defaultModifier;
2415
2416 encodeCommon8(i, op, emod);
2417
2418 i.binary.dst = encodeBinaryOperand8<true>(dst).bits;
2419 i.binary.src0 = encodeBinaryOperand8<false>(src0).bits;
2420
2421 i.binary.src0RegFile = 0; // ?
2422 i.sendsGen9.dstRegFile = getRegFile(dst);
2423 i.sendsGen9.src1RegFile = getRegFile(src1);
2424 i.sendsGen9.src1RegNum = src1.getBase();
2425
2426 if (dst.isIndirect()) i.sendsGen9.dstAddrImm9 = dst.getOffset() >> 9;
2427 if (src0.isIndirect()) i.sendsGen9.src0AddrImm9 = src0.getOffset() >> 9;
2428
2429 encodeSendsDesc(i, desc);
2430 encodeSendsExDesc(i, exdesc);
2431
2432 db(i);
2433}
2434
2435template <HW hw>
2436template <typename D, HW hw_>
2437typename std::enable_if<hwGE(hw_, HW::Gen12LP)>::type
2438BinaryCodeGenerator<hw>::opSends(Opcode op, const InstructionModifier &mod, const RegData &dst, const RegData &src0, const RegData &src1, RegData exdesc, D desc)
2439{
2440#ifdef NGEN_SAFE
2441 throw sfid_needed_exception();
2442#endif
2443}
2444
2445template <HW hw>
2446template <typename D, HW hw_>
2447typename std::enable_if<hwGE(hw_, HW::Gen12LP)>::type
2448BinaryCodeGenerator<hw>::opSends(Opcode op, const InstructionModifier &mod, const RegData &dst, const RegData &src0, const RegData &src1, uint32_t exdesc, D desc)
2449{
2450 Opcode mop = static_cast<Opcode>(static_cast<int>(op) & ~2);
2451 opSend(mop, mod, static_cast<SharedFunction>(exdesc & 0x1F), dst, src0, src1, exdesc, desc);
2452}
2453
2454template <HW hw>
2455template <HW hw_>
2456typename std::enable_if<hwLT(hw_, HW::Gen12LP)>::type
2457BinaryCodeGenerator<hw>::opBranch(Opcode op, const InstructionModifier &mod, const RegData &dst, int32_t jip, int32_t uip)
2458{
2459 Instruction8 i{};
2460 InstructionModifier emod = mod | defaultModifier;
2461
2462 encodeCommon8(i, op, emod);
2463
2464 i.binary.dst = encodeBinaryOperand8<true>(dst).bits;
2465 i.binary.dstRegFile = getRegFile(dst);
2466 i.binary.dstType = getTypecode<hw>(dst.getType());
2467 i.binary.src0RegFile = getRegFile(Immediate());
2468 i.binary.src0Type = getTypecode<hw>(DataType::d);
2469 i.branches.jip = jip;
2470 i.branches.uip = uip;
2471
2472 db(i);
2473}
2474
2475template <HW hw>
2476template <HW hw_>
2477typename std::enable_if<hwGE(hw_, HW::Gen12LP)>::type
2478BinaryCodeGenerator<hw>::opBranch(Opcode op, const InstructionModifier &mod, const RegData &dst, int32_t jip, int32_t uip)
2479{
2480 typename EncodingTag12Dispatch<hw>::tag tag;
2481 Instruction12 i{};
2482 InstructionModifier emod = mod | defaultModifier;
2483
2484 encodeCommon12(i, op, emod, dst, tag);
2485
2486 i.branches.branchCtrl = emod.getBranchCtrl();
2487
2488 i.binary.dst = encodeBinaryOperand12<true, false>(dst, tag).bits;
2489
2490 i.binary.src0Imm = true;
2491 i.binary.src1Imm = true;
2492
2493 i.branches.jip = jip;
2494 i.branches.uip = uip;
2495
2496 db(i);
2497}
2498
2499template <HW hw>
2500template <bool forceWE, HW hw_>
2501typename std::enable_if<hwLT(hw_, HW::Gen12LP)>::type
2502BinaryCodeGenerator<hw>::opBranch(Opcode op, const InstructionModifier &mod, const RegData &dst, int32_t jip)
2503{
2504 Instruction8 i{};
2505 InstructionModifier emod = mod | defaultModifier;
2506 if (forceWE)
2507 emod |= NoMask;
2508
2509 encodeCommon8(i, op, emod);
2510
2511 i.binary.dst = encodeBinaryOperand8<true>(dst).bits;
2512 i.binary.dstRegFile = getRegFile(dst);
2513 i.binary.dstType = getTypecode<hw>(dst.getType());
2514 i.binary.src1RegFile = RegFileIMM;
2515 i.binary.src1Type = getTypecode<hw>(DataType::d);
2516 i.branches.jip = jip;
2517
2518 db(i);
2519}
2520
2521template <HW hw>
2522template <bool forceWE, HW hw_>
2523typename std::enable_if<hwGE(hw_, HW::Gen12LP)>::type
2524BinaryCodeGenerator<hw>::opBranch(Opcode op, const InstructionModifier &mod, const RegData &dst, int32_t jip)
2525{
2526 typename EncodingTag12Dispatch<hw>::tag tag;
2527 Instruction12 i{};
2528 InstructionModifier emod = mod | defaultModifier;
2529 if (forceWE)
2530 emod |= NoMask;
2531
2532 encodeCommon12(i, op, emod, dst, tag);
2533
2534 i.branches.branchCtrl = emod.getBranchCtrl();
2535
2536 i.binary.dst = encodeBinaryOperand12<true, false>(dst, tag).bits;
2537 i.binary.src0Imm = true;
2538 i.branches.jip = jip;
2539
2540 db(i);
2541}
2542
2543template <HW hw>
2544template <bool forceWE, bool small12, HW hw_>
2545typename std::enable_if<hwLT(hw_, HW::Gen12LP)>::type
2546BinaryCodeGenerator<hw>::opBranch(Opcode op, const InstructionModifier &mod, const RegData &dst, const RegData &src0)
2547{
2548 Instruction8 i{};
2549 InstructionModifier emod = mod | defaultModifier;
2550 if (forceWE)
2551 emod |= NoMask;
2552
2553 encodeCommon8(i, op, emod);
2554
2555 i.binary.dst = encodeBinaryOperand8<true>(dst).bits;
2556 i.binary.dstRegFile = getRegFile(dst);
2557 i.binary.dstType = getTypecode<hw>(DataType::d);
2558 i.binary.src0RegFile = getRegFile(src0);
2559 i.binary.src0Type = getTypecode<hw>(DataType::d);
2560 i.binary.src0 = encodeBinaryOperand8<false>(src0).bits;
2561
2562 db(i);
2563}
2564
2565template <HW hw>
2566template <bool forceWE, bool small12, HW hw_>
2567typename std::enable_if<hwGE(hw_, HW::Gen12LP)>::type
2568BinaryCodeGenerator<hw>::opBranch(Opcode op, const InstructionModifier &mod, const RegData &dst, const RegData &src0)
2569{
2570 typename EncodingTag12Dispatch<hw>::tag tag;
2571 Instruction12 i{};
2572 InstructionModifier emod = mod | defaultModifier;
2573 if (forceWE)
2574 emod |= NoMask;
2575
2576 encodeCommon12(i, op, emod, dst, tag);
2577
2578 i.branches.branchCtrl = emod.getBranchCtrl();
2579
2580 i.binary.dst = encodeBinaryOperand12<true, false>(dst, tag).bits;
2581 i.binary.src0 = encodeBinaryOperand12<false, false>(src0, tag).bits;
2582 if (small12)
2583 i.binary.src0 &= 0xFFFF;
2584
2585 db(i);
2586}
2587
2588template <HW hw>
2589void BinaryCodeGenerator<hw>::opBranch(Opcode op, const InstructionModifier &mod, const RegData &dst, Label &jip, Label &uip)
2590{
2591 addFixup(LabelFixup(jip.getID(labelManager), LabelFixup::JIPOffset));
2592 addFixup(LabelFixup(uip.getID(labelManager), LabelFixup::UIPOffset));
2593 opBranch(op, mod, dst, 0, 0);
2594}
2595
2596template <HW hw>
2597template <bool forceWE>
2598void BinaryCodeGenerator<hw>::opBranch(Opcode op, const InstructionModifier &mod, const RegData &dst, Label &jip)
2599{
2600 addFixup(LabelFixup(jip.getID(labelManager), LabelFixup::JIPOffset));
2601 opBranch<forceWE>(op, mod, dst, 0);
2602}
2603
2604template <HW hw>
2605void BinaryCodeGenerator<hw>::opCall(Opcode op, const InstructionModifier &mod, const RegData &dst, Label &jip)
2606{
2607 addFixup(LabelFixup(jip.getID(labelManager), LabelFixup::JIPOffset));
2608 if (isGen12)
2609 opBranch<true>(op, mod, dst, 0);
2610 else
2611 opX<true>(op, DataType::d, mod, dst, null.ud(0)(0, 1, 0), Immediate::d(0));
2612}
2613
2614template <HW hw>
2615template <HW hw_>
2616typename std::enable_if<hwLT(hw_, HW::Gen12LP)>::type
2617BinaryCodeGenerator<hw>::opJmpi(Opcode op, const InstructionModifier &mod, const RegData &dst, RegData src0, uint32_t jip)
2618{
2619 Instruction8 i{};
2620 InstructionModifier emod = mod | defaultModifier | NoMask;
2621
2622 encodeCommon8(i, op, emod);
2623
2624 src0.fixup(hw, emod.getExecSize(), DataType::d, false, 2);
2625
2626 i.binary.dst = encodeBinaryOperand8<true>(dst).bits;
2627 i.binary.src0 = encodeBinaryOperand8<false>(src0).bits;
2628 i.binary.src0RegFile = getRegFile(src0);
2629 i.binary.src1RegFile = RegFileIMM;
2630 i.binary.src1Type = getTypecode<hw>(DataType::d);
2631
2632 i.branches.jip = jip;
2633
2634 db(i);
2635}
2636
2637template <HW hw>
2638template <HW hw_>
2639typename std::enable_if<hwGE(hw_, HW::Gen12LP)>::type
2640BinaryCodeGenerator<hw>::opJmpi(Opcode op, const InstructionModifier &mod, const RegData &dst, RegData src0, uint32_t jip)
2641{
2642 opBranch<true>(op, mod, dst, jip);
2643}
2644
2645template <HW hw>
2646void BinaryCodeGenerator<hw>::opJmpi(Opcode op, const InstructionModifier &mod, const RegData &dst, const RegData &src0, Label &jip)
2647{
2648 if (hw >= HW::Gen12LP)
2649 addFixup(LabelFixup(jip.getID(labelManager), LabelFixup::JIPOffset));
2650 opJmpi(op, mod, dst, src0, 0);
2651 if (hw < HW::Gen12LP)
2652 addFixup(LabelFixup(jip.getID(labelManager), LabelFixup::JIPOffsetJMPI));
2653}
2654
2655template <HW hw>
2656void BinaryCodeGenerator<hw>::opSync(Opcode op, SyncFunction fc, const InstructionModifier &mod)
2657{
2658 if (hw < HW::Gen12LP)
2659 unsupported();
2660
2661 typename EncodingTag12Dispatch<hw>::tag tag;
2662 Instruction12 i{};
2663 InstructionModifier emod = mod | defaultModifier;
2664
2665 encodeCommon12(i, op, emod, null, tag);
2666
2667 i.binary.dst = 0x1;
2668 i.binary.cmod = static_cast<int>(fc);
2669
2670 db(i);
2671}
2672
2673template <HW hw>
2674void BinaryCodeGenerator<hw>::opSync(Opcode op, SyncFunction fc, const InstructionModifier &mod, RegData src0)
2675{
2676 typename EncodingTag12Dispatch<hw>::tag tag;
2677 if (hw < HW::Gen12LP)
2678 unsupported();
2679
2680 Instruction12 i{};
2681 InstructionModifier emod = mod | defaultModifier;
2682
2683 encodeCommon12(i, op, emod, null, tag);
2684
2685 i.binary.dst = 0x1;
2686 if (!src0.isNull()) {
2687 src0.setRegion(0, 1, 0);
2688 i.binary.src0 = encodeBinaryOperand12<false>(src0, tag).bits;
2689 i.binary.src0Type = getTypecode12(src0.getType());
2690 }
2691 i.binary.cmod = static_cast<int>(fc);
2692
2693 db(i);
2694}
2695
2696template <HW hw>
2697void BinaryCodeGenerator<hw>::opSync(Opcode op, SyncFunction fc, const InstructionModifier &mod, const Immediate &src0)
2698{
2699 if (hw < HW::Gen12LP)
2700 unsupported();
2701
2702 typename EncodingTag12Dispatch<hw>::tag tag;
2703 Instruction12 i{};
2704 InstructionModifier emod = mod | defaultModifier;
2705
2706 encodeCommon12(i, op, emod, null, tag);
2707
2708 i.binary.dst = 0x1;
2709 i.binary.src0Type = getTypecode12(src0.getType());
2710 i.binary.src0Imm = true;
2711 i.binary.cmod = static_cast<int>(fc);
2712
2713 i.imm32.value = static_cast<uint64_t>(src0);
2714
2715 db(i);
2716}
2717
2718template <HW hw>
2719void BinaryCodeGenerator<hw>::opNop(Opcode op)
2720{
2721 Instruction8 i{};
2722
2723 i.qword[0] = static_cast<int>(op);
2724 i.qword[1] = 0;
2725
2726 db(i);
2727}
2728
2729} /* namespace ngen */
2730
2731#endif /* header guard */
2732