1/*******************************************************************************
2* Copyright 2019-2022 Intel Corporation
3*
4* Licensed under the Apache License, Version 2.0 (the "License");
5* you may not use this file except in compliance with the License.
6* You may obtain a copy of the License at
7*
8* http://www.apache.org/licenses/LICENSE-2.0
9*
10* Unless required by applicable law or agreed to in writing, software
11* distributed under the License is distributed on an "AS IS" BASIS,
12* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13* See the License for the specific language governing permissions and
14* limitations under the License.
15*******************************************************************************/
16
17#ifndef NGEN_CORE_HPP
18#define NGEN_CORE_HPP
19
20
#include <algorithm>
#include <cstdint>
#include <cstring>
#include <type_traits>
#include <vector>
25
26#include "ngen_utils.hpp"
27
28#ifndef NGEN_NO_OP_NAMES
29#if not +0
30#error Compile with -fno-operator-names [Linux/OS X] or without /Za [Windows] if you want to use and(), or(), xor(), or define NGEN_NO_OP_NAMES and use and_(), or_(), xor_().
31#endif
32#endif
33
34#ifdef NGEN_ASM
35#include <ostream>
36#endif
37
38#ifdef NGEN_SAFE
39#include <stdexcept>
40#endif
41
42/*
43 Syntax
44 ------
45
46 Register Syntax Overview
47 r17 Plain register
48 r17.f(4) -> r17.4:f
49 In fact, r17.4<0;1,0>:f, as subregisters default to
50 being scalar
51 r17.sub<float>(4) Same as above, allowing for C++ templating.
52 r17.f() -> r17.0:f (defaults to offset 0)
53 r17.sub<float>() Same as above
54 r17.df(3)(8,8,1) Register regioning (vertical stride, width, horizontal stride)
55 r17.df(3)(8,1) (Width, horiz. stride): vertical stride is inferred
56 r17.df(3)(1) Horizontal stride only: width, vertical stride inferred from execution size.
 r[a0.w(8)].f(4,4,1) Indirect addressing: VxH (if NGEN_SHORT_NAMES is defined; otherwise use indirect[a0...])
58 r[a0.w(8)].f(4,1) Indirect addressing: Vx1
59 -r17.q(1) Source modifier: negation
60 abs(r17) Source modifier: absolute value. Note that abs is defined in namespace ngen.
61 -abs(r3)
62 ~r17 Alternative syntax to -r17 for logical operations.
63 r17 + 3 ...is r20. Operators ++ and += are defined similarly.
64
65 Command Syntax Overview
 add(8, r3.f(0)(8,8,1), r9.f(0)(8,8,1), r12.f(0)(0,1,0)) -> add (8) r3.0<8;8,1>:f r9.0<8;8,1>:f r12.0<0;1,0>:f
67 add(8, r3.f(), r9.f(), r12.f()) Same as above. Register regions default to unit stride.
68 add<float>(8, r3, r9, r12) A default operand data type can be provided.
69 add<uint32_t>(8, r3, r9, r12.uw(8)(0,1,0)) Default operand types can be overridden.
70 add<float>(8, r3, r9, 3.14159f) The data type of scalar immediate values is inferred.
71 add<int32_t>(8, r3, r9, int16_t(12)) Here an int16_t immediate is mapped to the :w data type.
72 mul<float>(8, r3, r9, Immediate::vf(-1.0,1.0,-1.0,1.25)) Vector immediates require helper functions.
73 mov(8, r2.d(), Immediate::uv(7,6,5,4,3,2,1,0))
74 mov(8, r2.d(), Immediate::v(7,-6,5,-4,3,-2,1,0))
75
76 All modifiers for an instruction go in the first parameter, OR'ed together.
77 add(8 | M0, ...)
78 add(8 | W | ~f0.w(0) | sat, ...) Use NoMask instead of W if NGEN_SHORT_NAMES not defined.
79 add(8 | lt | f1_0, ...)
80 add(8 | ~any2h | f1, ...)
81 */
82
83namespace ngen {
84
// Compile-time mirror of the NGEN_SAFE macro: true when NGEN_SAFE is defined,
// false otherwise.
#ifdef NGEN_SAFE
static constexpr bool _safe_ = true;
#else
static constexpr bool _safe_ = false;
#endif
90
// Forward declarations.
// Types declared here are referenced before their definitions appear.
class RegData;
class Register;
class GRFDisp;
class Subregister;
class RegisterRegion;
class NullRegister;
class InstructionModifier;
struct Instruction12;
enum class Opcode;

// encodeCommon12 is overloaded on an encoding tag type; one overload is
// declared per tag (EncodingTag12, EncodingTagXeHPC).
struct EncodingTag12;
static inline void encodeCommon12(Instruction12 &i, Opcode opcode, const InstructionModifier &mod, const RegData &dst, EncodingTag12 tag);
struct EncodingTagXeHPC;
static inline void encodeCommon12(Instruction12 &i, Opcode opcode, const InstructionModifier &mod, const RegData &dst, EncodingTagXeHPC tag);
106
// Exceptions, used when NGEN_SAFE is defined.
// Each class derives from std::runtime_error and is default-constructible with
// a fixed message; none of them are declared when NGEN_SAFE is not defined.

#ifdef NGEN_SAFE
class invalid_type_exception : public std::runtime_error {
public:
    invalid_type_exception() : std::runtime_error("Instruction does not support this type or combination of types") {}
};
class invalid_object_exception : public std::runtime_error {
public:
    invalid_object_exception() : std::runtime_error("Object is invalid") {}
};
class invalid_immediate_exception : public std::runtime_error {
public:
    invalid_immediate_exception() : std::runtime_error("Invalid immediate value") {}
};
class invalid_modifiers_exception : public std::runtime_error {
public:
    invalid_modifiers_exception() : std::runtime_error("Invalid or conflicting modifiers") {}
};
class invalid_operand_exception : public std::runtime_error {
public:
    invalid_operand_exception() : std::runtime_error("Invalid operand to instruction") {}
};
class invalid_operand_count_exception : public std::runtime_error {
public:
    invalid_operand_count_exception() : std::runtime_error("Invalid operand count") {}
};
class invalid_arf_exception : public std::runtime_error {
public:
    invalid_arf_exception() : std::runtime_error("Invalid ARF specified") {}
};
class grf_expected_exception : public std::runtime_error {
public:
    grf_expected_exception() : std::runtime_error("GRF expected, but found an ARF") {}
};
class invalid_model_exception : public std::runtime_error {
public:
    invalid_model_exception() : std::runtime_error("Invalid addressing model specified") {}
};
class invalid_load_store_exception : public std::runtime_error {
public:
    invalid_load_store_exception() : std::runtime_error("Invalid operands for load/store/atomic") {}
};
class invalid_range_exception : public std::runtime_error {
public:
    invalid_range_exception() : std::runtime_error("Invalid register range") {}
};
class invalid_region_exception : public std::runtime_error {
public:
    invalid_region_exception() : std::runtime_error("Unsupported register region") {}
};
class missing_type_exception : public std::runtime_error {
public:
    missing_type_exception() : std::runtime_error("Operand is missing its type") {}
};
class read_only_exception : public std::runtime_error {
public:
    read_only_exception() : std::runtime_error("Memory model is read-only") {}
};
class stream_stack_underflow : public std::runtime_error {
public:
    stream_stack_underflow() : std::runtime_error("Stream stack underflow occurred") {}
};
class unfinished_stream_exception : public std::runtime_error {
public:
    unfinished_stream_exception() : std::runtime_error("An unfinished instruction stream is still active") {}
};
class dangling_label_exception : public std::runtime_error {
public:
    dangling_label_exception() : std::runtime_error("A label was referenced, but its location was not defined") {}
};
class multiple_label_exception : public std::runtime_error {
public:
    multiple_label_exception() : std::runtime_error("Label already has a location") {}
};
class unsupported_instruction : public std::runtime_error {
public:
    unsupported_instruction() : std::runtime_error("Instruction is not supported by the chosen hardware") {}
};
class unsupported_message : public std::runtime_error {
public:
    unsupported_message() : std::runtime_error("Message is not supported by the chosen hardware") {}
};
class iga_align16_exception : public std::runtime_error {
public:
    iga_align16_exception() : std::runtime_error("Align16 not supported by the IGA assembler; use binary output") {}
};
class sfid_needed_exception : public std::runtime_error {
public:
    sfid_needed_exception() : std::runtime_error("SFID must be specified on Gen12+") {}
};
class invalid_execution_size_exception : public std::runtime_error {
public:
    invalid_execution_size_exception() : std::runtime_error("Invalid execution size") {}
};
#endif
203
// Gen hardware generations.
// Enumerators are listed in increasing order, so code in this file compares
// them with relational operators (e.g. hw >= HW::Gen12LP).
enum class HW {
    Unknown,
    Gen9,
    Gen10,
    Gen11,
    XeLP,
    Gen12LP = XeLP,     // Alias: same value as XeLP.
    XeHP,
    XeHPG,
    XeHPC,
};
216
// Stepping IDs.
// Unscoped, unnamed enum: the enumerators are plain integer constants in
// namespace scope.
enum {
    SteppingPVCXTA0 = 3,
    SteppingPVCXTB0 = 5,
    SteppingPVCXTB4 = 7,
};
223
// Data types. Bits[0:4] are the ID, bits[5:7] hold log2(width in bytes).
enum class DataType : uint8_t {
    ud = 0x40,
    d = 0x41,
    uw = 0x22,
    w = 0x23,
    ub = 0x04,
    b = 0x05,
    df = 0x66,
    f = 0x47,
    uq = 0x68,
    q = 0x69,
    hf = 0x2A,
    bf = 0x2B,
    uv = 0x4D,
    v = 0x4E,
    vf = 0x4F,
    bf8 = 0x0C,
    tf32 = 0x50,
    invalid = 0x00
};

#ifdef NGEN_ASM
// Print the mnemonic of a data type; the table is indexed by the 5-bit type ID.
static inline std::ostream &operator<<(std::ostream &str, DataType type)
{
    static const char *names[32] = {
        "ud", "d", "uw", "w", "ub", "b", "df", "f",
        "uq", "q", "hf", "bf", "bf8", "uv", "v", "vf",
        "tf32", "", "", "", "", "", "", "",
        "", "", "", "", "", "", "", ""
    };
    const unsigned typeID = static_cast<uint8_t>(type) & 0x1F;
    return str << names[typeID];
}
#endif

// log2 of the element size in bytes, taken from the bits above the 5-bit ID.
static inline constexpr int getLog2Bytes(DataType type) { return (static_cast<int>(type) >> 5); }
// Element size in bytes.
static inline constexpr int getBytes(DataType type) { return (1 << getLog2Bytes(type)); }
// Element size in dwords (bytes / 4), with a minimum of 1 for types narrower than a dword.
static inline constexpr14 int getDwords(DataType type) { return std::max<int>(getBytes(type) >> 2, 1); }
259
260static inline constexpr bool isSigned(DataType type)
261{
262 return !(type == DataType::ub || type == DataType::uw || type == DataType::ud || type == DataType::uq);
263}
264
// Map a C++ type to its DataType. The primary template returns
// DataType::invalid; the specializations below cover the supported types.
template <typename T> static inline DataType getDataType() { return DataType::invalid; }

template <> inline DataType getDataType<uint64_t>() { return DataType::uq; }
template <> inline DataType getDataType<int64_t>() { return DataType::q; }
template <> inline DataType getDataType<uint32_t>() { return DataType::ud; }
template <> inline DataType getDataType<int32_t>() { return DataType::d; }
template <> inline DataType getDataType<uint16_t>() { return DataType::uw; }
template <> inline DataType getDataType<int16_t>() { return DataType::w; }
template <> inline DataType getDataType<uint8_t>() { return DataType::ub; }
template <> inline DataType getDataType<int8_t>() { return DataType::b; }
template <> inline DataType getDataType<double>() { return DataType::df; }
template <> inline DataType getDataType<float>() { return DataType::f; }
// The remaining specializations exist only when the corresponding macro is
// defined; the named types (half, bfloat16, ...) are not declared in this file.
#ifdef NGEN_HALF_TYPE
template <> inline DataType getDataType<half>() { return DataType::hf; }
#endif
#ifdef NGEN_BFLOAT16_TYPE
template <> inline DataType getDataType<bfloat16>() { return DataType::bf; }
#endif
#ifdef NGEN_BFLOAT8_TYPE
template <> inline DataType getDataType<bfloat8>() { return DataType::bf8; }
#endif
#ifdef NGEN_TFLOAT32_TYPE
template <> inline DataType getDataType<tfloat32>() { return DataType::tf32; }
#endif
289
// Math function codes.
enum class MathFunction : uint8_t {
    inv = 1,
    log = 2,
    exp = 3,
    sqt = 4,
    rsqt = 5,
    sin = 6,
    cos = 7,
    fdiv = 9,
    pow = 10,
    idiv = 11,
    iqot = 12,
    irem = 13,
    invm = 14,
    rsqtm = 15
};

// Number of source arguments taken by a math function. Only the low four bits
// of the code are examined; codes 0 and 8 (no enumerator) yield 0.
static inline int mathArgCount(MathFunction func)
{
    switch (static_cast<uint8_t>(func) & 0xF) {
        case 1:  case 2:  case 3:  case 4:
        case 5:  case 6:  case 7:  case 15:
            return 1;
        case 9:  case 10: case 11:
        case 12: case 13: case 14:
            return 2;
        default:
            return 0;
    }
}
313
#ifdef NGEN_ASM
// Print the mnemonic of a math function; codes without an enumerator print
// as an empty string.
static inline std::ostream &operator<<(std::ostream &str, MathFunction func)
{
    static const char *names[16] = {
        "", "inv", "log", "exp", "sqt", "rsqt", "sin", "cos",
        "", "fdiv", "pow", "idiv", "iqot", "irem", "invm", "rsqtm"
    };
    const unsigned code = static_cast<uint8_t>(func) & 0xF;
    return str << names[code];
}
#endif
322
323static inline bool hasIEEEMacro(HW hw) {
324 if (hw == HW::Gen12LP) return false;
325 if (hw == HW::XeHPG) return false;
326 return true;
327}
328
// Sync function codes.
enum class SyncFunction : uint8_t {
    nop = 0,
    allrd = 2,
    allwr = 3,
    bar = 14,
    host = 15
};

#ifdef NGEN_ASM
// Print the mnemonic of a sync function; codes without an enumerator print
// as an empty string.
static inline std::ostream &operator<<(std::ostream &str, SyncFunction func)
{
    static const char *names[16] = {
        "nop", "", "allrd", "allwr", "", "", "", "",
        "", "", "", "", "", "", "bar", "host"
    };
    const unsigned code = static_cast<uint8_t>(func) & 0xF;
    return str << names[code];
}
#endif
346
// Shared function IDs (SFIDs).
// Some numeric values are shared by two enumerators (ts/btd, vme/rta, cre/tgm);
// getMnemonic() picks the name according to the hardware generation.
enum class SharedFunction : uint8_t {
    null = 0x0,
    smpl = 0x2,
    gtwy = 0x3,
    dc2 = 0x4,
    rc = 0x5,
    urb = 0x6,
    ts = 0x7,
    vme = 0x8,
    dcro = 0x9,
    dc0 = 0xA,
    pixi = 0xB,
    dc1 = 0xC,
    cre = 0xD,
    btd = 0x7,
    rta = 0x8,
    ugml = 0x1,
    tgm = 0xD,
    slm = 0xE,
    ugm = 0xF,

    // alias
    sampler = smpl,
    gateway = gtwy,
    spawner = ts,
};

#ifdef NGEN_ASM
// Return the mnemonic for an SFID. The second name table is used when
// hw >= HW::XeHPG; the first one otherwise.
static inline const char *getMnemonic(SharedFunction sfid, HW hw)
{
    static const char *names[16] = {
        "null", ""    , "smpl", "gtwy", "dc2", "rc" , "urb", "ts" ,
        "vme" , "dcro", "dc0" , "pixi", "dc1", "cre", ""   , ""   ,
    };
    static const char *namesLSC[16] = {
        "null", "ugml", "smpl", "gtwy", "dc2", "rc" , "urb", "btd",
        "rta" , "dcro", "dc0" , "pixi", "dc1", "tgm", "slm", "ugm",
    };
    const unsigned index = static_cast<uint8_t>(sfid) & 0xF;
    if (hw >= HW::XeHPG)
        return namesLSC[index];
    return names[index];
}
#endif
390
// ARFs: high nybble of register # specifies type
enum class ARFType : uint8_t {
    null = 0,
    a = 1,
    acc = 2,
    f = 3,
    ce = 4,
    msg = 5,
    sp = 6,
    sr = 7,
    cr = 8,
    n = 9,
    ip = 10,
    tdr = 11,
    tm = 12,
    fc = 13,
    dbg = 15,
};

#ifdef NGEN_ASM
// Print the mnemonic of an ARF type; value 14 has no enumerator and prints
// as an empty string.
static inline std::ostream &operator<<(std::ostream &str, ARFType type)
{
    static const char *names[16] = {
        "null", "a", "acc", "f", "ce", "msg", "sp", "sr",
        "cr", "n", "ip", "tdr", "tm", "fc", "", "dbg"
    };
    const unsigned index = static_cast<uint8_t>(type) & 0xF;
    return str << names[index];
}

// Level of detail selector passed to the outputText() methods.
enum class PrintDetail {
    base = 0,
    sub_no_type = 1,
    sub = 2,
    hs = 3,
    vs_hs = 4,
    full = 5
};
#endif
420
// Invalid singleton class. Can be assigned to nGEN objects to invalidate them.
// Classes in this file provide operator=(const Invalid &) overloads that call
// invalidate(), so `obj = invalid;` marks obj as invalid.
static constexpr class Invalid {} invalid{};
423
// Hands out label IDs and records the target value bound to each one.
// IDs are consecutive integers starting at 0; a freshly issued ID has no
// target until setTarget() is called for it.
class LabelManager {
protected:
    uint32_t nextID;                    // ID returned by the next getNewID() call.
    std::vector<uint32_t> targets;      // targets[id]: bound target, or noTarget.

    enum TargetConstants : uint32_t {
        noTarget = uint32_t(-1),        // Sentinel stored for labels without a target.
    };

public:
    LabelManager() : nextID(0) {}

    // Allocate a new label ID with no target.
    uint32_t getNewID() {
        targets.push_back(TargetConstants::noTarget);
        return nextID++;
    }

    // True if `id` was issued by this manager and has been given a target.
    // IDs this manager never issued are reported as having no target rather
    // than indexing past the end of the table.
    bool hasTarget(uint32_t id) const {
        return (id < targets.size()) && (targets[id] != TargetConstants::noTarget);
    }

    // Bind `id` to `target`. With NGEN_SAFE, throws invalid_object_exception
    // for an ID this manager never issued and multiple_label_exception if the
    // ID already has a target.
    void setTarget(uint32_t id, uint32_t target) {
#ifdef NGEN_SAFE
        if (id >= targets.size()) throw invalid_object_exception();
        if (hasTarget(id)) throw multiple_label_exception();
#endif
        targets[id] = target;
    }

    // Add `offset` to the target of `id`. With NGEN_SAFE, throws
    // dangling_label_exception if the ID has no target.
    void offsetTarget(uint32_t id, uint32_t offset) {
#ifdef NGEN_SAFE
        if (!hasTarget(id)) throw dangling_label_exception();
#endif
        targets[id] += offset;
    }

    // Return the target of `id`. With NGEN_SAFE, throws
    // dangling_label_exception if the ID has no target.
    uint32_t getTarget(uint32_t id) const {
#ifdef NGEN_SAFE
        if (!hasTarget(id)) throw dangling_label_exception();
#endif
        return targets[id];
    }
};
466
467// An object representing a label.
468class Label {
469protected:
470 unsigned id : 31;
471 unsigned uninit : 1;
472
473public:
474 Label() : id(0), uninit(true) {}
475
476 uint32_t getID(LabelManager &man) {
477 if (uninit) {
478 id = man.getNewID();
479 uninit = false;
480 }
481 return id;
482 }
483
484 bool defined(const LabelManager &man) const {
485 return !uninit && man.hasTarget(id);
486 }
487
488 /* for compatibility with RegData */
489 void fixup(HW hw, int execSize, DataType defaultType, bool isDest, int arity) {}
490 constexpr14 bool isScalar() const { return false; }
491
492#ifdef NGEN_ASM
493 static const bool emptyOp = false;
494 inline void outputText(std::ostream &str, PrintDetail detail, LabelManager &man);
495#endif
496};
497
// Declared ahead of RegData so the class can name them as friends.
static inline bool operator==(const RegData &r1, const RegData &r2);
static inline bool operator!=(const RegData &r1, const RegData &r2);
500
// Superclass for registers, subregisters, and register regions, possibly
// with source modifiers.
// All state is packed into bitfields totalling 64 bits (checked by a
// static_assert after the class); the padding fields are always initialized
// to zero by the constructors.
class RegData {
protected:
    unsigned base : 8;          // Register number; for ARFs the high nybble is the ARFType.
    unsigned arf : 1;           // Nonzero for ARFs.
    signed off : 11;            // Subregister offset, in elements of `type`.
    unsigned mods : 2;          // Source modifiers: bit 0 = abs, bit 1 = negate.
    unsigned type : 8;          // DataType value.
    unsigned indirect : 1;      // Nonzero for indirect addressing.
    unsigned _pad1 : 1;
    unsigned vs : 7;            // Vertical stride.
    unsigned width : 5;         // Region width.
    unsigned hs : 6;            // Horizontal stride.
    unsigned _pad2 : 13;
    unsigned invalid : 1;       // Nonzero when the object has been invalidated.

    constexpr RegData(int base_, bool arf_, int off_, bool indirect_, DataType type_, int vs_, int width_, int hs_)
        : base(base_), arf(arf_), off(off_), mods(0), type(static_cast<int>(type_)), indirect(indirect_), _pad1(0), vs(vs_), width(width_), hs(hs_), _pad2(0), invalid(0) {}

public:
#ifdef NGEN_ASM
    static const bool emptyOp = false;
#endif

    // A default-constructed RegData is all zeros and marked invalid.
    constexpr RegData()
        : base(0), arf(0), off(0), mods(0), type(0), indirect(0), _pad1(0), vs(0), width(0), hs(0), _pad2(0), invalid(1) {}

    constexpr int getBase() const { return base; }
    constexpr bool isARF() const { return arf; }
    constexpr int getARFBase() const { return base & 0xF; }
    constexpr ARFType getARFType() const { return static_cast<ARFType>(base >> 4); }
    constexpr bool isIndirect() const { return indirect; }
    // Indirect operand whose vs field holds 0x7F (all seven bits set).
    constexpr bool isVxIndirect() const { return indirect && (vs == 0x7F); }
    // For indirect operands, `base` is split into two nybbles.
    constexpr int getIndirectBase() const { return base >> 4; }
    constexpr int getIndirectOff() const { return base & 0xF; }
    constexpr bool isNull() const { return isARF() && (getARFType() == ARFType::null); }
    constexpr bool isInvalid() const { return invalid; }
    constexpr bool isValid() const { return !invalid; }
    constexpr int getOffset() const { return off; }
    // Offset converted from elements to bytes.
    constexpr int getByteOffset() const { return off * getBytes(); }
    constexpr DataType getType() const { return static_cast<DataType>(type); }
    constexpr int getVS() const { return vs; }
    constexpr int getWidth() const { return width; }
    constexpr int getHS() const { return hs; }
    constexpr bool getNeg() const { return mods & 2; }
    constexpr bool getAbs() const { return mods & 1; }
    constexpr int getMods() const { return mods; }
    constexpr int getBytes() const { return ngen::getBytes(getType()); }
    constexpr14 int getDwords() const { return ngen::getDwords(getType()); }
    // Scalar means region <0;1,0>.
    constexpr bool isScalar() const { return hs == 0 && vs == 0 && width == 1; }

    // Setters return *this so calls can be chained.
    constexpr14 RegData &setBase(int base_) { base = base_; return *this; }
    constexpr14 RegData &setOffset(int off_) { off = off_; return *this; }
    constexpr14 RegData &setType(DataType newType) { type = static_cast<unsigned>(newType); return *this; }
    constexpr14 RegData &setMods(int mods_) { mods = mods_; return *this; }
    constexpr14 RegData &setRegion(int vs_, int width_, int hs_) { vs = vs_; width = width_; hs = hs_; return *this; }
    constexpr14 RegData &setARF(bool arf_) { arf = arf_; return *this; }

    void invalidate() { invalid = true; }
    // Assigning the `invalid` singleton invalidates the object.
    RegData &operator=(const Invalid &i) { this->invalidate(); return *this; }

    inline void fixup(HW hw, int execSize, DataType defaultType, bool isDest, int arity); // Adjust automatically-computed strides given ESize.

    constexpr RegData operator+() const { return *this; }
    // Unary minus returns a copy with the negate modifier toggled.
    constexpr14 RegData operator-() const {
        auto result = *this;
        result.negate();
        return result;
    }
    // Same effect as unary minus.
    constexpr14 RegData operator~() const { return -*this; }
    // Toggle the negate modifier (bit 1 of mods) in place.
    constexpr14 void negate() { mods = mods ^ 2; }

    friend inline bool operator==(const RegData &r1, const RegData &r2);
    friend inline bool operator!=(const RegData &r1, const RegData &r2);

    friend inline RegData abs(const RegData &r);

#ifdef NGEN_ASM
    inline void outputText(std::ostream &str, PrintDetail detail, LabelManager &man) const;
#endif
};
583
584static_assert(sizeof(RegData) == 8, "RegData structure is not laid out correctly in memory.");
585
586static inline bool operator==(const RegData &r1, const RegData &r2) {
587 return *((uint64_t *) &r1) == *((uint64_t *) &r2);
588}
589
590static inline bool operator!=(const RegData &r1, const RegData &r2) {
591 return !(r1 == r2);
592}
593
594inline RegData abs(const RegData &r)
595{
596 RegData result = r;
597 return result.setMods(1);
598}
599
// Fill in the parts of an operand that were left unspecified: the data type
// (from defaultType) and, for operands that are not Vx-indirect, the region
// (from execSize).
// With NGEN_SAFE, throws invalid_object_exception for an invalid object,
// missing_type_exception if neither the operand nor defaultType has a type,
// and invalid_region_exception in the ternary special case noted below.
inline void RegData::fixup(HW hw, int execSize, DataType defaultType, bool isDest, int arity)
{
#ifdef NGEN_SAFE
    if (isInvalid()) throw invalid_object_exception();
#endif

    // Untyped operands inherit the instruction's default type.
    if (getType() == DataType::invalid) {
#ifdef NGEN_SAFE
        if (defaultType == DataType::invalid)
            throw missing_type_exception();
#endif
        setType(defaultType);
    }
    if (!isVxIndirect()) {
        if (execSize == 1) {
            // A single channel always uses the scalar region <0;1,0>.
            vs = hs = 0;
            width = 1;
        } else if (width == 0) {
            // width == 0 means the width was not given: derive it from hs,
            // capped by 32 bytes' worth of elements, execSize and 16.
            int maxWidth = 32 / getBytes();
            width = (hs == 0) ? 1 : std::min<int>({int(maxWidth / hs), execSize, 16});
            vs = width * hs;
            // Ternary instructions on Gen12LP and later: a computed vs of 2
            // is rewritten to vs = 1, hs = 0.
            if (arity == 3 && hw >= HW::Gen12LP && vs == 2) {
#ifdef NGEN_SAFE
                if (hs != 1) throw invalid_region_exception();
#endif
                vs = 1;
                hs = 0;
            }
        }
        // Destinations never keep a zero horizontal stride.
        if (isDest && hs == 0)
            hs = 1;
    }
}
633
// Operands for Align16 instructions
// Wraps a RegData together with a channel select, a channel enable mask and
// a replicate flag.
class Align16Operand {
protected:
    RegData rd;
    unsigned chanSel : 8;       // Four 2-bit channel selectors, s0 in the low bits.
    unsigned chanEn : 4;        // One enable bit per channel.
    bool rep : 1;               // Set by createBroadcast().

public:
    // Operand with the given channel enables and the identity selection
    // (0b11100100 selects channels 0,1,2,3 in order).
    constexpr Align16Operand(RegData rd_, int chanEn_) : rd(rd_), chanSel(0b11100100), chanEn(chanEn_), rep(false) {}
    // Operand with an explicit selection (each selector is masked to two bits)
    // and all channels enabled.
    constexpr Align16Operand(RegData rd_, int s0, int s1, int s2, int s3) : rd(rd_),
        chanSel((s0 & 3) | ((s1 & 3) << 2) | ((s2 & 3) << 4) | ((s3 & 3) << 6)), chanEn(0xF), rep(false) {}

    // All channels enabled, with the replicate flag set.
    static constexpr14 Align16Operand createBroadcast(RegData rd_) {
        Align16Operand op{rd_, 0xF};
        op.rep = true;
        return op;
    }

    // Stores `mme` in both the channel enable and channel select fields.
    static constexpr14 Align16Operand createWithMME(RegData rd_, int mme) {
        Align16Operand op{rd_, mme};
        op.chanSel = mme;
        return op;
    }

    RegData &getReg() { return rd; }
    constexpr const RegData &getReg() const { return rd; }
    constexpr uint8_t getChanSel() const { return chanSel; }
    constexpr uint8_t getChanEn() const { return chanEn; }
    constexpr bool isRep() const { return rep; }

    // The following queries forward to the wrapped RegData.
    constexpr bool isIndirect() const { return rd.isIndirect(); }
    constexpr DataType getType() const { return rd.getType(); }
    constexpr int getOffset() const { return rd.getOffset(); }
    constexpr int getMods() const { return rd.getMods(); }
    constexpr bool isARF() const { return rd.isARF(); }

    void invalidate() { rd.invalidate(); }
    // Assigning the `invalid` singleton invalidates the wrapped RegData.
    Align16Operand &operator=(const Invalid &i) { this->invalidate(); return *this; }
    bool isInvalid() const { return rd.isInvalid(); }
    bool isValid() const { return !rd.isInvalid(); }
    constexpr bool isScalar() const { return rd.isScalar(); }

    // Forwards to RegData::fixup on the wrapped register.
    void fixup(HW hw, int execSize, DataType defaultType, bool isDest, int arity) {
        rd.fixup(hw, execSize, defaultType, isDest, arity);
    }

#ifdef NGEN_ASM
    inline void outputText(std::ostream &str, PrintDetail detail, LabelManager &man) const;
    static const bool emptyOp = false;
#endif
};
686
687// Register regions.
688class RegisterRegion : public RegData
689{
690public:
691 constexpr RegisterRegion() : RegData() {}
692 constexpr14 RegisterRegion(RegData rdata_, int vs_, int width_, int hs_) {
693 *static_cast<RegData *>(this) = rdata_;
694 vs = vs_;
695 width = width_;
696 hs = hs_;
697 }
698
699 RegisterRegion &operator=(const Invalid &i) { this->invalidate(); return *this; }
700
701 constexpr RegisterRegion operator+() const { return *this; }
702 constexpr14 RegisterRegion operator-() const {
703 auto result = *this;
704 result.negate();
705 return result;
706 }
707 constexpr14 RegisterRegion operator~() const { return -*this; }
708};
709
// Subregister; always associated with a specific data type.
class Subregister : public RegData
{
protected:
    // With NGEN_SAFE, throws grf_expected_exception for ARFs; otherwise a no-op.
    void checkGRF() const {
#ifdef NGEN_SAFE
        if (isARF()) throw grf_expected_exception();
#endif
    }

public:
    constexpr Subregister() : RegData() {}
    // Copy reg_, then set the offset and type and force the scalar region <0;1,0>.
    constexpr14 Subregister(RegData reg_, int offset_, DataType type_) {
        *static_cast<RegData *>(this) = reg_;
        off = offset_;
        type = static_cast<int>(type_);
        hs = vs = 0;
        width = 1;
    }
    // Copy reg_ with offset 0 and the given type; unlike the three-argument
    // constructor, the region copied from reg_ is left unchanged.
    constexpr14 Subregister(RegData reg_, DataType type_) {
        *static_cast<RegData *>(this) = reg_;
        off = 0;
        type = static_cast<int>(type_);
    }

    // Region builders; defined outside this class.
    inline RegisterRegion operator()(int vs, int width, int hs) const;
    inline RegisterRegion operator()(int vs, int hs) const;
    inline RegisterRegion operator()(int hs) const;

    // Assigning the `invalid` singleton invalidates the subregister.
    Subregister &operator=(const Invalid &i) { this->invalidate(); return *this; }

    constexpr Subregister operator+() const { return *this; }
    // Copy with the negate modifier toggled.
    constexpr14 Subregister operator-() const {
        auto result = *this;
        result.negate();
        return result;
    }
    // Same effect as unary minus.
    constexpr14 Subregister operator~() const { return -*this; }

    // Align16 operand builders; each calls checkGRF() first.
    Align16Operand swizzle(int s0, int s1, int s2, int s3) const { checkGRF(); return Align16Operand(*this, s0, s1, s2, s3); }
    Align16Operand broadcast() const { checkGRF(); return Align16Operand::createBroadcast(*this); }
    // c0 maps to bit 0 of the channel enable mask, c3 to bit 3.
    Align16Operand enable(bool c0, bool c1, bool c2, bool c3) const { checkGRF(); return Align16Operand(*this, (int(c3) << 3) | (int(c2) << 2) | (int(c1) << 1) | int(c0)); }
    Align16Operand noSwizzle() const { return swizzle(0, 1, 2, 3); }
    Align16Operand enableAll() const { return enable(true, true, true, true); }

    // Defined outside this class.
    inline Subregister reinterpret(int offset, DataType type_) const;
    template <typename T> Subregister reinterpret(int offset = 0) const { return reinterpret(offset, getDataType<T>()); }

    // reinterpret() with the current type.
    inline Subregister offset(int off) const { return reinterpret(off, getType()); }

    // Shorthands for reinterpret(offset, DataType::xx).
    Subregister uq(int offset = 0) const { return reinterpret(offset, DataType::uq); }
    Subregister q(int offset = 0) const { return reinterpret(offset, DataType::q); }
    Subregister ud(int offset = 0) const { return reinterpret(offset, DataType::ud); }
    Subregister d(int offset = 0) const { return reinterpret(offset, DataType::d); }
    Subregister uw(int offset = 0) const { return reinterpret(offset, DataType::uw); }
    Subregister w(int offset = 0) const { return reinterpret(offset, DataType::w); }
    Subregister ub(int offset = 0) const { return reinterpret(offset, DataType::ub); }
    Subregister b(int offset = 0) const { return reinterpret(offset, DataType::b); }
    Subregister df(int offset = 0) const { return reinterpret(offset, DataType::df); }
    Subregister f(int offset = 0) const { return reinterpret(offset, DataType::f); }
    Subregister hf(int offset = 0) const { return reinterpret(offset, DataType::hf); }
    Subregister bf(int offset = 0) const { return reinterpret(offset, DataType::bf); }
    Subregister tf32(int offset = 0) const { return reinterpret(offset, DataType::tf32); }
    Subregister bf8(int offset = 0) const { return reinterpret(offset, DataType::bf8); }
};
775
// Single register.
class Register : public RegData
{
public:
    constexpr Register() : RegData() {}
    // Direct (non-indirect) register with vs = 0, width = 0 and hs = 1.
    // RegData::fixup() treats width == 0 as "not specified" and derives the
    // region from the execution size.
    constexpr Register(int reg_, bool arf_, DataType defaultType = DataType::invalid, int off_ = 0)
        : RegData(reg_, arf_, off_, false, defaultType, 0, 0, 1) {}

    constexpr Register operator+() const { return *this; }
    // Copy with the negate modifier toggled.
    constexpr14 Register operator-() const {
        auto result = *this;
        result.negate();
        return result;
    }
    // Same effect as unary minus.
    constexpr14 Register operator~() const { return -*this; }

    // Subregister at element `offset` with the given type.
    constexpr14 Subregister sub(int offset, DataType type_) const { return Subregister(*this, offset, type_); }
    template <typename T> constexpr14 Subregister sub(int offset) const { return sub(offset, getDataType<T>()); }

    // Copy of this register with a different type.
    constexpr14 Register retype(DataType type_) const { auto clone = *this; clone.setType(type_); return clone; }
    template <typename T> constexpr14 Register retype() const { return retype(getDataType<T>()); }

    // Shorthands for sub(offset, DataType::xx).
    constexpr14 Subregister uq(int offset) const { return sub(offset, DataType::uq); }
    constexpr14 Subregister q(int offset) const { return sub(offset, DataType::q); }
    constexpr14 Subregister ud(int offset) const { return sub(offset, DataType::ud); }
    constexpr14 Subregister d(int offset) const { return sub(offset, DataType::d); }
    constexpr14 Subregister uw(int offset) const { return sub(offset, DataType::uw); }
    constexpr14 Subregister w(int offset) const { return sub(offset, DataType::w); }
    constexpr14 Subregister ub(int offset) const { return sub(offset, DataType::ub); }
    constexpr14 Subregister b(int offset) const { return sub(offset, DataType::b); }
    constexpr14 Subregister df(int offset) const { return sub(offset, DataType::df); }
    constexpr14 Subregister f(int offset) const { return sub(offset, DataType::f); }
    constexpr14 Subregister hf(int offset) const { return sub(offset, DataType::hf); }
    constexpr14 Subregister bf(int offset) const { return sub(offset, DataType::bf); }
    constexpr14 Subregister tf32(int offset) const { return sub(offset, DataType::tf32); }
    constexpr14 Subregister bf8(int offset) const { return sub(offset, DataType::bf8); }

    // Shorthands for retype(DataType::xx).
    constexpr14 Register uq() const { return retype(DataType::uq); }
    constexpr14 Register q() const { return retype(DataType::q); }
    constexpr14 Register ud() const { return retype(DataType::ud); }
    constexpr14 Register d() const { return retype(DataType::d); }
    constexpr14 Register uw() const { return retype(DataType::uw); }
    constexpr14 Register w() const { return retype(DataType::w); }
    constexpr14 Register ub() const { return retype(DataType::ub); }
    constexpr14 Register b() const { return retype(DataType::b); }
    constexpr14 Register df() const { return retype(DataType::df); }
    constexpr14 Register f() const { return retype(DataType::f); }
    constexpr14 Register hf() const { return retype(DataType::hf); }
    constexpr14 Register bf() const { return retype(DataType::bf); }
    constexpr14 Register tf32() const { return retype(DataType::tf32); }
    constexpr14 Register bf8() const { return retype(DataType::bf8); }

    // Subregister at element `offset`, keeping the register's current type.
    constexpr14 Subregister operator[](int offset) const { return sub(offset, getType()); }

    // Assigning the `invalid` singleton invalidates the register.
    Register &operator=(const Invalid &i) { this->invalidate(); return *this; }
};
832
833class GRF : public Register
834{
835public:
    // Default-constructed GRFs are invalid (see the RegData default constructor).
    GRF() : Register() {}
    // GRF number reg_; the arf flag passed to Register is false.
    explicit constexpr GRF(int reg_) : Register(reg_, false) {}
838
839 constexpr GRF operator+() const { return *this; }
840 constexpr14 GRF operator-() const {
841 auto result = *this;
842 result.negate();
843 return result;
844 }
845 constexpr14 GRF operator~() const { return -*this; }
846
847 constexpr14 GRF retype(DataType type_) const { auto clone = *this; clone.setType(type_); return clone; }
848 template <typename T> constexpr14 Register retype() const { return retype(getDataType<T>()); }
849
    // Shorthands for sub(offset, DataType::xx).
    // NOTE(review): these repeat the Register overloads, presumably because the
    // zero-argument overloads declared in GRF would otherwise hide them.
    constexpr14 Subregister uq(int offset) const { return sub(offset, DataType::uq); }
    constexpr14 Subregister q(int offset) const { return sub(offset, DataType::q); }
    constexpr14 Subregister ud(int offset) const { return sub(offset, DataType::ud); }
    constexpr14 Subregister d(int offset) const { return sub(offset, DataType::d); }
    constexpr14 Subregister uw(int offset) const { return sub(offset, DataType::uw); }
    constexpr14 Subregister w(int offset) const { return sub(offset, DataType::w); }
    constexpr14 Subregister ub(int offset) const { return sub(offset, DataType::ub); }
    constexpr14 Subregister b(int offset) const { return sub(offset, DataType::b); }
    constexpr14 Subregister df(int offset) const { return sub(offset, DataType::df); }
    constexpr14 Subregister f(int offset) const { return sub(offset, DataType::f); }
    constexpr14 Subregister hf(int offset) const { return sub(offset, DataType::hf); }
    constexpr14 Subregister bf(int offset) const { return sub(offset, DataType::bf); }
    constexpr14 Subregister bf8(int offset) const { return sub(offset, DataType::bf8); }
    constexpr14 Subregister tf32(int offset) const { return sub(offset, DataType::tf32); }
864
865 constexpr14 GRF uq() const { return retype(DataType::uq); }
866 constexpr14 GRF q() const { return retype(DataType::q); }
867 constexpr14 GRF ud() const { return retype(DataType::ud); }
868 constexpr14 GRF d() const { return retype(DataType::d); }
869 constexpr14 GRF uw() const { return retype(DataType::uw); }
870 constexpr14 GRF w() const { return retype(DataType::w); }
871 constexpr14 GRF ub() const { return retype(DataType::ub); }
872 constexpr14 GRF b() const { return retype(DataType::b); }
873 constexpr14 GRF df() const { return retype(DataType::df); }
874 constexpr14 GRF f() const { return retype(DataType::f); }
875 constexpr14 GRF hf() const { return retype(DataType::hf); }
876 constexpr14 GRF bf() const { return retype(DataType::bf); }
877 constexpr14 GRF bf8() const { return retype(DataType::bf8); }
878 constexpr14 GRF tf32() const { return retype(DataType::tf32); }
879
880 Align16Operand swizzle(int s0, int s1, int s2, int s3) const { return Align16Operand(*this, s0, s1, s2, s3); }
881 Align16Operand enable(bool c0, bool c1, bool c2, bool c3) const { return Align16Operand(*this, (int(c3) << 3) | (int(c2) << 2) | (int(c1) << 1) | int(c0)); }
882 Align16Operand noSwizzle() const { return swizzle(0, 1, 2, 3); }
883 Align16Operand enableAll() const { return enable(true, true, true, true); }
884
885 GRF &operator=(const Invalid &i) { this->invalidate(); return *this; }
886
887 GRF &operator+=(const int &inc) {
888 base += inc;
889 return *this;
890 }
891
892 GRF operator++(int i) {
893 GRF old = *this;
894 ++*this;
895 return old;
896 }
897
898 GRF &operator++() {
899 *this += 1;
900 return *this;
901 }
902
903 GRF advance(int inc) {
904 auto result = *this;
905 result += inc;
906 return result;
907 }
908
909 inline GRFDisp operator+(int offset) const;
910 inline GRFDisp operator-(int offset) const;
911
912 static constexpr int log2Bytes(HW hw) { return (hw == HW::XeHPC) ? 6 : 5; }
913 static constexpr int bytes(HW hw) { return (1 << log2Bytes(hw)); }
914 static constexpr int bytesToGRFs(HW hw, unsigned x) { return (x + bytes(hw) - 1) >> log2Bytes(hw); }
915};
916
917class GRFDisp {
918protected:
919 GRF base;
920 int32_t disp;
921
922public:
923 GRFDisp(const GRF &base_, int32_t disp_) : base(base_), disp(disp_) {}
924 /* implicit */ GRFDisp(const RegData &rd) : base(reinterpret_cast<const GRF &>(rd)), disp(0) {}
925
926 constexpr GRF getBase() const { return base; }
927 constexpr int32_t getDisp() const { return disp; }
928};
929
930GRFDisp GRF::operator+(int offset) const { return GRFDisp(*this, offset); }
931GRFDisp GRF::operator-(int offset) const { return *this + (-offset); }
932
933class ARF : public Register
934{
935public:
936 constexpr ARF() : Register() {}
937 constexpr ARF(ARFType type_, int reg_, DataType defaultType = DataType::invalid, int off_ = 0)
938 : Register((static_cast<int>(type_) << 4) | (reg_ & 0xF), true, defaultType, off_) {}
939
940 ARF &operator=(const Invalid &i) { this->invalidate(); return *this; }
941};
942
943class NullRegister : public ARF
944{
945public:
946 constexpr NullRegister() : ARF(ARFType::null, 0, DataType::ud) {}
947};
948
949class AddressRegister : public ARF
950{
951public:
952 constexpr AddressRegister() : ARF() {}
953 explicit constexpr AddressRegister(int reg_) : ARF(ARFType::a, reg_, DataType::uw) {}
954
955 AddressRegister &operator=(const Invalid &i) { this->invalidate(); return *this; }
956};
957
958class AccumulatorRegister : public ARF
959{
960public:
961 constexpr AccumulatorRegister() : ARF() {}
962 explicit constexpr AccumulatorRegister(int reg_) : ARF(ARFType::acc, reg_) {}
963
964 AccumulatorRegister &operator=(const Invalid &i) { this->invalidate(); return *this; }
965
966 static constexpr14 int count(HW hw, DataType dt = DataType::invalid) {
967 if (hw == HW::Gen9 && dt == DataType::df) return 0;
968 if (hw == HW::XeHPG && dt == DataType::df) return 0;
969 if (hw >= HW::XeHP) return 4;
970 return 2;
971 }
972 static constexpr14 int count(HW hw, int grfCount, DataType dt = DataType::invalid) {
973 return count(hw, dt) * (grfCount == 256 ? 2 : 1);
974 }
975};
976
977class SpecialAccumulatorRegister : public AccumulatorRegister
978{
979 uint8_t mmeNum;
980
981public:
982 constexpr SpecialAccumulatorRegister() : AccumulatorRegister(), mmeNum(0) {}
983 constexpr SpecialAccumulatorRegister(int reg_, int mmeNum_) : AccumulatorRegister(reg_), mmeNum(mmeNum_) {}
984
985 static constexpr SpecialAccumulatorRegister createNoMME() { return SpecialAccumulatorRegister(0, 8); }
986
987 constexpr uint8_t getMME() const { return mmeNum; }
988
989 SpecialAccumulatorRegister &operator=(const Invalid &i) { this->invalidate(); return *this; }
990};
991
992// An "extended register" is a combination of a regular GRF and some extra accumulator bits, used for math macro operations.
993class ExtendedReg {
994 RegData base;
995 uint8_t mmeNum;
996
997public:
998 constexpr ExtendedReg(RegData base_, uint8_t mmeNum_) : base(base_), mmeNum(mmeNum_) {}
999 constexpr ExtendedReg(RegData base_, SpecialAccumulatorRegister acc) : base(base_), mmeNum(acc.getMME()) {}
1000
1001 void fixup(HW hw, int execSize, DataType defaultType, bool isDest, int arity) {
1002 base.fixup(hw, execSize, defaultType, isDest, arity);
1003 }
1004
1005 constexpr int getMods() const { return base.getMods(); }
1006 constexpr DataType getType() const { return base.getType(); }
1007 constexpr int getOffset() const { return base.getOffset(); }
1008 constexpr bool isIndirect() const { return base.isIndirect(); }
1009 constexpr bool isInvalid() const { return base.isInvalid(); }
1010 constexpr bool isValid() const { return !base.isInvalid(); }
1011 constexpr bool isScalar() const { return base.isScalar(); }
1012 constexpr bool isARF() const { return base.isARF(); }
1013
1014 constexpr14 RegData &getBase() { return base; }
1015 constexpr RegData getBase() const { return base; }
1016 constexpr uint8_t getMMENum() const { return mmeNum; }
1017
1018#ifdef NGEN_ASM
1019 inline void outputText(std::ostream &str, PrintDetail detail, LabelManager &man) const;
1020 static const bool emptyOp = false;
1021#endif
1022};
1023
1024static inline ExtendedReg operator|(const RegData &base, const SpecialAccumulatorRegister &acc)
1025{
1026 return ExtendedReg(base, acc);
1027}
1028
1029class FlagRegister : public ARF
1030{
1031public:
1032 constexpr FlagRegister() : ARF() {}
1033 explicit constexpr FlagRegister(int reg_) : ARF(ARFType::f, reg_, DataType::ud, 0) {}
1034 constexpr FlagRegister(int reg_, int off_) : ARF(ARFType::f, reg_, DataType::uw, off_) {}
1035
1036 static FlagRegister createFromIndex(int index) {
1037 return FlagRegister(index >> 1, index & 1);
1038 }
1039
1040 FlagRegister operator~() const {
1041 FlagRegister result = *this;
1042 result.mods = result.mods ^ 2;
1043 return result;
1044 }
1045
1046 FlagRegister &operator=(const Invalid &i) { this->invalidate(); return *this; }
1047
1048 constexpr FlagRegister operator[](int offset) const { return FlagRegister(getARFBase(), getOffset() + offset); }
1049
1050 int index() const { return (getARFBase() << 1) + getOffset(); }
1051
1052 static inline constexpr int count(HW hw) {
1053 return (hw == HW::XeHPC) ? 4 : 2;
1054 }
1055 static inline constexpr int subcount(HW hw) { return count(hw) * 2; }
1056};
1057
1058class ChannelEnableRegister : public ARF
1059{
1060public:
1061 explicit constexpr ChannelEnableRegister(int reg_ = 0) : ARF(ARFType::ce, reg_, DataType::ud) {}
1062};
1063
1064class StackPointerRegister : public ARF
1065{
1066public:
1067 explicit constexpr StackPointerRegister(int reg_ = 0) : ARF(ARFType::sp, reg_, DataType::uq) {}
1068};
1069
1070class StateRegister : public ARF
1071{
1072public:
1073 explicit constexpr StateRegister(int reg_ = 0) : ARF(ARFType::sr, reg_, DataType::ud) {}
1074};
1075
1076class ControlRegister : public ARF
1077{
1078public:
1079 explicit constexpr ControlRegister(int reg_ = 0) : ARF(ARFType::cr, reg_, DataType::ud) {}
1080};
1081
1082class NotificationRegister : public ARF
1083{
1084public:
1085 explicit constexpr NotificationRegister(int reg_ = 0) : ARF(ARFType::n, reg_, DataType::ud) {}
1086};
1087
1088class InstructionPointerRegister : public ARF
1089{
1090public:
1091 constexpr InstructionPointerRegister() : ARF(ARFType::ip, 0, DataType::ud) {}
1092};
1093
1094class ThreadDependencyRegister : public ARF
1095{
1096public:
1097 explicit constexpr ThreadDependencyRegister(int reg_ = 0) : ARF(ARFType::tdr, reg_, DataType::uw) {}
1098};
1099
1100class PerformanceRegister : public ARF
1101{
1102public:
1103 explicit constexpr PerformanceRegister(int reg_ = 0, int off_ = 0) : ARF(ARFType::tm, reg_, DataType::ud, off_) {}
1104};
1105
1106class DebugRegister : public ARF
1107{
1108public:
1109 explicit constexpr DebugRegister(int reg_ = 0) : ARF(ARFType::dbg, reg_, DataType::ud) {}
1110};
1111
1112class FlowControlRegister : public ARF
1113{
1114public:
1115 explicit constexpr FlowControlRegister(int reg_ = 0) : ARF(ARFType::fc, reg_, DataType::ud) {}
1116};
1117
1118inline RegisterRegion Subregister::operator()(int vs, int width, int hs) const
1119{
1120 RegisterRegion rr(*this, vs, width, hs);
1121 return rr;
1122}
1123
1124inline RegisterRegion Subregister::operator()(int vs_or_width, int hs) const
1125{
1126 int vs, width;
1127
1128 if (isIndirect()) {
1129 vs = -1;
1130 width = vs_or_width;
1131 } else {
1132 vs = vs_or_width;
1133 width = (hs == 0) ? ((vs == 0) ? 1 : vs) : vs / hs;
1134 }
1135
1136 return operator()(vs, width, hs);
1137}
1138
1139inline RegisterRegion Subregister::operator()(int hs) const
1140{
1141 return operator()(0, 0, hs);
1142}
1143
1144inline Subregister Subregister::reinterpret(int offset, DataType type_) const
1145{
1146 Subregister r = *this;
1147 r.setType(type_);
1148
1149 int o = getOffset();
1150 int oldbytes = getBytes(), newbytes = r.getBytes();
1151 int bitdiff = (oldbytes == 0) ? 0
1152 : (utils::log2(newbytes) - utils::log2(oldbytes));
1153
1154 if (newbytes < oldbytes)
1155 r.setOffset((o << -bitdiff) + offset);
1156 else
1157 r.setOffset((o >> bitdiff) + offset);
1158
1159 return r;
1160}
1161
1162// Indirect register and frames for making them.
1163class IndirectRegister : public Register {
1164protected:
1165 explicit constexpr14 IndirectRegister(const RegData &reg) : Register((reg.getARFBase() << 4) | reg.getOffset(), false) {
1166 indirect = true;
1167 }
1168 friend class IndirectRegisterFrame;
1169
1170 IndirectRegister &operator=(const Invalid &i) { this->invalidate(); return *this; }
1171};
1172
1173class IndirectRegisterFrame {
1174public:
1175 IndirectRegister operator[](const RegData &reg) const {
1176#ifdef NGEN_SAFE
1177 if (!reg.isARF() || reg.getARFType() != ARFType::a)
1178 throw invalid_arf_exception();
1179#endif
1180 return IndirectRegister(reg);
1181 }
1182};
1183
1184// GRFRange represents a contiguous range of GRF registers.
1185class GRFRange {
1186protected:
1187 uint8_t base;
1188 uint8_t len;
1189
1190 static constexpr uint8_t invalidLen = 0xFF;
1191
1192public:
1193 GRFRange() : GRFRange(0, invalidLen) {}
1194 GRFRange(int base_, int len_) : base(base_), len(len_) {}
1195 GRFRange(GRF base_, int len_) : GRFRange(base_.getBase(), len_) {}
1196
1197 int getBase() const { return base; }
1198 int getLen() const { return len; }
1199 bool isEmpty() const { return len == 0; }
1200 bool isNull() const { return false; }
1201
1202 void invalidate() { len = invalidLen; }
1203 bool isInvalid() const { return len == invalidLen; }
1204 bool isValid() const { return !isInvalid(); }
1205
1206 GRFRange &operator=(const Invalid &i) { this->invalidate(); return *this; }
1207
1208 GRF operator[](int i) const {
1209#ifdef NGEN_SAFE
1210 if (isInvalid()) throw invalid_object_exception();
1211#endif
1212 return GRF(base + i);
1213 }
1214
1215 operator GRF() const { return (*this)[0]; }
1216
1217 inline Subregister sub(HW hw, int offset, DataType type) const;
1218
1219 void fixup(HW hw, int execSize, DataType defaultType, bool isDest, int arity) {}
1220};
1221
1222static inline GRFRange operator-(const GRF &reg1, const GRF &reg2)
1223{
1224 uint8_t b1 = reg1.getBase(), b2 = reg2.getBase();
1225 int len = int(b2) + 1 - int(b1);
1226
1227#ifdef NGEN_SAFE
1228 if (len < 0) throw invalid_range_exception();
1229#endif
1230
1231 return GRFRange(reg1, len);
1232}
1233
1234static inline bool operator==(const GRFRange &r1, const GRFRange &r2)
1235{
1236 return (r1.getBase() == r2.getBase()) && (r1.getLen() == r2.getLen());
1237}
1238
1239static inline bool operator!=(const GRFRange &r1, const GRFRange &r2)
1240{
1241 return !(r1 == r2);
1242}
1243
1244Subregister GRFRange::sub(HW hw, int offset, DataType type) const {
1245 const int lg2Len = GRF::log2Bytes(hw) - getLog2Bytes(type);
1246 return (*this)[offset >> lg2Len].sub(offset - ((offset >> lg2Len) << lg2Len), type);
1247}
1248
// Condition modifier encodings. ze/eq and nz/ne are aliases for the same value.
enum class ConditionModifier {
    none = 0x0,
    ze   = 0x1,
    eq   = ze,
    nz   = 0x2,
    ne   = nz,
    gt   = 0x3,
    ge   = 0x4,
    lt   = 0x5,
    le   = 0x6,
    ov   = 0x8,
    un   = 0x9,
    eo   = 0xF
};
1263
#ifdef NGEN_ASM
// Prints the mnemonic of a condition modifier; encodings without a name
// print as an empty string.
static inline std::ostream &operator<<(std::ostream &str, ConditionModifier cmod)
{
    static const char *names[16] = {
        "",   "eq", "ne", "gt", "ge", "lt", "le", "",
        "ov", "un", "",   "",   "",   "",   "",   "eo"
    };
    const int idx = static_cast<uint8_t>(cmod) & 0xF;
    return str << names[idx];
}
#endif
1272
// Channel mask encodings. Each enumerator is named after the r/g/b/a
// channels it covers; the 4-bit value has a bit cleared for each channel
// in the name (r = bit 0, g = bit 1, b = bit 2, a = bit 3).
enum class ChannelMask {
    rgba = 0x0,
    gba  = 0x1,
    rba  = 0x2,
    ba   = 0x3,
    rga  = 0x4,
    bga  = 0x5,
    ga   = 0x6,
    a    = 0x7,
    rgb  = 0x8,
    gb   = 0x9,
    rb   = 0xA,
    b    = 0xB,
    rg   = 0xC,
    g    = 0xD,
    r    = 0xE,
};
1290
// Predication control encodings.
// x/y/z/w reuse the values 2..5 (the same values as anyv/allv/any2h/all2h).
enum class PredCtrl {
    None   = 0,
    Normal = 1,

    anyv   = 2,
    allv   = 3,
    any2h  = 4,
    all2h  = 5,
    any4h  = 6,
    all4h  = 7,
    any8h  = 8,
    all8h  = 9,
    any16h = 10,
    all16h = 11,
    any32h = 12,
    all32h = 13,
    any    = 14,
    all    = 15,

    // Aliases sharing encodings with the entries above.
    x = anyv,
    y = allv,
    z = any2h,
    w = all2h,
};
1313
#ifdef NGEN_ASM
// Returns the assembly text for a predication control value.
// Row 0 of the table holds the names used when align16 is false, row 1 the
// names used when it is true; encodings without a name map to "".
static const char *toText(PredCtrl ctrl, bool align16) {
    // Static storage: the table is initialized once instead of being rebuilt
    // on the stack on every call.
    static const char * const names[2][16] = {
        {"", "", "anyv", "allv", "any2h", "all2h", "any4h", "all4h", "any8h", "all8h", "any16h", "all16h", "any32h", "all32h", "any", "all"},
        {"", "", "x", "y", "z", "w", "", "", "", "", "", "", "", "", "", ""}
    };
    return names[align16][static_cast<int>(ctrl) & 0xF];
}
#endif
1321
// Thread control encodings (2-bit field values).
enum class ThreadCtrl {
    Normal    = 0x0,
    Atomic    = 0x1,
    Switch    = 0x2,
    NoPreempt = 0x3
};
1328
// Instruction opcodes (7-bit values).
// Several encodings are shared by two enumerators (sync/mov, srnd/dp4,
// line/dpas, pln/dpasw). Enumerators with a _gen12 suffix live in the
// 0x60-0x7A range and mirror the names in the 0x01-0x1A range.
enum class Opcode {
    // 0x00 - 0x0F
    illegal     = 0x00,
    sync        = 0x01,
    mov         = 0x01,     // same encoding as sync
    sel         = 0x02,
    movi        = 0x03,
    not_        = 0x04,
    and_        = 0x05,
    or_         = 0x06,
    xor_        = 0x07,
    shr         = 0x08,
    shl         = 0x09,
    smov        = 0x0a,
    asr         = 0x0c,
    ror         = 0x0e,
    rol         = 0x0f,

    // 0x10 - 0x1F
    cmp         = 0x10,
    cmpn        = 0x11,
    csel        = 0x12,
    bfrev       = 0x17,
    bfe         = 0x18,
    bfi1        = 0x19,
    bfi2        = 0x1a,

    // 0x20 - 0x2F
    jmpi        = 0x20,
    brd         = 0x21,
    if_         = 0x22,
    brc         = 0x23,
    else_       = 0x24,
    endif       = 0x25,
    while_      = 0x27,
    break_      = 0x28,
    cont        = 0x29,
    halt        = 0x2a,
    calla       = 0x2b,
    call        = 0x2c,
    ret         = 0x2d,
    goto_       = 0x2e,
    join        = 0x2f,

    // 0x30 - 0x3F
    wait        = 0x30,
    send        = 0x31,
    sendc       = 0x32,
    sends       = 0x33,
    sendsc      = 0x34,
    math        = 0x38,

    // 0x40 - 0x4F
    add         = 0x40,
    mul         = 0x41,
    avg         = 0x42,
    frc         = 0x43,
    rndu        = 0x44,
    rndd        = 0x45,
    rnde        = 0x46,
    rndz        = 0x47,
    mac         = 0x48,
    mach        = 0x49,
    lzd         = 0x4a,
    fbh         = 0x4b,
    fbl         = 0x4c,
    cbit        = 0x4d,
    addc        = 0x4e,
    subb        = 0x4f,

    // 0x50 - 0x5F
    sad2        = 0x50,
    sada2       = 0x51,
    add3        = 0x52,
    macl        = 0x53,
    srnd        = 0x54,
    dp4         = 0x54,     // same encoding as srnd
    dph         = 0x55,
    dp3         = 0x56,
    dp2         = 0x57,
    dp4a        = 0x58,
    line        = 0x59,
    dpas        = 0x59,     // same encoding as line
    pln         = 0x5a,
    dpasw       = 0x5a,     // same encoding as pln
    mad         = 0x5b,
    lrp         = 0x5c,
    madm        = 0x5d,

    // 0x60 - 0x6F
    nop_gen12   = 0x60,
    mov_gen12   = 0x61,
    sel_gen12   = 0x62,
    movi_gen12  = 0x63,
    not_gen12   = 0x64,
    and_gen12   = 0x65,
    or_gen12    = 0x66,
    xor_gen12   = 0x67,
    shr_gen12   = 0x68,
    shl_gen12   = 0x69,
    smov_gen12  = 0x6a,
    bfn         = 0x6b,
    asr_gen12   = 0x6c,
    ror_gen12   = 0x6e,
    rol_gen12   = 0x6f,

    // 0x70 - 0x7F
    cmp_gen12   = 0x70,
    cmpn_gen12  = 0x71,
    csel_gen12  = 0x72,
    bfrev_gen12 = 0x77,
    bfe_gen12   = 0x78,
    bfi1_gen12  = 0x79,
    bfi2_gen12  = 0x7a,
    nop         = 0x7e,
    wrdep       = 0x7f,     /* not a valid opcode; used internally by nGEN */
};
1431
1432static inline bool isVariableLatency(HW hw, Opcode op)
1433{
1434 switch (op) {
1435 case Opcode::math:
1436 if (hw >= HW::XeHPC) return false;
1437 case Opcode::send:
1438 case Opcode::sendc:
1439 case Opcode::dpas:
1440 case Opcode::dpasw:
1441 return true;
1442 default:
1443 return false;
1444 }
1445}
1446
1447static inline bool isBranch(Opcode op)
1448{
1449 return (static_cast<int>(op) >> 4) == 2;
1450}
1451
#ifdef NGEN_ASM
// Returns the assembly mnemonic for an opcode on the given hardware.
// The table is indexed by the 7-bit opcode value. Where one value is shared
// by two opcodes, the table holds one name (sync, srnd, dpas, dpasw) and the
// other name (mov, dp4, line, pln) is substituted for hardware before Gen12LP.
static const char *getMnemonic(Opcode op, HW hw)
{
    // Static storage: the 128-entry table is initialized once instead of
    // being rebuilt on the stack on every call.
    static const char * const names[0x80] = {
        "illegal", "sync", "sel", "movi", "not", "and", "or", "xor",
        "shr", "shl", "smov", "", "asr", "", "ror", "rol",
        "cmp", "cmpn", "csel", "", "", "", "", "bfrev",
        "bfe", "bfi1", "bfi2", "", "", "", "", "",
        "jmpi", "brd", "if", "brc", "else", "endif", "", "while",
        "break", "cont", "halt", "calla", "call", "ret", "goto", "join",
        "wait", "send", "sendc", "sends", "sendsc", "", "", "",
        "math", "", "", "", "", "", "", "",
        "add", "mul", "avg", "frc", "rndu", "rndd", "rnde", "rndz",
        "mac", "mach", "lzd", "fbh", "fbl", "cbit", "addc", "subb",
        "sad2", "sada2", "add3", "macl", "srnd", "dph", "dp3", "dp2",
        "dp4a", "dpas", "dpasw", "mad", "lrp", "madm", "", "",
        "nop", "mov", "sel", "movi", "not", "and", "or", "xor",
        "shr", "shl", "smov", "bfn", "asr", "", "ror", "rol",
        "cmp", "cmpn", "csel", "", "", "", "", "bfrev",
        "bfe", "bfi1", "bfi2", "", "", "", "nop", ""
    };

    const char *mnemonic = names[static_cast<int>(op) & 0x7F];

    // Before Gen12LP, the shared encodings use their other names.
    if (hw < HW::Gen12LP) switch (op) {
        case Opcode::mov:   mnemonic = "mov";   break;
        case Opcode::line:  mnemonic = "line";  break;
        case Opcode::pln:   mnemonic = "pln";   break;
        case Opcode::dp4:   mnemonic = "dp4";   break;
        default: break;
    }

    return mnemonic;
}
#endif
1487
// Empty tag type; getPipe<AllPipes>() maps it to Pipe::A.
class AllPipes
{
};
// Pipe identifiers. Each pipe has a one-letter name and a long alias with
// the same value.
enum class Pipe : uint8_t {
    Default = 0,

    A       = 1,
    All     = A,

    F       = 2,
    Float   = F,

    I       = 3,
    Integer = I,

    L       = 4,
    Long    = L,

    M       = 5,
    Math    = M,
};
1497
#ifdef NGEN_ASM
// Prints the one-letter name of a pipe; Pipe::Default and unnamed values
// print as an empty string.
static inline std::ostream &operator<<(std::ostream &str, Pipe pipe)
{
    static const char *names[8] = {
        "", "A", "F", "I", "L", "M", "", ""
    };
    const int idx = static_cast<uint8_t>(pipe) & 7;
    return str << names[idx];
}
#endif
1506
1507class SWSBInfo
1508{
1509 friend class InstructionModifier;
1510
1511public:
1512 union {
1513 struct {
1514 unsigned token : 5;
1515 unsigned noacc : 1;
1516 unsigned src : 1;
1517 unsigned dst : 1;
1518 unsigned dist : 4;
1519 unsigned pipe : 4;
1520 } parts;
1521 uint16_t all;
1522 };
1523
1524 constexpr bool hasDist() const { return parts.dist > 0; }
1525 constexpr bool hasToken() const { return parts.src || parts.dst; }
1526 constexpr bool hasTokenSet() const { return parts.src && parts.dst; }
1527 constexpr int getToken() const { return hasToken() ? parts.token : 0; }
1528 constexpr unsigned tokenMode() const { return (parts.src << 1) | parts.dst; }
1529 constexpr Pipe getPipe() const { return static_cast<Pipe>(parts.pipe); }
1530 void setPipe(Pipe pipe) { parts.pipe = static_cast<unsigned>(pipe); }
1531 constexpr bool empty() const { return (all == 0); }
1532
1533protected:
1534 explicit constexpr SWSBInfo(uint16_t all_) : all(all_) {}
1535
1536public:
1537 constexpr SWSBInfo() : all(0) {}
1538 constexpr SWSBInfo(Pipe pipe_, int dist_) : all(((dist_ & 0xF) << 8) | (static_cast<unsigned>(pipe_) << 12)) {}
1539 constexpr SWSBInfo(int id_, bool src_, bool dst_) : all(id_ | (uint16_t(src_) << 6) | (uint16_t(dst_) << 7)) {}
1540
1541 static constexpr SWSBInfo createNoAccSBSet() { return SWSBInfo(0x20); }
1542
1543 friend constexpr SWSBInfo operator|(const SWSBInfo &i1, const SWSBInfo &i2) { return SWSBInfo(i1.all | i2.all); }
1544};
1545
1546// Token count.
1547constexpr inline int tokenCount(HW hw)
1548{
1549 return (hw >= HW::XeHPC) ? 32 :
1550 (hw >= HW::Gen12LP) ? 16
1551 : 0;
1552}
1553
1554class SBID
1555{
1556public:
1557 SWSBInfo set;
1558 SWSBInfo src;
1559 SWSBInfo dst;
1560
1561 constexpr SBID(int id) : set(id, true, true), src(id, true, false), dst(id, false, true) {}
1562 constexpr operator SWSBInfo() const { return set; }
1563
1564 constexpr int getID() const { return set.getToken(); }
1565};
1566
1567template <typename T> static constexpr Pipe getPipe() { return (sizeof(T) == 8) ? Pipe::L : Pipe::I; }
1568template <> constexpr Pipe getPipe<float>() { return Pipe::F; }
1569template <> constexpr Pipe getPipe<void>() { return Pipe::Default; }
1570template <> constexpr Pipe getPipe<AllPipes>() { return Pipe::A; }
1571
1572constexpr SWSBInfo SWSB(SWSBInfo info) { return info; }
1573constexpr SWSBInfo SWSB(Pipe pipe, int dist) { return SWSBInfo(pipe, dist); }
1574template <typename T = void> constexpr SWSBInfo SWSB(int dist) { return SWSB(getPipe<T>(), dist); }
1575template <typename T = void> constexpr SWSBInfo SWSB(SWSBInfo info, int dist) { return SWSB<T>(dist) | info; }
1576
1577class InstructionModifier {
1578protected:
1579 union {
1580 struct {
1581 unsigned execSize : 8; // Execution size as integer (for internal use).
1582 unsigned accessMode : 1; // From here on matches the low 64-bits of the binary format for Gen8-11
1583 unsigned noDDClr : 1;
1584 unsigned noDDChk : 1;
1585 unsigned chanOff : 3;
1586 unsigned threadCtrl : 2;
1587 unsigned predCtrl : 4;
1588 unsigned predInv : 1;
1589 unsigned eSizeField : 3;
1590 unsigned cmod : 4; // Also stores channel mask temporarily for surface r/w
1591 unsigned accWrCtrl : 1; // = noSrcDepSet for send, = branchCtrl for branch instructions
1592 unsigned cmptCtrl : 1;
1593 unsigned debugCtrl : 1;
1594 unsigned saturate : 1;
1595 unsigned flagSubRegNum : 1;
1596 unsigned flagRegNum : 1;
1597 unsigned maskCtrl : 1;
1598 unsigned _zeros_: 9;
1599 unsigned flagRegNum1 : 1;
1600 unsigned autoSWSB : 1;
1601 unsigned fusionCtrl : 1; // Gen12
1602 unsigned eot : 1;
1603 unsigned swsb : 16;
1604 } parts;
1605 uint64_t all;
1606 };
1607
1608 constexpr InstructionModifier(uint64_t all_) : all(all_) {}
1609
1610 friend inline void encodeCommon12(Instruction12 &i, Opcode opcode, const InstructionModifier &mod, const RegData &dst, EncodingTag12 tag);
1611 friend inline void encodeCommon12(Instruction12 &i, Opcode opcode, const InstructionModifier &mod, const RegData &dst, EncodingTagXeHPC tag);
1612
1613public:
1614 constexpr int getExecSize() const { return parts.execSize; }
1615 constexpr bool isAlign16() const { return parts.accessMode; }
1616 constexpr bool isNoDDClr() const { return parts.noDDClr; }
1617 constexpr bool isNoDDChk() const { return parts.noDDChk; }
1618 constexpr int getChannelOffset() const { return parts.chanOff << 2; }
1619 constexpr ThreadCtrl getThreadCtrl() const { return static_cast<ThreadCtrl>(parts.threadCtrl); }
1620 constexpr bool isAtomic() const { return getThreadCtrl() == ThreadCtrl::Atomic; }
1621 constexpr PredCtrl getPredCtrl() const { return static_cast<PredCtrl>(parts.predCtrl); }
1622 constexpr bool isPredInv() const { return parts.predInv; }
1623 constexpr ConditionModifier getCMod() const { return static_cast<ConditionModifier>(parts.cmod); }
1624 constexpr bool isAccWrEn() const { return parts.accWrCtrl; }
1625 constexpr bool getBranchCtrl() const { return parts.accWrCtrl; }
1626 constexpr bool isCompact() const { return parts.cmptCtrl; }
1627 constexpr bool isBreakpoint() const { return parts.debugCtrl; }
1628 constexpr bool isSaturate() const { return parts.saturate; }
1629 constexpr14 FlagRegister getFlagReg() const { return FlagRegister((parts.flagRegNum1 << 1) | parts.flagRegNum, parts.flagSubRegNum); }
1630 constexpr bool isWrEn() const { return parts.maskCtrl; }
1631 constexpr bool isAutoSWSB() const { return parts.autoSWSB; }
1632 constexpr bool isSerialized() const { return parts.fusionCtrl; }
1633 constexpr bool isEOT() const { return parts.eot; }
1634 constexpr SWSBInfo getSWSB() const { return SWSBInfo(parts.swsb); }
1635 constexpr uint64_t getAll() const { return all; }
1636
1637 constexpr14 void setExecSize(int execSize_) { parts.execSize = execSize_; parts.eSizeField = utils::log2(execSize_); }
1638 constexpr14 void setPredCtrl(PredCtrl predCtrl_) { parts.predCtrl = static_cast<unsigned>(predCtrl_); }
1639 constexpr14 void setPredInv(bool predInv_) { parts.predInv = predInv_; }
1640 constexpr14 void setCMod(const ConditionModifier &cmod_) { parts.cmod = static_cast<unsigned>(cmod_); }
1641 constexpr14 void setBranchCtrl(bool branchCtrl) { parts.accWrCtrl = branchCtrl; }
1642 constexpr14 void setFlagReg(FlagRegister &flag) { parts.flagRegNum1 = flag.getBase() >> 1; parts.flagRegNum = flag.getBase() & 1; parts.flagSubRegNum = flag.getOffset(); }
1643 constexpr14 void setWrEn(bool maskCtrl_) { parts.maskCtrl = maskCtrl_; }
1644 constexpr14 void setAutoSWSB(bool autoSWSB_) { parts.autoSWSB = autoSWSB_; }
1645 constexpr14 void setSWSB(SWSBInfo swsb_) { parts.swsb = swsb_.all; }
1646 constexpr14 void setSWSB(uint16_t swsb_) { parts.swsb = swsb_; }
1647
1648 constexpr InstructionModifier() : all(0) {}
1649
1650 // Hardcoded shift counts are a workaround for MSVC v140 bug.
1651 constexpr /* implicit */ InstructionModifier(const PredCtrl &predCtrl_)
1652 : all{static_cast<uint64_t>(predCtrl_) << 16} {}
1653
1654 constexpr /* implicit */ InstructionModifier(const ThreadCtrl &threadCtrl_)
1655 : all{static_cast<uint64_t>(threadCtrl_) << 14} {}
1656
1657 constexpr /* implicit */ InstructionModifier(const ConditionModifier &cmod_)
1658 : all{static_cast<uint64_t>(cmod_) << 24} {}
1659
1660 constexpr14 /* implicit */ InstructionModifier(const int &execSize_) : InstructionModifier() {
1661 setExecSize(execSize_);
1662 }
1663 constexpr14 /* implicit */ InstructionModifier(const SWSBInfo &swsb) : InstructionModifier() {
1664 parts.swsb = swsb.all;
1665 }
1666 constexpr14 /* implicit */ InstructionModifier(const SBID &sb) : InstructionModifier(SWSB(sb)) {}
1667
1668protected:
1669 constexpr InstructionModifier(bool accessMode_, bool noDDClr_, bool noDDChk_, unsigned chanOff_, bool accWrCtrl_,
1670 bool debugCtrl_, bool saturate_, bool maskCtrl_, bool autoSWSB_, bool fusionCtrl_, bool eot_)
1671 : all{(uint64_t(accessMode_) << 8) | (uint64_t(noDDClr_) << 9) | (uint64_t(noDDChk_) << 10) | (uint64_t(chanOff_ >> 2) << 11)
1672 | (uint64_t(accWrCtrl_) << 28) | (uint64_t(debugCtrl_) << 30) | (uint64_t(saturate_) << 31)
1673 | (uint64_t(maskCtrl_) << 34) | (uint64_t(autoSWSB_) << 45) | (uint64_t(fusionCtrl_) << 46) | (uint64_t(eot_) << 47)} {}
1674
1675public:
1676 static constexpr InstructionModifier createAccessMode(int accessMode_) {
1677 return InstructionModifier(accessMode_, false, false, 0, false, false, false, false, false, false, false);
1678 }
1679 static constexpr InstructionModifier createNoDDClr() {
1680 return InstructionModifier(false, true, false, 0, false, false, false, false, false, false, false);
1681 }
1682 static constexpr InstructionModifier createNoDDChk() {
1683 return InstructionModifier(false, false, true, 0, false, false, false, false, false, false, false);
1684 }
1685 static constexpr InstructionModifier createChanOff(int offset) {
1686 return InstructionModifier(false, false, false, offset, false, false, false, false, false, false, false);
1687 }
1688 static constexpr InstructionModifier createAccWrCtrl() {
1689 return InstructionModifier(false, false, false, 0, true, false, false, false, false, false, false);
1690 }
1691 static constexpr InstructionModifier createDebugCtrl() {
1692 return InstructionModifier(false, false, false, 0, false, true, false, false, false, false, false);
1693 }
1694 static constexpr InstructionModifier createSaturate() {
1695 return InstructionModifier(false, false, false, 0, false, false, true, false, false, false, false);
1696 }
1697 static constexpr InstructionModifier createMaskCtrl(bool maskCtrl_) {
1698 return InstructionModifier(false, false, false, 0, false, false, false, maskCtrl_, false, false, false);
1699 }
1700 static constexpr InstructionModifier createAutoSWSB() {
1701 return InstructionModifier(false, false, false, 0, false, false, false, false, true, false, false);
1702 }
1703 static constexpr InstructionModifier createSerialized() {
1704 return InstructionModifier(false, false, false, 0, false, false, false, false, false, true, false);
1705 }
1706 static constexpr InstructionModifier createEOT() {
1707 return InstructionModifier(false, false, false, 0, false, false, false, false, false, false, true);
1708 }
1709
1710 friend constexpr14 InstructionModifier operator|(const InstructionModifier &mod1, const InstructionModifier &mod2);
1711 friend constexpr14 InstructionModifier operator|(const InstructionModifier &mod1, const FlagRegister &mod2);
1712 friend constexpr14 InstructionModifier operator|(const InstructionModifier &mod1, const PredCtrl &mod2);
1713
1714 friend constexpr14 InstructionModifier operator^(const InstructionModifier &mod1, const InstructionModifier &mod2);
1715
1716 constexpr14 InstructionModifier operator~() {
1717 InstructionModifier mod = *this;
1718 mod.parts.predInv = ~mod.parts.predInv;
1719 return mod;
1720 }
1721
1722 template <typename T>
1723 InstructionModifier &operator|=(const T &mod) {
1724 *this = *this | mod;
1725 return *this;
1726 }
1727
1728 InstructionModifier &operator^=(const InstructionModifier &mod) {
1729 *this = *this ^ mod;
1730 return *this;
1731 }
1732};
1733
1734inline constexpr14 InstructionModifier operator|(const InstructionModifier &mod1, const InstructionModifier &mod2)
1735{
1736 return InstructionModifier(mod1.all | mod2.all);
1737}
1738
1739
1740inline constexpr14 InstructionModifier operator|(const InstructionModifier &mod1, const FlagRegister &flag)
1741{
1742 InstructionModifier mod = mod1;
1743
1744 mod.parts.flagRegNum1 = flag.getBase() >> 1;
1745 mod.parts.flagRegNum = flag.getBase() & 1;
1746 mod.parts.flagSubRegNum = flag.getOffset();
1747
1748 if (mod.getCMod() == ConditionModifier::none) {
1749 mod.parts.predInv = flag.getNeg();
1750 mod.parts.predCtrl = static_cast<int>(PredCtrl::Normal);
1751 }
1752
1753 return mod;
1754}
1755
1756inline constexpr14 InstructionModifier operator|(const InstructionModifier &mod1, const PredCtrl &mod2)
1757{
1758 InstructionModifier mod = mod1;
1759 mod.parts.predCtrl = static_cast<int>(mod2);
1760 return mod;
1761}
1762
1763inline constexpr14 InstructionModifier operator^(const InstructionModifier &mod1, const InstructionModifier &mod2)
1764{
1765 return InstructionModifier(mod1.all ^ mod2.all);
1766}
1767
// An immediate operand: a raw bit payload (up to 64 bits) tagged with a DataType.
//
// Integer constructors pick the narrowest w/uw/d/ud encoding that represents
// the value exactly; the named factories (ud(), d(), uq(), q(), ...) force a
// specific type instead. Packed-vector immediates (uv, v, vf) are built by the
// corresponding static helpers.
class Immediate {
protected:
    uint64_t payload;           // raw encoded bits
    DataType type;              // type tag for the payload
    bool hiddenType = false;    // set by hideType(); not consumed anywhere in this class

    // Construct directly from already-encoded bits and an explicit type.
    Immediate(uint64_t payload_, DataType type_) : payload(payload_), type(type_) {}

    // 2-byte values: bit pattern is replicated into both 16-bit halves of the low 32 bits.
    template <typename T> typename std::enable_if<sizeof(T) == 2>::type setPayload(T imm) {
        uint32_t ximm = utils::bitcast<T, uint16_t>(imm);
        payload = ximm | (ximm << 16);
    }
    // 4-byte values: bit pattern stored in the low 32 bits.
    template <typename T> typename std::enable_if<sizeof(T) == 4>::type setPayload(T imm) {
        payload = utils::bitcast<T, uint32_t>(imm);
    }
    // 8-byte values: bit pattern stored as-is.
    template <typename T> typename std::enable_if<sizeof(T) == 8>::type setPayload(T imm) {
        payload = utils::bitcast<T, uint64_t>(imm);
    }

    // Store `imm` as type T: sets both the payload and the type tag.
    template <typename T> void set(T imm) {
        setPayload<T>(imm);
        type = getDataType<T>();
    }

    // Store a signed value using the first of int16, uint16, int32, uint32
    // that round-trips it exactly; otherwise keep it as T.
    template <typename T> void shrinkSigned(T imm) {
        if (imm == T(int16_t(imm))) set<int16_t>(imm);
        else if (imm == T(uint16_t(imm))) set<uint16_t>(imm);
        else if (imm == T(int32_t(imm))) set<int32_t>(imm);
        else if (imm == T(uint32_t(imm))) set<uint32_t>(imm);
        else set(imm);
    }

    // Store an unsigned value using the first of uint16, uint32 that
    // round-trips it exactly; otherwise keep it as T.
    template <typename T> void shrinkUnsigned(T imm) {
        if (imm == T(uint16_t(imm))) set<uint16_t>(imm);
        else if (imm == T(uint32_t(imm))) set<uint32_t>(imm);
        else set(imm);
    }

public:
    // Default: zero payload with an invalid type.
    Immediate() : payload(0), type(DataType::invalid) {}

#ifdef NGEN_ASM
    static const bool emptyOp = false;
#endif

    constexpr14 DataType getType() const { return type; }
    // Raw payload bits.
    explicit constexpr14 operator uint64_t() const { return payload; }
    // Immediates carry no source modifiers and are never ARF operands.
    constexpr14 int getMods() const { return 0; }
    constexpr14 bool isARF() const { return false; }

    // Override the type tag without touching the payload.
    Immediate &setType(DataType type_) { type = type_; return *this; }

    // Implicit conversions from scalar C++ types.
    // 16-bit inputs keep their type; 32/64-bit integers are shrunk (see shrinkSigned/shrinkUnsigned).
    Immediate(uint16_t imm) { set(imm); }
    Immediate(int16_t imm) { set(imm); }
    Immediate(uint32_t imm) { shrinkUnsigned(imm); }
    Immediate(int32_t imm) { shrinkSigned(imm); }
    Immediate(uint64_t imm) { shrinkUnsigned(imm); }
    Immediate(int64_t imm) { shrinkSigned(imm); }

    Immediate(float imm) { set(imm); }
    Immediate(double imm) { set(imm); }
#ifdef NGEN_HALF_TYPE
    Immediate(half imm) { set(imm); }
#endif
#ifdef NGEN_BFLOAT16_TYPE
    Immediate(bfloat16 imm) { set(imm); }
#endif

    // Copy of this immediate with the hiddenType flag set.
    Immediate hideType() const {
        Immediate result = *this;
        result.hiddenType = true;
        return result;
    }

    // Explicitly-typed factories. Unlike the converting constructors, ud/d/uq/q
    // do not shrink the value to a narrower type.
    static inline Immediate uw(uint16_t imm) { return Immediate(imm); }
    static inline Immediate w(int16_t imm) { return Immediate(imm); }
    static inline Immediate ud(uint32_t imm) { Immediate i; i.set(imm); return i; }
    static inline Immediate d(int32_t imm) { Immediate i; i.set(imm); return i; }
    static inline Immediate uq(uint64_t imm) { Immediate i; i.set(imm); return i; }
    static inline Immediate q(int64_t imm) { Immediate i; i.set(imm); return i; }
    static inline Immediate f(float imm) { return Immediate(imm); }
    static inline Immediate df(double imm) { return Immediate(imm); }

    // Half-precision immediate from its raw 16-bit pattern (replicated into both halves of 32 bits).
    static inline Immediate hf(uint16_t f) {
        uint32_t fimm = f;
        fimm |= (fimm << 16);
        return Immediate(fimm, DataType::hf);
    }

    // bfloat16 immediate from its raw 16-bit pattern (replicated into both halves of 32 bits).
    static inline Immediate bf(uint16_t f) {
        uint32_t fimm = f;
        fimm |= (fimm << 16);
        return Immediate(fimm, DataType::bf);
    }

protected:
    // Validate one element of a uv immediate: with NGEN_SAFE, throws if any of
    // bits 4-7 are set. Returns the value unchanged.
    // NOTE(review): parameter is int8_t although uv() passes uint8_t elements;
    // values >= 0x80 become negative here and sign-extend on return when
    // NGEN_SAFE is off -- confirm whether uint8_t was intended.
    static inline uint32_t toUV(int8_t i) {
#ifdef NGEN_SAFE
        if (i & 0xF0) throw invalid_immediate_exception();
#endif
        return i;
    }

public:
    // uv immediate from an already-packed 32-bit value.
    static inline Immediate uv(uint32_t i) {
        return Immediate(i, DataType::uv);
    }

    // uv immediate from eight 4-bit elements; i0 occupies the lowest nibble.
    static inline Immediate uv(uint8_t i0, uint8_t i1, uint8_t i2, uint8_t i3, uint8_t i4, uint8_t i5, uint8_t i6, uint8_t i7) {
        uint32_t payload = (toUV(i0) << 0)
                         | (toUV(i1) << 4)
                         | (toUV(i2) << 8)
                         | (toUV(i3) << 12)
                         | (toUV(i4) << 16)
                         | (toUV(i5) << 20)
                         | (toUV(i6) << 24)
                         | (toUV(i7) << 28);
        return uv(payload);
    }

protected:
    // Encode one element of a v immediate as a 4-bit value: low 3 bits of `i`
    // plus bit 3 taken from the sign. With NGEN_SAFE, throws unless -8 <= i <= 7.
    static inline uint32_t toV(int8_t i) {
#ifdef NGEN_SAFE
        if (i < -8 || i > 7) throw invalid_immediate_exception();
#endif
        return (i & 0x7) | ((i >> 4) & 0x8);
    }

public:
    // v immediate from an already-packed 32-bit value.
    static inline Immediate v(uint32_t i) {
        return Immediate(i, DataType::v);
    }

    // v immediate from eight signed 4-bit elements; i0 occupies the lowest nibble.
    static inline Immediate v(int8_t i0, int8_t i1, int8_t i2, int8_t i3, int8_t i4, int8_t i5, int8_t i6, int8_t i7) {
        uint32_t payload = (toV(i0) << 0)
                         | (toV(i1) << 4)
                         | (toV(i2) << 8)
                         | (toV(i3) << 12)
                         | (toV(i4) << 16)
                         | (toV(i5) << 20)
                         | (toV(i6) << 24)
                         | (toV(i7) << 28);
        return v(payload);
    }

    // Encode a float as one 8-bit element of a vf immediate:
    //   bit 7     sign (float bit 31)
    //   bits 4-6  exponent, re-biased from 127 to 3
    //   bits 0-3  top four mantissa bits (float bits 19-22)
    // Zero is encoded with exponent field 0. With NGEN_SAFE, throws if the
    // re-biased exponent does not fit in 3 bits or any of the low 19 mantissa
    // bits are set (i.e. the value is not exactly representable).
    static inline uint32_t toVF(float f) {
        uint32_t fi = utils::bitcast<float, uint32_t>(f);
        int exp = (fi >> 23) & 0xFF;
        int new_exp = exp - 127 + 3;

        if (f == 0.) new_exp = 0;

#ifdef NGEN_SAFE
        if ((new_exp & ~7) || (fi & 0x0007FFFF))
            throw invalid_immediate_exception();
#endif

        return ((fi >> 24) & 0x80)
             | ((new_exp & 0x7) << 4)
             | ((fi >> 19) & 0xF);
    }

    // vf immediate from four floats; f0 occupies the lowest byte.
    static inline Immediate vf(float f0, float f1, float f2, float f3) {
        uint32_t payload = (toVF(f0) << 0)
                         | (toVF(f1) << 8)
                         | (toVF(f2) << 16)
                         | (toVF(f3) << 24);

        return Immediate(payload, DataType::vf);
    }

    // Operand fixup hook. For immediates this only validates (NGEN_SAFE):
    // throws if the type is wider than (16 >> arity) bytes. The other
    // parameters are unused here.
    void fixup(HW hw, int execSize, DataType defaultType, bool isDest, int arity) const {
#ifdef NGEN_SAFE
        if (getBytes(type) > (16 >> arity))
            throw invalid_immediate_exception();
#endif
    }

    // True unless this is one of the packed-vector types (uv, v, vf).
    constexpr14 bool isScalar() const {
        switch (type) {
            case DataType::uv:
            case DataType::v:
            case DataType::vf:
                return false;
            default:
                return true;
        }
    }

    // Copy of this immediate with 16-bit integers widened to 32 bits:
    // uw -> ud (zero-extended), w -> d (sign-extended). Other types are unchanged.
    Immediate forceInt32() const {
        auto result = *this;
        if (result.type == DataType::uw)
            result.set<uint32_t>(uint16_t(payload));
        else if (result.type == DataType::w)
            result.set<int32_t>(int16_t(payload));
        return result;
    }

#ifdef NGEN_ASM
    inline void outputText(std::ostream &str, PrintDetail detail, LabelManager &man) const;
#endif
};
1970
// Derive the 8-bit ctrl field of a bfn instruction from a three-input boolean function.
// `func` is applied once, bitwise, to the patterns 0xAA, 0xCC and 0xF0; its result
// (converted to uint8_t) is the ctrl value.
// e.g. ctrl = getBFNCtrl([](uint8_t a, uint8_t b, uint8_t c) { return (a & b) | (c & ~b); });
template <typename F>
inline uint8_t getBFNCtrl(F func)
{
    const auto ctrl = func(0xAA, 0xCC, 0xF0);
    return ctrl;
}
1975
// Role a thread takes in a barrier. The numeric values are part of the
// encoding (8-bit underlying type) and must not be renumbered.
enum class BarrierType : uint8_t {
    ProducerConsumer = 0,
    Producer = 1,
    Consumer = 2,
};
1981
1982/********************************************************************/
1983/* HDC sends */
1984/********************************************************************/
// 32-bit send message descriptor.
//
// `all` is the raw value; every struct below is an alternative bit-field view
// of the same 32 bits, used by the different message encoders in this file.
// Fields are listed from the least-significant bit upward (as laid out by the
// compilers this code targets), so field order and widths are part of the
// encoding -- do not reorder.
union MessageDescriptor {
    uint32_t all;
    // Generic view shared by the HDC messages.
    struct {
        unsigned funcCtrl : 19;     /* SF-dependent */
        unsigned header : 1;        /* is a header present? */
        unsigned responseLen : 5;   /* # of GRFs returned: valid range 0-16 */
        unsigned messageLen : 4;    /* # of GRFs sent in src0: valid range 1-15 */
        unsigned : 3;
    } parts;
    // Low 8 bits: binding table / surface index.
    struct {
        unsigned index : 8;
        unsigned rest : 24;
    } bti;
    // Block (oword/hword/media) messages.
    struct {
        unsigned index : 8;
        unsigned elements : 3;
        unsigned subtype : 2;
        unsigned subtype2 : 1;
        unsigned messageType : 5;
        unsigned header : 1;
        unsigned responseLen : 5;
        unsigned messageLen : 4;
        unsigned : 3;
    } block;
    // Scattered messages on non-A64 address models.
    struct {
        unsigned index : 8;
        unsigned simd16 : 1;
        unsigned legacySIMD : 1;
        unsigned elements : 2;
        unsigned : 1;
        unsigned : 1;
        unsigned messageType : 5;
        unsigned header : 1;
        unsigned responseLen : 5;
        unsigned messageLen : 4;
        unsigned : 3;
    } scattered;
    // Scattered messages on the A64 address model.
    struct {
        unsigned index : 8;
        unsigned subtype : 2;
        unsigned elements : 2;
        unsigned simd16 : 1;
        unsigned : 1;
        unsigned messageType : 5;
        unsigned header : 1;
        unsigned responseLen : 5;
        unsigned messageLen : 4;
        unsigned : 3;
    } a64_scattered;
    // Atomic messages.
    struct {
        unsigned index : 8;
        unsigned atomicOp : 4;
        unsigned simd8 : 1;         // or data width.
        unsigned returnData : 1;
        unsigned messageType : 5;
        unsigned header : 1;
        unsigned responseLen : 5;
        unsigned messageLen : 4;
        unsigned : 3;
    } atomic;
    // Surface (channel-masked) messages.
    struct {
        unsigned index : 8;
        unsigned cmask : 4;
        unsigned simdMode : 2;
        unsigned messageType : 5;
        unsigned header : 1;
        unsigned responseLen : 5;
        unsigned messageLen : 4;
        unsigned : 3;
    } surface;
    // Standard LSC message layout.
    struct {
        unsigned opcode : 6;
        unsigned : 1;
        unsigned addrSize : 2;
        unsigned dataSize : 3;
        unsigned vectSize : 3;
        unsigned transpose : 1;
        unsigned : 1;
        unsigned cache : 3;
        unsigned : 9;
        unsigned model : 2;
        unsigned : 1;
    } standardLSC;
    // LSC channel mask: bits 12-15 (overlaps vectSize/transpose in standardLSC).
    struct {
        unsigned : 12;
        unsigned cmask : 4;
        unsigned : 16;
    } cmask;
    // LSC 2D block: VNNI flag in bit 7.
    struct {
        unsigned : 7;
        unsigned vnni : 1;
        unsigned : 24;
    } block2D;

    // Default: all bits cleared.
    MessageDescriptor() : all(0) {}
    // Wrap a raw 32-bit descriptor value.
    explicit constexpr MessageDescriptor(uint32_t all_) : all(all_) {}
};
2082
2083inline constexpr MessageDescriptor operator|(const MessageDescriptor &desc1, const MessageDescriptor &desc2) {
2084 return MessageDescriptor{desc1.all | desc2.all};
2085}
2086
// 32-bit extended send message descriptor.
//
// `all` is the raw value; the structs are alternative bit-field views of the
// same 32 bits (fields listed from the least-significant bit upward). Field
// order and widths are part of the encoding -- do not reorder.
union ExtendedMessageDescriptor {
    uint32_t all;
    // Generic view.
    struct {
        unsigned sfid : 5;
        unsigned eot : 1;
        unsigned extMessageLen : 5;    /* # of GRFs sent in src1: valid range 0-15 (pre-Gen12) */
        unsigned : 1;
        unsigned : 4;                  /* Part of exFuncCtrl for non-immediate sends */
        unsigned exFuncCtrl : 16;
    } parts;
    // Flat addressing: signed 20-bit offset in bits 12-31.
    struct {
        unsigned : 12;
        signed offset : 20;
    } flat;
    // BTI addressing: signed 12-bit offset in bits 12-23, surface index in bits 24-31.
    struct {
        unsigned : 12;
        signed offset : 12;
        unsigned index : 8;
    } bti;
    // Surface-state addressing: 26-bit index in bits 6-31.
    struct {
        unsigned : 6;
        unsigned index : 26;
    } surface;

    // Default: all bits cleared.
    ExtendedMessageDescriptor() : all(0) {}
    // Assigning a shared function only sets the sfid field; all other bits are left untouched.
    ExtendedMessageDescriptor& operator=(SharedFunction sfid_) { parts.sfid = static_cast<int>(sfid_); return *this; }
};
2114
// Atomic operations.
//
// Each value packs two encodings into 16 bits:
//   - low byte:  consumed by the HDC atomic path in this file (low 4 bits are
//                written to the descriptor's atomicOp field; bit 4 marks
//                floating-point operations, see isFloatAtomicOp()).
//   - high byte: consumed by the LSC path (right-shifted by 8 into the LSC opcode field).
// A zero high byte presumably means the operation has no LSC opcode -- TODO confirm.
enum class AtomicOp : uint16_t {
    cmpwr_2w = 0x00,
    and_     = 0x1801,
    or_      = 0x1902,
    xor_     = 0x1A03,
    mov      = 0x0B04,
    inc      = 0x0805,
    dec      = 0x0906,
    add      = 0x0C07,
    sub      = 0x0D08,
    revsub   = 0x09,
    imax     = 0x0F0A,
    imin     = 0x0E0B,
    umax     = 0x110C,
    umin     = 0x100D,
    cmpwr    = 0x120E,
    predec   = 0x000F,
    fmax     = 0x1611,
    fmin     = 0x1512,
    fcmpwr   = 0x1713,
    fadd     = 0x1314,
    fsub     = 0x1415,
    fadd_64b = 0x1316,
    fsub_64b = 0x1417,
    load     = 0x0A00,

    // Aliases.
    store    = mov,
    cmpxchg  = cmpwr,
    fcmpxchg = fcmpwr,
};

// Operand count associated with an atomic operation:
//   1 for inc, dec, predec and load;
//   3 for the compare-and-write family (cmpwr_2w, cmpwr, fcmpwr);
//   2 for everything else.
static inline int operandCount(AtomicOp op) {
    const bool oneOperand = (op == AtomicOp::inc)
                         || (op == AtomicOp::dec)
                         || (op == AtomicOp::predec)
                         || (op == AtomicOp::load);
    if (oneOperand)
        return 1;

    const bool threeOperands = (op == AtomicOp::cmpwr_2w)
                            || (op == AtomicOp::cmpwr)
                            || (op == AtomicOp::fcmpwr);
    return threeOperands ? 3 : 2;
}

// True when bit 4 of the operation's encoding is set, which is the case for
// exactly the floating-point operations listed in AtomicOp.
static inline constexpr bool isFloatAtomicOp(AtomicOp op) {
    return (static_cast<int>(op) & 0x10) != 0;
}
2164
// Access types.
// Passed as the template argument of the getDescriptors<> members of the data
// specification classes to select the message encoding (read, write, or
// integer/floating-point atomic).
enum class Access {Read, Write, AtomicInteger, AtomicFloat};
2167
// Address models.
// Most values are distinct single bits so that sets of allowed models can be
// OR-ed together and tested with a mask (see AddressBase::checkModel).
// NOTE(review): ModelBSS (0x81) shares bit 0 with ModelBTS, so a mask that
// allows ModelBTS also accepts ModelBSS -- confirm this is intended.
enum AddressModel : uint8_t {
    ModelInvalid = 0,
    ModelBTS = 1,
    ModelA32 = 2,
    ModelA64 = 4,
    ModelSLM = 8,
    ModelCC = 0x10,
    ModelSC = 0x20,
    ModelScratch = 0x40,
    ModelSS = 0x80,
    ModelBSS = 0x81,
};

// Base of a memory access: an address model plus a surface/binding index.
//
// Instances are created through the static create*() factories. The index is
// kept as a full 32-bit value because surface-state (SS/BSS) indices are wider
// than the 8-bit binding table indices used by the other models.
class AddressBase {
protected:
    uint32_t index;
    AddressModel model;

    // The index parameter is 32 bits wide so that createSS()/createBSS() do not
    // silently truncate their argument to 8 bits.
    constexpr AddressBase(uint32_t index_, AddressModel model_) : index(index_), model(model_) {}

    static const uint8_t invalidIndex = 0xF0;

public:
    // Default: invalid model with the placeholder index 0xF0.
    constexpr AddressBase() : AddressBase(invalidIndex, ModelInvalid) {}

    constexpr uint32_t getIndex()     const { return index; }
    constexpr AddressModel getModel() const { return model; }

    // Replace the index, keeping the model. Accepts the full 32-bit range.
    void setIndex(uint32_t newIndex)        { index = newIndex; }

    // Binding table surface with the given 8-bit index.
    static constexpr AddressBase createBTS(uint8_t index) {
        return AddressBase(index, ModelBTS);
    }
    // A32 model: index 0xFF when coherent, 0xFD otherwise.
    static constexpr AddressBase createA32(bool coherent) {
        return AddressBase(coherent ? 0xFF : 0xFD, ModelA32);
    }
    // A64 model: index 0xFF when coherent, 0xFD otherwise.
    static constexpr AddressBase createA64(bool coherent) {
        return AddressBase(coherent ? 0xFF : 0xFD, ModelA64);
    }
    // SLM model: fixed index 0xFE.
    static constexpr AddressBase createSLM() {
        return AddressBase(0xFE, ModelSLM);
    }
    // CC model with the given 8-bit index.
    static constexpr AddressBase createCC(uint8_t index) {
        return AddressBase(index, ModelCC);
    }
    // SC model with the given 8-bit index.
    static constexpr AddressBase createSC(uint8_t index) {
        return AddressBase(index, ModelSC);
    }
    // SS model; the full 32-bit index is preserved.
    static constexpr AddressBase createSS(uint32_t index) {
        return AddressBase(index, ModelSS);
    }
    // BSS model; the full 32-bit index is preserved.
    static constexpr AddressBase createBSS(uint32_t index) {
        return AddressBase(index, ModelBSS);
    }

    // True for the read-only models (SC and CC).
    inline constexpr bool isRO() const {
        return (getModel() == ModelSC || getModel() == ModelCC);
    }
    // True for the A32 and A64 models.
    inline constexpr bool isStateless() const {
        return (model & (ModelA32 | ModelA64)) != 0;
    }

    // With NGEN_SAFE defined, throws invalid_model_exception unless this
    // object's model shares at least one bit with `allowed` (an OR of
    // AddressModel values). Without NGEN_SAFE this is a no-op.
    void checkModel(uint8_t allowed) const { checkModel(static_cast<AddressModel>(allowed)); }
    void checkModel(AddressModel allowed) const {
#ifdef NGEN_SAFE
        if (!(model & allowed))
            throw invalid_model_exception();
#endif
    }
};
2239
2240
2241class block_hword {
2242protected:
2243 uint8_t count;
2244
2245public:
2246 block_hword(int count_ = 1) : count(count_) {};
2247
2248 template <Access access> void getDescriptors(HW hw, const InstructionModifier &mod, AddressBase base, MessageDescriptor &desc, ExtendedMessageDescriptor &exdesc, const RegData &addr) const
2249 {
2250 int dataGRFCount = count;
2251 if (GRF::bytes(hw) == 64) dataGRFCount = (dataGRFCount + 1) >> 1;
2252
2253 base.checkModel(ModelA64 | ModelBTS | ModelA32 | ModelSLM);
2254 desc.all = 0;
2255 desc.bti.index = base.getIndex();
2256 desc.block.elements = 1 + utils::log2(count);
2257 desc.block.header = true;
2258 desc.block.messageLen = 1;
2259 desc.block.responseLen = dataGRFCount;
2260
2261 if (base.getModel() == ModelA64) {
2262 exdesc = SharedFunction::dc1;
2263 desc.block.subtype = 0x3;
2264 desc.block.messageType = (access == Access::Write) ? 0x15 : 0x14;
2265 } else {
2266 exdesc = SharedFunction::dc0;
2267 desc.block.messageType = 0x1;
2268 desc.block.subtype2 = 1;
2269 }
2270 }
2271};
2272
2273class block_oword {
2274protected:
2275 uint8_t count;
2276 uint8_t highHalf;
2277
2278 constexpr block_oword(uint8_t count_, bool highHalf_) : count(count_), highHalf(highHalf_) {}
2279
2280public:
2281 block_oword(int count_ = 1) : count(count_), highHalf(false) {}
2282 static block_oword high() { return block_oword(1, true); }
2283
2284 template <Access access> void getDescriptors(HW hw, const InstructionModifier &mod, AddressBase base, MessageDescriptor &desc, ExtendedMessageDescriptor &exdesc, const RegData &addr) const
2285 {
2286 int dataGRFCount = (GRF::bytes(hw) == 64) ? (count + 3) >> 2 : (count + 1) >> 1;
2287
2288 base.checkModel(ModelA32 | ModelA64 | ModelBTS | ModelCC | ModelSLM);
2289 exdesc = (base.getModel() == ModelCC) ? SharedFunction::dcro :
2290 (base.getModel() == ModelA64) ? SharedFunction::dc1 :
2291 SharedFunction::dc0;
2292
2293 desc.all = 0;
2294 desc.bti.index = base.getIndex();
2295 desc.parts.header = true;
2296 desc.parts.messageLen = 1;
2297 desc.parts.responseLen = dataGRFCount;
2298 desc.block.elements = (count == 1) ? highHalf : (1 + utils::log2(count));
2299
2300 if (base.getModel() == ModelA64)
2301 desc.block.messageType = (access == Access::Write) ? 0x15 : 0x14;
2302 else
2303 desc.block.messageType = (access == Access::Write) << 3;
2304 }
2305};
2306
2307class aligned_block_oword {
2308protected:
2309 uint8_t count;
2310 uint8_t highHalf;
2311
2312 constexpr aligned_block_oword(uint8_t count_, bool highHalf_) : count(count_), highHalf(highHalf_) {}
2313
2314public:
2315 aligned_block_oword(int count_ = 1) : count(count_), highHalf(false) {}
2316 static aligned_block_oword high() { return aligned_block_oword(1, true); }
2317
2318 template <Access access> void getDescriptors(HW hw, const InstructionModifier &mod, AddressBase base, MessageDescriptor &desc, ExtendedMessageDescriptor &exdesc, const RegData &addr) const
2319 {
2320 int dataGRFCount = (GRF::bytes(hw) == 64) ? (count + 3) >> 2 : (count + 1) >> 1;
2321
2322 base.checkModel(ModelA32 | ModelA64 | ModelBTS | ModelCC | ModelSLM | ModelSC);
2323 exdesc = (base.getModel() == ModelCC || base.getModel() == ModelSC) ? SharedFunction::dcro :
2324 (base.getModel() == ModelA64) ? SharedFunction::dc1 :
2325 SharedFunction::dc0;
2326
2327 desc.all = 0;
2328 desc.bti.index = base.getIndex();
2329 desc.parts.header = true;
2330 desc.parts.messageLen = 1;
2331 desc.parts.responseLen = dataGRFCount;
2332 desc.block.elements = (count == 1) ? highHalf : (1 + utils::log2(count));
2333
2334 if (base.getModel() == ModelA64) {
2335 desc.block.messageType = (access == Access::Write) ? 0x15 : 0x14;
2336 desc.block.subtype = 1;
2337 } else if (base.getModel() == ModelSC)
2338 desc.block.messageType = 4;
2339 else
2340 desc.block.messageType = ((access == Access::Write) << 3) + 1;
2341 }
2342};
2343
2344class scattered_byte {
2345protected:
2346 uint8_t count;
2347
2348public:
2349 scattered_byte(int count_ = 1) : count(count_) {}
2350
2351 template <Access access> void getDescriptors(HW hw, const InstructionModifier &mod, AddressBase base, MessageDescriptor &desc, ExtendedMessageDescriptor &exdesc, const RegData &addr) const
2352 {
2353 bool a64 = (base.getModel() == ModelA64);
2354 int simd16 = mod.getExecSize() >> 4;
2355 int dataGRFCount = 1 + simd16;
2356 int addrGRFCount = dataGRFCount << int(a64);
2357 if (GRF::bytes(hw) == 64) {
2358 dataGRFCount = 1;
2359 addrGRFCount = 1 << int(a64);
2360 simd16 = 1;
2361 }
2362
2363 base.checkModel(ModelA32 | ModelA64 | ModelBTS | ModelSLM);
2364 desc.all = 0;
2365 desc.bti.index = base.getIndex();
2366 desc.parts.header = false;
2367 desc.parts.messageLen = addrGRFCount;
2368 desc.parts.responseLen = dataGRFCount;
2369
2370 if (a64) {
2371 exdesc = SharedFunction::dc1;
2372 desc.a64_scattered.elements = utils::log2(count);
2373 desc.a64_scattered.simd16 = simd16;
2374 desc.a64_scattered.subtype = 0;
2375 } else {
2376 exdesc = SharedFunction::dc0;
2377 desc.scattered.elements = utils::log2(count);
2378 desc.scattered.simd16 = simd16;
2379 }
2380
2381 if (access == Access::Write)
2382 desc.scattered.messageType = a64 ? 0x1A : 0xC;
2383 else
2384 desc.scattered.messageType = a64 ? 0x10 : 0x4;
2385 }
2386};
2387
2388class scattered_atomic {
2389public:
2390 void applyAtomicOp(AtomicOp op, const RegData &dst, MessageDescriptor &desc) const
2391 {
2392 desc.atomic.returnData = !dst.isNull();
2393 desc.atomic.atomicOp = static_cast<int>(op) & 0xF;
2394 }
2395};
2396
2397class scattered_word : public scattered_atomic {
2398public:
2399 template <Access access> void getDescriptors(HW hw, const InstructionModifier &mod, AddressBase base, MessageDescriptor &desc, ExtendedMessageDescriptor &exdesc, const RegData &addr) const
2400 {
2401 bool a64 = (base.getModel() == ModelA64);
2402 int simd16 = mod.getExecSize() >> 4;
2403 int addrGRFCount = (1 + simd16) << int(a64);
2404 int dataGRFCount = 1 + simd16;
2405 if (GRF::bytes(hw) == 64) {
2406 addrGRFCount = 1 << int(a64);
2407 dataGRFCount = 1;
2408 simd16 = 1;
2409 }
2410
2411#ifdef NGEN_SAFE
2412 if (!(access == Access::AtomicInteger || access == Access::AtomicFloat))
2413 throw invalid_load_store_exception();
2414#endif
2415 base.checkModel(ModelA32 | ModelA64 | ModelBTS | ModelSLM);
2416 exdesc = SharedFunction::dc1;
2417 desc.all = 0;
2418 desc.bti.index = base.getIndex();
2419 desc.parts.header = false;
2420 desc.parts.messageLen = addrGRFCount;
2421 desc.parts.responseLen = dataGRFCount;
2422
2423 if (access == Access::AtomicFloat)
2424 desc.atomic.messageType = a64 ? 0x1E : 0x1C;
2425 else
2426 desc.atomic.messageType = a64 ? 0x13 : 0x03;
2427
2428 desc.atomic.simd8 = a64 ? 0 : !simd16;
2429 }
2430};
2431
2432class scattered_dword : public scattered_atomic {
2433protected:
2434 uint8_t count;
2435
2436public:
2437 scattered_dword(int count_ = 1) : count(count_) {}
2438
2439 template <Access access> void getDescriptors(HW hw, const InstructionModifier &mod, AddressBase base, MessageDescriptor &desc, ExtendedMessageDescriptor &exdesc, const RegData &addr) const
2440 {
2441 bool a64 = (base.getModel() == ModelA64);
2442 int simd16 = mod.getExecSize() >> 4;
2443 int addrGRFCount = (1 + simd16) << int(a64);
2444 int dataGRFCount = count * (1 + simd16);
2445 if (GRF::bytes(hw) == 64) {
2446 addrGRFCount = 1 << int(a64);
2447 dataGRFCount = count;
2448 simd16 = 1;
2449 }
2450
2451 desc.all = 0;
2452 desc.bti.index = base.getIndex();
2453 desc.parts.header = false;
2454 desc.parts.messageLen = addrGRFCount;
2455 desc.parts.responseLen = dataGRFCount;
2456
2457 if (access == Access::AtomicInteger || access == Access::AtomicFloat) {
2458 base.checkModel(ModelA32 | ModelA64 | ModelBTS | ModelSLM);
2459 exdesc = SharedFunction::dc1;
2460 if (access == Access::AtomicFloat)
2461 desc.atomic.messageType = a64 ? 0x1D : 0x1B;
2462 else
2463 desc.atomic.messageType = a64 ? 0x12 : 0x02;
2464 desc.atomic.simd8 = a64 ? 0 : !simd16;
2465 } else if (a64) {
2466 exdesc = SharedFunction::dc1;
2467 desc.a64_scattered.elements = utils::log2(count);
2468 desc.a64_scattered.simd16 = simd16;
2469 desc.a64_scattered.subtype = 0x1;
2470 desc.a64_scattered.messageType = (access == Access::Write) ? 0x1A : 0x10;
2471 } else {
2472 base.checkModel(ModelA32 | ModelBTS | ModelCC);
2473 exdesc = (base.getModel() == ModelCC) ? SharedFunction::dcro : SharedFunction::dc0;
2474 desc.scattered.elements = utils::log2(count);
2475 desc.scattered.legacySIMD = 1;
2476 desc.scattered.simd16 = simd16;
2477 desc.scattered.messageType = (access == Access::Write) ? 0xB : 0x3;
2478 }
2479 }
2480};
2481
2482class scattered_qword : public scattered_atomic {
2483protected:
2484 uint8_t count;
2485
2486public:
2487 scattered_qword(int count_ = 1) : count(count_) {}
2488
2489 template <Access access> void getDescriptors(HW hw, const InstructionModifier &mod, AddressBase base, MessageDescriptor &desc, ExtendedMessageDescriptor &exdesc, const RegData &addr) const
2490 {
2491 bool a64 = (base.getModel() == ModelA64);
2492 int simd16 = mod.getExecSize() >> 4;
2493 int addrGRFCount = (1 + simd16) << int(a64);
2494 int dataGRFCount = count * 2 * (1 + simd16);
2495 if (GRF::bytes(hw) == 64) {
2496 addrGRFCount = 1 << int(a64);
2497 dataGRFCount = count * 2;
2498 simd16 = 1;
2499 }
2500
2501 base.checkModel(ModelA32 | ModelA64 | ModelBTS | ModelSLM);
2502 desc.all = 0;
2503 desc.bti.index = base.getIndex();
2504 desc.parts.header = false;
2505 desc.parts.messageLen = addrGRFCount;
2506 desc.parts.responseLen = dataGRFCount;
2507
2508 if (access == Access::AtomicInteger || access == Access::AtomicFloat) {
2509 // Note: atomics have same encoding as scattered dword. The atomic operation type
2510 // determines the length. The one exception is A64 atomic float.
2511 exdesc = SharedFunction::dc1;
2512 if (access == Access::AtomicFloat) {
2513 desc.atomic.messageType = a64 ? 0x1D : 0x1B;
2514 desc.atomic.simd8 = a64 ? 0 : !simd16;
2515 } else {
2516 desc.atomic.messageType = a64 ? 0x12 : 0x02;
2517 desc.atomic.simd8 = a64 ? 1 : !simd16;
2518 }
2519 } else if (a64) {
2520 exdesc = SharedFunction::dc1;
2521 desc.a64_scattered.elements = utils::log2(count);
2522 desc.a64_scattered.simd16 = simd16;
2523 desc.a64_scattered.subtype = 0x2;
2524 desc.a64_scattered.messageType = (access == Access::Write) ? 0x1A : 0x10;
2525 } else {
2526 exdesc = SharedFunction::dc0;
2527 desc.scattered.elements = utils::log2(count);
2528 desc.scattered.legacySIMD = 1;
2529 desc.scattered.simd16 = simd16;
2530 desc.scattered.messageType = (access == Access::Write) ? 0xD : 0x5;
2531 }
2532 }
2533};
2534
2535class surface_dword {
2536protected:
2537 ChannelMask cmask;
2538 bool structured;
2539
2540public:
2541 surface_dword(ChannelMask cmask_ = ChannelMask::r, bool structured_ = false) : cmask(cmask_), structured(structured_) {}
2542
2543 template <Access access> void getDescriptors(HW hw, const InstructionModifier &mod, AddressBase base, MessageDescriptor &desc, ExtendedMessageDescriptor &exdesc, const RegData &addr) const
2544 {
2545 int simd16 = mod.getExecSize() >> 4;
2546 if (GRF::bytes(hw) == 64) simd16 = 1;
2547 int nChannels = utils::popcnt(0xF ^ static_cast<int8_t>(cmask));
2548 bool isA64 = base.getModel() == ModelA64;
2549 int addrGRFCount = (1 + simd16) << int(isA64) << int(structured);
2550 int dataGRFCount = nChannels * (1 + simd16);
2551 if (GRF::bytes(hw) == 64) {
2552 addrGRFCount = (addrGRFCount + 1) >> 1;
2553 dataGRFCount = (dataGRFCount + 1) >> 1;
2554 }
2555
2556 base.checkModel(ModelBTS | ModelA32 | ModelA64 | ModelSLM);
2557
2558 exdesc = SharedFunction::dc1;
2559
2560 desc.all = 0;
2561 desc.bti.index = base.getIndex();
2562 desc.parts.header = false;
2563 desc.parts.messageLen = addrGRFCount;
2564 desc.parts.responseLen = dataGRFCount;
2565 desc.surface.messageType = (isA64 << 4) | ((access == Access::Write) << 3) | 0x01;
2566 desc.surface.cmask = static_cast<int>(cmask);
2567 desc.surface.simdMode = 2 - simd16;
2568 }
2569};
2570
2571class media_block {
2572protected:
2573 bool vls_override;
2574 uint8_t vls_offset;
2575 uint8_t width;
2576 uint8_t height;
2577
2578public:
2579 media_block(int width_, int height_) : vls_override(false), vls_offset(0),
2580 width(width_), height(height_) {}
2581 media_block(int width_, int height_, int vls_offset_) : vls_override(true),
2582 vls_offset(vls_offset_), width(width_), height(height_) {}
2583 media_block() : media_block(0, 0) {}
2584
2585 template <Access access> void getDescriptors(HW hw, const InstructionModifier &mod, AddressBase base, MessageDescriptor &desc, ExtendedMessageDescriptor &exdesc, const RegData &addr) const
2586 {
2587 exdesc = SharedFunction::dc1;
2588 desc.all = 0;
2589 desc.bti.index = base.getIndex();
2590 desc.block.messageType = (base.getModel() == ModelSC) ? 0x05 :
2591 (access == Access::Write) ? 0x0A :
2592 0x04;
2593 desc.block.elements = (vls_override << 2) | (vls_offset & 1);
2594 desc.block.header = true;
2595
2596 int dataGRFCount = 0;
2597 if (width > 0) {
2598 int lg2_rows_per_2grf = std::min<int>(4, 6 - utils::bsr(width));
2599 dataGRFCount = utils::roundup_pow2((height + (1 << lg2_rows_per_2grf) - 1) >> lg2_rows_per_2grf);
2600 }
2601
2602 desc.parts.responseLen = dataGRFCount;
2603 desc.parts.messageLen = 1;
2604 }
2605};
2606
2607/********************************************************************/
2608/* New dataport messages. */
2609/********************************************************************/
// Opcodes for the new dataport (LSC) messages; written to the 6-bit
// standardLSC.opcode field of MessageDescriptor. The numeric values are part of
// the encoding. For loads/stores, OR-ing `store` (4) into a load opcode yields
// the matching store opcode (load -> store, load_cmask -> store_cmask, ...).
enum class LSCOpcode : uint8_t {
    load = 0,
    load_cmask = 2,
    store = 4,
    store_cmask = 6,
    atomic_inc = 8,
    atomic_dec = 9,
    atomic_load = 0xA,
    atomic_store = 0xB,
    atomic_add = 0xC,
    atomic_sub = 0xD,
    atomic_min = 0xE,
    atomic_max = 0xF,
    atomic_umin = 0x10,
    atomic_umax = 0x11,
    atomic_cmpxchg = 0x12,
    atomic_fadd = 0x13,
    atomic_fsub = 0x14,
    atomic_fmin = 0x15,
    atomic_fmax = 0x16,
    atomic_fcmpxchg = 0x17,
    atomic_and = 0x18,
    atomic_or = 0x19,
    atomic_xor = 0x1A,
    load_status = 0x1B,
    store_uncompressed = 0x1C,
    ccs_update = 0x1D,
    rsi = 0x1E,
    fence = 0x1F,
    // Block variants (listed out of numeric order).
    load_block = 1,
    load_2dblock = 3,
    store_block = 5,
    store_2dblock = 7,
};
2644
// LSC data sizes.
// Each value packs two pieces of information:
//   high byte: bytes occupied per element in registers (see getRegisterWidth)
//   low byte:  the data-size code placed in the message descriptor
enum class DataSizeLSC : uint16_t {
    D8     = (1 << 8) | 0,
    D16    = (2 << 8) | 1,
    D32    = (4 << 8) | 2,
    D64    = (8 << 8) | 3,
    D8U32  = (4 << 8) | 4,
    D16U32 = (4 << 8) | 5,
};

// Bytes each element of the given data size occupies in registers
// (the high byte of the enumerator value).
static inline constexpr unsigned getRegisterWidth(DataSizeLSC dsize) {
    return static_cast<unsigned>(dsize) >> 8;
}
2657
// LSC cache control settings; written to the 3-bit standardLSC.cache field of
// MessageDescriptor. Several encodings have two names (e.g. L1UC_L3C and
// L1UC_L3WB are both 2) -- presumably one spelling for loads and one for
// stores; confirm against the hardware documentation.
enum class CacheSettingsLSC : uint8_t {
    Default   = 0,
    L1UC_L3UC = 1,
    L1UC_L3C  = 2,    L1UC_L3WB = 2,
    L1C_L3UC  = 3,    L1WT_L3UC = 3,
    L1C_L3C   = 4,    L1WT_L3WB = 4,
    L1S_L3UC  = 5,
    L1S_L3C   = 6,    L1S_L3WB  = 6,
    L1IAR_L3C = 7,    L1WB_L3WB = 7,
};
2668
// Data specification for LSC messages.
//
// A spec is a partially filled message descriptor plus two size hints. Specs
// are composed with operator| (descriptor bits and hints are OR-ed together),
// which is why channel masks, cache settings and data sizes convert implicitly
// to DataSpecLSC.
struct DataSpecLSC {
    MessageDescriptor desc;     // descriptor bits contributed by this spec
    uint8_t vcount = 0;         // vector length / number of enabled channels; 0 is treated as 1 when sizing
    uint8_t dbytes = 0;         // register bytes per element (from getRegisterWidth); 0 if no data size given

    // Values for the standardLSC.addrSize field.
    enum { AddrSize16 = 1, AddrSize32 = 2, AddrSize64 = 3 };
    // Values for the standardLSC.model field.
    enum { AddrFlat = 0, AddrSS = 1, AddrBSS = 2, AddrBTI = 3 };

    explicit constexpr DataSpecLSC(MessageDescriptor desc_, uint8_t vcount_ = 0, uint8_t dbytes_ = 0) : desc(desc_), vcount(vcount_), dbytes(dbytes_) {}
    // Channel-masked access: selects the load_cmask opcode, stores the
    // complement of `m` (within 4 bits) as the descriptor's channel mask, and
    // sets vcount to the number of bits set in that stored mask.
    /* implicit */ DataSpecLSC(ChannelMask m) {
        desc.standardLSC.opcode = static_cast<uint8_t>(LSCOpcode::load_cmask);
        desc.cmask.cmask = static_cast<uint8_t>(m) ^ 0xF;
        vcount = utils::popcnt(desc.cmask.cmask);
    }
    // Cache setting only.
    /* implicit */ DataSpecLSC(CacheSettingsLSC s) {
        desc.standardLSC.cache = static_cast<unsigned>(s);
    }
    // Data size only: the low 3 bits of `d` go to descriptor bits 9-11
    // (standardLSC.dataSize); dbytes is taken from the high byte of `d`.
    /* implicit */ constexpr DataSpecLSC(DataSizeLSC d) : desc((static_cast<uint32_t>(d) & 0x7) << 9), dbytes(getRegisterWidth(d)) {}

    // Copy of this spec with a vector length of `vcount` elements added.
    // Encoding: vcount - 1 for vcount <= 4, otherwise log2(vcount) + 1.
    DataSpecLSC operator()(int vcount) const {
        auto vsEncoded = (vcount <= 4) ? (vcount - 1) : (utils::log2(vcount) + 1);
        return *this | createV(vcount, vsEncoded);
    }
    friend inline constexpr DataSpecLSC operator|(const DataSpecLSC &s1, const DataSpecLSC &s2);
    constexpr14 DataSpecLSC &operator|=(const DataSpecLSC &other) {
        *this = *this | other;
        return *this;
    }

    // Building blocks: vector size (descriptor bits 12+), transpose flag (bit 15), VNNI flag (bit 7).
    static constexpr DataSpecLSC createV(unsigned vcount, unsigned venc) { return DataSpecLSC{MessageDescriptor(venc << 12), uint8_t(vcount), 0}; }
    static constexpr DataSpecLSC createTranspose() { return DataSpecLSC{MessageDescriptor(1 << 15)}; }
    static constexpr DataSpecLSC createVNNI() { return DataSpecLSC{MessageDescriptor(1 << 7)}; }

    // Produce the final descriptors for an access of kind `access` on `base`
    // at address `addr` (register plus displacement).
    //
    // - Shared function: slm for the SLM model, ugm otherwise.
    // - Address size: 64-bit for A64, 32-bit otherwise.
    // - The A32 model is rewritten to BTS with index 0xFF.
    // - Unsupported models throw invalid_model_exception when NGEN_SAFE is
    //   defined and are otherwise silently ignored.
    // - For Access::Write the `store` opcode bit is OR-ed into the opcode.
    template <Access access> void getDescriptors(HW hw, const InstructionModifier &mod, AddressBase base, MessageDescriptor &desc, ExtendedMessageDescriptor &exdesc, const GRFDisp &addr) const
    {
        bool a64 = (base.getModel() == ModelA64);
        desc = this->desc;
        exdesc = (base.getModel() == ModelSLM) ? SharedFunction::slm : SharedFunction::ugm;

        desc.standardLSC.addrSize = a64 ? AddrSize64 : AddrSize32;

        if (base.getModel() == ModelA32) base = AddressBase::createBTS(0xFF);

        // Addressing model and where the displacement / surface index go in exdesc.
        switch (base.getModel()) {
            case ModelA64:
            case ModelSLM:
                desc.standardLSC.model = AddrFlat;
                exdesc.flat.offset = addr.getDisp();
                break;
            case ModelBTS:
                desc.standardLSC.model = AddrBTI;
                exdesc.bti.index = base.getIndex();
                exdesc.bti.offset = addr.getDisp();
                break;
            case ModelSS:
            case ModelBSS:
                // Surface-state models carry only the index; the displacement is not encoded.
                desc.standardLSC.model = (base.getModel() == ModelSS ? AddrSS : AddrBSS);
                exdesc.surface.index = base.getIndex();
                break;
            default:
#ifdef NGEN_SAFE
                throw invalid_model_exception();
#endif
                break;
        }

        // Message sizes. A transposed access whose spec opcode is 0 (plain
        // load/store) uses a single address GRF and sizes data by total bytes;
        // everything else scales with the execution size.
        auto vc = std::max<unsigned>(vcount, 1);
        if (this->desc.standardLSC.transpose && !desc.standardLSC.opcode) {
            desc.parts.messageLen = 1;
            desc.parts.responseLen = GRF::bytesToGRFs(hw, dbytes * vc);
        } else {
            auto effSIMDGRFs = 1 + ((mod.getExecSize()) >> (GRF::log2Bytes(hw) - 1));
            desc.parts.messageLen = effSIMDGRFs * (a64 ? 2 : 1);
            // 8-byte elements (dbytes >> 3 == 1) need twice the data registers.
            desc.parts.responseLen = effSIMDGRFs * vc * (1 + (dbytes >> 3));
        }

        if (access == Access::Write)
            desc.standardLSC.opcode |= static_cast<uint8_t>(LSCOpcode::store);
    }

    // Replace the descriptor opcode with the high byte of `op`. `dst` is unused here.
    void applyAtomicOp(AtomicOp op, const RegData &dst, MessageDescriptor &desc) const
    {
        desc.standardLSC.opcode = static_cast<uint16_t>(op) >> 8;
    }
};
2754
2755static inline DataSpecLSC scattered(const DataSpecLSC &dtype, int vsize = 1) { return dtype(vsize); }
2756static inline DataSpecLSC block(const DataSpecLSC &dtype, int vsize = 1) { return dtype(vsize) | DataSpecLSC::createTranspose(); }
2757
2758inline constexpr DataSpecLSC operator|(const DataSpecLSC &s1, const DataSpecLSC &s2) {
2759 return DataSpecLSC{s1.desc | s2.desc, uint8_t(s1.vcount | s2.vcount), uint8_t(s1.dbytes | s2.dbytes)};
2760}
2761
2762class block_2d : public DataSpecLSC {
2763protected:
2764 uint8_t width, height, count;
2765
2766public:
2767 block_2d(const DataSpecLSC &dtype_, int width_, int height_, int count_ = 1) : DataSpecLSC(dtype_), width(width_), height(height_), count(count_) {}
2768
2769 friend block_2d operator|(block_2d left, const DataSpecLSC &right) {
2770 left.DataSpecLSC::operator|=(right);
2771 return left;
2772 }
2773
2774 template <Access access> void getDescriptors(HW hw, const InstructionModifier &mod, AddressBase base, MessageDescriptor &desc, ExtendedMessageDescriptor &exdesc, const GRFDisp &addr) const
2775 {
2776 base.checkModel(ModelA64);
2777
2778 desc = this->desc;
2779
2780 desc.standardLSC.opcode = static_cast<uint8_t>((access == Access::Write) ? LSCOpcode::store_2dblock : LSCOpcode::load_2dblock);
2781 desc.standardLSC.model = AddrFlat;
2782
2783 auto w = width, h = height;
2784 if (this->desc.standardLSC.transpose) std::swap(w, h);
2785 desc.parts.messageLen = 1;
2786 desc.parts.responseLen = std::min(count * GRF::bytesToGRFs(hw, utils::roundup_pow2(w) * h * this->dbytes), 31);
2787
2788 exdesc = SharedFunction::ugm;
2789 exdesc.flat.offset = addr.getDisp();
2790 }
2791};
2792
2793// Generate descriptors for a load operation.
2794template <typename DataSpec, typename Addr>
2795static inline void encodeLoadDescriptors(HW hw, MessageDescriptor &desc, ExtendedMessageDescriptor &exdesc,
2796 const InstructionModifier &mod, const RegData &dst, const DataSpec &spec, AddressBase base, const Addr &addr)
2797{
2798 spec.template getDescriptors<Access::Read>(hw, mod, base, desc, exdesc, addr);
2799 if (dst.isNull())
2800 desc.parts.responseLen = 0;
2801}
2802
2803// Generate descriptors for a store operation. Requires split send for pre-Gen12.
2804template <typename DataSpec, typename Addr>
2805static inline void encodeStoreDescriptors(HW hw, MessageDescriptor &desc, ExtendedMessageDescriptor &exdesc,
2806 const InstructionModifier &mod, const DataSpec &spec, AddressBase base, const Addr &addr)
2807{
2808#ifdef NGEN_SAFE
2809 if (base.isRO()) throw read_only_exception();
2810#endif
2811
2812 spec.template getDescriptors<Access::Write>(hw, mod, base, desc, exdesc, addr);
2813 exdesc.parts.extMessageLen = desc.parts.responseLen;
2814 desc.parts.responseLen = 0;
2815}
2816
2817// Generate descriptors for an atomic operation. Requires split send for binary and ternary atomics pre-Gen12.
2818template <typename DataSpec, typename Addr>
2819static inline void encodeAtomicDescriptors(HW hw, MessageDescriptor &desc, ExtendedMessageDescriptor &exdesc,
2820 AtomicOp op, const InstructionModifier &mod, const RegData &dst, const DataSpec &spec, AddressBase base, const Addr &addr)
2821{
2822 if (isFloatAtomicOp(op))
2823 spec.template getDescriptors<Access::AtomicFloat>(hw, mod, base, desc, exdesc, addr);
2824 else
2825 spec.template getDescriptors<Access::AtomicInteger>(hw, mod, base, desc, exdesc, addr);
2826
2827 spec.applyAtomicOp(op, dst, desc);
2828
2829 exdesc.parts.extMessageLen = desc.parts.responseLen * (operandCount(op) - 1);
2830 if (dst.isNull())
2831 desc.parts.responseLen = 0;
2832}
2833
2834} /* namespace ngen */
2835
2836
2837#endif /* header guard */
2838