1/*******************************************************************************
2* Copyright 2019-2022 Intel Corporation
3*
4* Licensed under the Apache License, Version 2.0 (the "License");
5* you may not use this file except in compliance with the License.
6* You may obtain a copy of the License at
7*
8* http://www.apache.org/licenses/LICENSE-2.0
9*
10* Unless required by applicable law or agreed to in writing, software
11* distributed under the License is distributed on an "AS IS" BASIS,
12* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13* See the License for the specific language governing permissions and
14* limitations under the License.
15*******************************************************************************/
16
17/*
18 * Do not #include this file directly; ngen uses it internally.
19 */
20
21// Gen12 binary encoding.
22
23struct EncodingTag12 {};
24struct EncodingTagXeHPC {};
25template <HW hw> struct EncodingTag12Dispatch { using tag = EncodingTag12; };
26template <> struct EncodingTag12Dispatch<HW::XeHPC> { using tag = EncodingTagXeHPC; };
27
28class SWSBInfo12
29{
30 friend class InstructionModifier;
31protected:
32 union {
33 struct {
34 unsigned dist : 3;
35 unsigned pipe : 4;
36 unsigned combined : 1;
37 } pipeline;
38 struct {
39 unsigned sbid : 4;
40 unsigned mode : 3;
41 unsigned combined : 1;
42 } scoreboard;
43 struct {
44 unsigned sbid : 4;
45 unsigned dist : 3;
46 unsigned combined : 1;
47 } combined;
48 uint8_t all;
49 };
50
51 constexpr SWSBInfo12(uint8_t all_, bool dummy) : all{all_} {}
52
53 constexpr bool isPipeline() const {
54 return !combined.combined && ((scoreboard.mode < 2) || (scoreboard.mode > 4));
55 }
56
57public:
58 constexpr SWSBInfo12() : all{0} {}
59
60 SWSBInfo12(SWSBInfo info, Opcode op) {
61 if (info.hasDist() && info.hasToken()) {
62 combined.sbid = info.parts.token;
63 combined.dist = info.parts.dist;
64 combined.combined = true;
65 } else if (info.hasDist()) {
66 combined.combined = false;
67 uint8_t pipeMap[8] = {0, 1, 2, 3, 10, 0, 0, 0};
68 pipeline.dist = info.parts.dist;
69 pipeline.pipe = pipeMap[info.parts.pipe & 7];
70 } else if (info.hasToken()) {
71 combined.combined = false;
72 combined.sbid = info.parts.token;
73 scoreboard.mode = 1 + info.tokenMode();
74 } else
75 all = 0;
76 }
77
78 SWSBInfo decode(Opcode op) const {
79 if (combined.combined) {
80 bool vl = isVariableLatency(HW::Gen12LP, op);
81 auto pipe = (op == Opcode::send || op == Opcode::sendc) ? Pipe::A : Pipe::Default;
82 return SWSBInfo(combined.sbid, vl, true) | SWSBInfo(pipe, combined.dist);
83 } else if (isPipeline()) {
84 static const Pipe pipeMap[4] = {Pipe::Default, Pipe::A, Pipe::F, Pipe::I};
85 auto pipe = (pipeline.pipe == 10) ? Pipe::L : pipeMap[pipeline.pipe & 3];
86 return SWSBInfo(pipe, pipeline.dist);
87 } else
88 return SWSBInfo(scoreboard.sbid, scoreboard.mode != 2, scoreboard.mode != 3);
89 }
90
91 constexpr bool empty() const { return all == 0; }
92 constexpr uint8_t raw() const { return all; }
93 static constexpr14 SWSBInfo12 createFromRaw(uint8_t all_) { return SWSBInfo12(all_, false); }
94};
95
96class SWSBInfoXeHPC
97{
98 friend class InstructionModifier;
99protected:
100 union {
101 struct {
102 unsigned dist : 3;
103 unsigned pipe : 4;
104 unsigned sb : 1;
105 unsigned mode : 2;
106 unsigned : 6;
107 } pipeline;
108 struct {
109 unsigned sbid : 5;
110 unsigned type : 2; // .dst: 0, .src: 1, .set: 2
111 unsigned sb : 1;
112 unsigned mode : 2;
113 unsigned : 6;
114 } scoreboard;
115 struct {
116 unsigned sbid : 5;
117 unsigned dist : 3;
118 unsigned mode : 2;
119 unsigned : 6;
120 } combined;
121 uint16_t all;
122 };
123
124 constexpr SWSBInfoXeHPC(uint16_t all_, bool dummy) : all{all_} {}
125
126 static constexpr14 unsigned combinedMode(SWSBInfo info, Opcode op) {
127 auto pipe = info.getPipe();
128 if (info.parts.src && info.parts.dst)
129 return (pipe == Pipe::F) ? 2 : (pipe == Pipe::I) ? 3 : 1;
130 if (info.parts.src) return 2;
131 if (info.parts.dst) return (pipe == Pipe::A || op == Opcode::dpas) ? 3 : 1;
132 return 0;
133 }
134
135public:
136 constexpr SWSBInfoXeHPC() : all{0} {}
137
138 SWSBInfoXeHPC(SWSBInfo info, Opcode op) {
139 if (info.hasDist() && info.hasToken()) {
140 combined.sbid = info.parts.token;
141 combined.dist = info.parts.dist;
142 combined.mode = combinedMode(info, op);
143 } else if (info.hasDist()) {
144 pipeline.dist = info.parts.dist;
145 pipeline.pipe = info.parts.pipe;
146 pipeline.sb = false;
147 pipeline.mode = 0;
148 } else if (info.hasToken()) {
149 scoreboard.sbid = info.parts.token;
150 scoreboard.type = info.tokenMode() - 1;
151 scoreboard.sb = true;
152 scoreboard.mode = 0;
153 } else if (info.parts.noacc)
154 all = 0xF0;
155 else
156 all = 0;
157 }
158
159 SWSBInfo decode(Opcode op) const {
160 if (all == 0xF0)
161 return SWSBInfo::createNoAccSBSet();
162
163 auto result = SWSBInfo(pipe(op), dist());
164 if (combined.mode) {
165 bool src, dst;
166 if (op == Opcode::send || op == Opcode::sendc)
167 src = dst = true;
168 else if (op == Opcode::dpas) {
169 src = (combined.mode <= 2);
170 dst = combined.mode & 1;
171 } else {
172 dst = combined.mode & 1;
173 src = !dst;
174 }
175 result = result | SWSBInfo(combined.sbid, src, dst);
176 } else if (scoreboard.sb)
177 result = result | SWSBInfo(scoreboard.sbid, scoreboard.type != 0, scoreboard.type != 1);
178
179 return result;
180 }
181
182 constexpr bool empty() const { return all == 0; }
183 constexpr14 int dist() const {
184 if (combined.mode)
185 return combined.dist;
186 else if (!scoreboard.sb)
187 return pipeline.dist;
188 else
189 return 0;
190 }
191 constexpr14 Pipe pipe(Opcode op) const {
192 if (combined.mode) {
193 if (op == Opcode::send || op == Opcode::sendc)
194 return (combined.mode == 1) ? Pipe::A : (combined.mode == 2) ? Pipe::F : Pipe::I;
195 if (op == Opcode::dpas)
196 return Pipe::Default;
197 return (combined.mode == 3) ? Pipe::A : Pipe::Default;
198 } else if (!scoreboard.sb) {
199 const Pipe table[8] = {Pipe::Default, Pipe::A, Pipe::F, Pipe::I, Pipe::L, Pipe::M, Pipe::A, Pipe::A};
200 return table[pipeline.pipe];
201 } else
202 return Pipe::Default;
203 }
204
205 constexpr uint16_t raw() const { return all; }
206 static constexpr14 SWSBInfoXeHPC createFromRaw(uint16_t all_) { return SWSBInfoXeHPC(all_, false); }
207};
208
// 24 bits of data common between src0 and src1 (lower 16 bits common with dst)
//
// The same bits are exposed through several views:
//   direct / indirect      - Gen12 layout, selected by addrMode (bit 16).
//   directXeHPC / indirectXeHPC
//                          - XeHPC reinterpretation of bits 20..23 only: the
//                            vertical stride shrinks to 3 bits and bit 23
//                            carries the low bit of the sub-register number
//                            (direct) or of the address offset (indirect).
union BinaryOperand12 {
    uint32_t bits;
    struct {
        unsigned hs : 2;
        unsigned regFile : 1;
        unsigned subRegNum : 5;
        unsigned regNum : 8;
        unsigned addrMode : 1;          // = 0 (direct)
        unsigned width : 3;
        unsigned vs : 4;
    } direct;
    struct {
        unsigned hs : 2;
        unsigned addrOff : 10;
        unsigned addrReg : 4;
        unsigned addrMode : 1;          // = 1 (indirect)
        unsigned width : 3;
        unsigned vs : 4;
    } indirect;
    struct {
        unsigned : 20;                  // same as the first 20 bits of `direct`
        unsigned vs : 3;
        unsigned subRegNum0 : 1;
    } directXeHPC;
    struct {
        unsigned : 20;                  // same as the first 20 bits of `indirect`
        unsigned vs : 3;
        unsigned addrOff0 : 1;
    } indirectXeHPC;
};
240
// 16 bits of data common between dst, src0/1/2 for 3-source instructions
//
// Only a direct-addressing view exists; the ternary encoders in this file
// reject indirect operands when NGEN_SAFE is defined.
union TernaryOperand12 {
    uint16_t bits;
    struct {
        unsigned hs : 2;
        unsigned regFile : 1;
        unsigned subRegNum : 5;         // mme# for math
        unsigned regNum : 8;
    } direct;
};
251
252struct Instruction12 {
253 union {
254 struct { // Lower 35 bits are essentially common.
255 unsigned opcode : 8; // High bit reserved, used for auto-SWSB flag.
256 unsigned swsb : 8;
257 unsigned execSize : 3;
258 unsigned execOffset : 3;
259 unsigned flagReg : 2;
260 unsigned predCtrl : 4;
261 unsigned predInv : 1;
262 unsigned cmptCtrl : 1;
263 unsigned debugCtrl : 1;
264 unsigned maskCtrl : 1;
265 //
266 unsigned atomicCtrl : 1;
267 unsigned accWrCtrl : 1;
268 unsigned saturate : 1;
269 unsigned : 29;
270 //
271 unsigned : 32;
272 unsigned : 32;
273 } common;
274 struct {
275 unsigned : 8;
276 unsigned swsb : 10;
277 unsigned execSize : 3;
278 unsigned flagReg : 3;
279 unsigned execOffset : 2;
280 unsigned predCtrl : 2;
281 unsigned : 4;
282 //
283 unsigned : 1;
284 unsigned dstExt : 1; // Low bit of subRegNum [direct] or addrOff [indirect]
285 unsigned : 30;
286 //
287 unsigned : 32;
288 unsigned : 32;
289 } commonXeHPC;
290 struct {
291 unsigned : 32;
292 //
293 unsigned : 3;
294 unsigned dstAddrMode : 1;
295 unsigned dstType : 4;
296 unsigned src0Type : 4;
297 unsigned src0Mods : 2;
298 unsigned src0Imm : 1;
299 unsigned src1Imm : 1;
300 unsigned dst : 16; // first 16 bits of BinaryOperand12
301 //
302 unsigned src0 : 24; // BinaryOperand12
303 unsigned src1Type : 4;
304 unsigned cmod : 4;
305 //
306 unsigned src1 : 24; // BinaryOperand12
307 unsigned src1Mods : 2;
308 unsigned _ : 6;
309 } binary;
310 struct {
311 uint64_t _;
312 uint32_t __;
313 uint32_t value;
314 } imm32;
315 struct {
316 uint64_t _;
317 uint32_t high;
318 uint32_t low;
319 } imm64;
320 struct {
321 unsigned : 32; // common
322 unsigned : 3;
323 unsigned src0VS0 : 1;
324 unsigned dstType : 3;
325 unsigned execType : 1;
326 unsigned src0Type : 3;
327 unsigned src0VS1 : 1;
328 unsigned src0Mods : 2;
329 unsigned src0Imm : 1;
330 unsigned src2Imm : 1;
331 unsigned dst : 16; // TernaryOperand12 or immediate
332 //
333 unsigned src0 : 16;
334 unsigned src2Type : 3;
335 unsigned src1VS0 : 1;
336 unsigned src2Mods : 2;
337 unsigned src1Mods : 2;
338 unsigned src1Type : 3;
339 unsigned src1VS1 : 1;
340 unsigned cmod : 4; // same location as binary
341 //
342 unsigned src1 : 16; // TernaryOperand12
343 unsigned src2 : 16; // TernaryOperand12 or immediate
344 } ternary;
345 struct {
346 unsigned : 32;
347 unsigned : 32;
348 unsigned : 20;
349 unsigned bfnCtrl03 : 4;
350 unsigned : 4;
351 unsigned bfnCtrl47 : 4;
352 unsigned : 32;
353 } bfn;
354 struct {
355 unsigned : 32;
356 //
357 unsigned : 11;
358 unsigned rcount : 3;
359 unsigned : 2;
360 unsigned sdepth : 2;
361 unsigned : 14;
362 //
363 unsigned : 20;
364 unsigned src2SubBytePrecision : 2;
365 unsigned src1SubBytePrecision : 2;
366 unsigned : 8;
367 //
368 unsigned : 32;
369 } dpas;
370 struct {
371 unsigned : 32;
372 //
373 unsigned : 1;
374 unsigned fusionCtrl : 1;
375 unsigned eot : 1;
376 unsigned exDesc11_23 : 13;
377 unsigned descIsReg : 1;
378 unsigned exDescIsReg : 1;
379 unsigned dstRegFile : 1;
380 unsigned desc20_24 : 5;
381 unsigned dstReg : 8;
382 //
383 unsigned exDesc24_25 : 2;
384 unsigned src0RegFile : 1;
385 unsigned desc25_29 : 5;
386 unsigned src0Reg : 8;
387 unsigned : 1;
388 unsigned desc0_10 : 11;
389 unsigned sfid : 4;
390 //
391 unsigned exDesc26_27 : 2;
392 unsigned src1RegFile : 1;
393 unsigned exDesc6_10 : 5;
394 unsigned src1Reg : 8;
395 unsigned : 1;
396 unsigned desc11_19 : 9;
397 unsigned desc30_31 : 2;
398 unsigned exDesc28_31 : 4;
399 } send;
400 struct {
401 unsigned : 32;
402 unsigned : 8;
403 unsigned exDescReg : 3;
404 unsigned : 21;
405 unsigned : 32;
406 unsigned : 32;
407 } sendIndirect;
408 struct {
409 unsigned : 32; // common
410 unsigned : 1;
411 unsigned branchCtrl : 1;
412 unsigned : 30;
413 int32_t uip;
414 int32_t jip;
415 } branches;
416 uint64_t qword[2];
417 };
418
419 constexpr Instruction12() : qword{0,0} {};
420
421 // Decoding routines for auto-SWSB.
422 bool autoSWSB() const { return (common.opcode & 0x80); }
423 SWSBInfo swsb() const { return SWSBInfo12::createFromRaw(common.swsb).decode(opcode()); }
424 void setSWSB(SWSBInfo swsb) { common.swsb = SWSBInfo12(swsb, opcode()).raw(); }
425 void clearAutoSWSB() { common.opcode &= 0x7F; }
426 Opcode opcode() const { return static_cast<Opcode>(common.opcode & 0x7F); }
427 SyncFunction syncFC() const { return static_cast<SyncFunction>(binary.cmod); }
428 SharedFunction sfid() const { return static_cast<SharedFunction>(send.sfid); }
429 bool eot() const { return (opcode() == Opcode::send || opcode() == Opcode::sendc) && send.eot; }
430 bool predicated() const { return !common.maskCtrl || (static_cast<PredCtrl>(common.predCtrl) != PredCtrl::None); }
431 bool atomic() const { return common.atomicCtrl; }
432 unsigned dstTypecode() const { return binary.dstType; }
433 unsigned src0Typecode() const { return srcTypecode(0); }
434 unsigned src1Typecode() const { return srcTypecode(1); }
435 void shiftJIP(int32_t shift) { branches.jip += shift * sizeof(Instruction12); }
436 void shiftUIP(int32_t shift) { branches.uip += shift * sizeof(Instruction12); }
437
438 inline autoswsb::DestinationMask destinations(int &jip, int &uip) const;
439 template <bool xeHPC = false>
440 inline bool getOperandRegion(autoswsb::DependencyRegion &region, int opNum) const;
441 inline bool getImm32(uint32_t &imm) const;
442 inline bool getSendDesc(MessageDescriptor &desc) const;
443 inline bool getARFType(ARFType &arfType, int opNum) const;
444
445 bool isMathMacro() const {
446 if (opcode() != Opcode::math) return false;
447 auto fc = static_cast<MathFunction>(binary.cmod);
448 return (fc == MathFunction::invm || fc == MathFunction::rsqtm);
449 }
450
451protected:
452 inline unsigned srcTypecode(int opNum) const;
453};
454
455static_assert(sizeof(Instruction12) == 16, "Internal error: Instruction12 has been padded by the compiler.");
456
457struct InstructionXeHPC : public Instruction12 {
458 SWSBInfo swsb() const { return SWSBInfoXeHPC::createFromRaw(commonXeHPC.swsb).decode(opcode()); }
459 void setSWSB(SWSBInfo swsb) { commonXeHPC.swsb = SWSBInfoXeHPC(swsb, opcode()).raw(); }
460
461 template <bool xeHPC = true>
462 bool getOperandRegion(autoswsb::DependencyRegion &region, int opNum) const {
463 return Instruction12::getOperandRegion<true>(region, opNum);
464 }
465};
466
467static_assert(sizeof(InstructionXeHPC) == 16, "Internal error: InstructionXeHPC has been padded by the compiler.");
468
469// Encoding routines.
470
471static inline unsigned getTypecode12(DataType type)
472{
473 static const uint8_t conversionTable[32] = {2,6,1,5,0,4,11,10,3,7,9,13,8,0,4,8,
474 14,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2};
475 return conversionTable[static_cast<unsigned>(type) & 0x1F];
476}
477
478static inline unsigned pow2Encode(unsigned x)
479{
480 return (x == 0) ? 0 : (1 + utils::log2(x));
481}
482
483template <bool dest, bool encodeHS = true>
484static inline constexpr14 BinaryOperand12 encodeBinaryOperand12(const RegData &rd, EncodingTag12 tag)
485{
486 BinaryOperand12 op{0};
487
488#ifdef NGEN_SAFE
489 if (rd.isInvalid()) throw invalid_object_exception();
490#endif
491
492 if (rd.isIndirect()) {
493 op.indirect.addrOff = rd.getOffset();
494 op.indirect.addrReg = rd.getIndirectOff();
495 op.indirect.addrMode = 1;
496 if (!dest)
497 op.indirect.vs = (rd.isVxIndirect()) ? 0xFFFF : pow2Encode(rd.getVS());
498 } else {
499 op.direct.regFile = getRegFile(rd);
500 op.direct.subRegNum = rd.getByteOffset();
501 op.direct.regNum = rd.getBase();
502 op.direct.addrMode = 0;
503 if (!dest)
504 op.direct.vs = pow2Encode(rd.getVS());
505 }
506
507 if (encodeHS)
508 op.direct.hs = pow2Encode(rd.getHS());
509
510 if (!dest) op.direct.width = utils::log2(rd.getWidth());
511
512 return op;
513}
514
515template <bool dest, bool encodeHS = true>
516static inline constexpr14 BinaryOperand12 encodeBinaryOperand12(const RegData &rd, EncodingTagXeHPC tag)
517{
518 BinaryOperand12 op{0};
519
520#ifdef NGEN_SAFE
521 if (rd.isInvalid()) throw invalid_object_exception();
522#endif
523
524 if (rd.isIndirect()) {
525 op.indirect.addrOff = (rd.getOffset() >> 1);
526 op.indirect.addrReg = rd.getIndirectOff();
527 op.indirect.addrMode = 1;
528 if (!dest) {
529 op.indirect.vs = (rd.isVxIndirect()) ? 0xFFFF : pow2Encode(rd.getVS());
530 op.indirectXeHPC.addrOff0 = (rd.getOffset() & 1);
531 }
532 } else {
533 op.direct.regFile = getRegFile(rd);
534 op.direct.subRegNum = (rd.getByteOffset() >> 1);
535 op.direct.regNum = rd.getBase();
536 op.direct.addrMode = 0;
537 if (!dest) {
538 op.directXeHPC.vs = pow2Encode(rd.getVS());
539 op.directXeHPC.subRegNum0 = rd.getByteOffset() & 1;
540 }
541 }
542
543 if (encodeHS)
544 op.direct.hs = pow2Encode(rd.getHS());
545
546 if (!dest) op.direct.width = utils::log2(rd.getWidth());
547
548 return op;
549}
550
551template <bool dest, typename Tag>
552static inline constexpr14 BinaryOperand12 encodeBinaryOperand12(const ExtendedReg &reg, Tag tag)
553{
554 auto op = encodeBinaryOperand12<dest>(reg.getBase(), tag);
555 op.direct.subRegNum = reg.getMMENum();
556
557 return op;
558}
559
560template <bool dest, bool encodeHS = true>
561static inline constexpr14 TernaryOperand12 encodeTernaryOperand12(const RegData &rd, EncodingTag12 tag)
562{
563#ifdef NGEN_SAFE
564 if (rd.isInvalid()) throw invalid_object_exception();
565 if (rd.isIndirect()) throw invalid_operand_exception();
566#endif
567
568 TernaryOperand12 op{0};
569
570 if (encodeHS)
571 op.direct.hs = dest ? utils::log2(rd.getHS()) : pow2Encode(rd.getHS());
572
573 op.direct.regFile = getRegFile(rd);
574 op.direct.subRegNum = rd.getByteOffset();
575 op.direct.regNum = rd.getBase();
576
577 return op;
578}
579
580template <bool dest, bool encodeHS = true>
581static inline constexpr14 TernaryOperand12 encodeTernaryOperand12(const RegData &rd, EncodingTagXeHPC tag)
582{
583#ifdef NGEN_SAFE
584 if (rd.isInvalid()) throw invalid_object_exception();
585 if (rd.isIndirect()) throw invalid_operand_exception();
586#endif
587
588 TernaryOperand12 op{0};
589
590 if (encodeHS)
591 op.direct.hs = dest ? utils::log2(rd.getHS()) : pow2Encode(rd.getHS());
592
593 op.direct.regFile = getRegFile(rd);
594 op.direct.subRegNum = rd.getByteOffset() >> 1;
595 op.direct.regNum = rd.getBase();
596
597 return op;
598}
599
600template <bool dest, typename Tag>
601static inline constexpr14 TernaryOperand12 encodeTernaryOperand12(const ExtendedReg &reg, Tag tag)
602{
603 auto op = encodeTernaryOperand12<dest>(reg.getBase(), tag);
604 op.direct.subRegNum = reg.getMMENum();
605
606 return op;
607}
608
609static inline void encodeCommon12(Instruction12 &i, Opcode opcode, const InstructionModifier &mod, const RegData &dst, EncodingTag12 tag)
610{
611 i.common.opcode = static_cast<unsigned>(opcode) | (mod.parts.autoSWSB << 7);
612 i.common.swsb = SWSBInfo12(mod.getSWSB(), opcode).raw();
613 i.common.execSize = mod.parts.eSizeField;
614 i.common.execOffset = mod.parts.chanOff;
615 i.common.flagReg = (mod.parts.flagRegNum << 1) | mod.parts.flagSubRegNum;
616 i.common.predCtrl = mod.parts.predCtrl;
617 i.common.predInv = mod.parts.predInv;
618 i.common.cmptCtrl = mod.parts.cmptCtrl;
619 i.common.debugCtrl = mod.parts.debugCtrl;
620 i.common.maskCtrl = mod.parts.maskCtrl;
621 i.common.atomicCtrl = mod.parts.threadCtrl;
622 i.common.accWrCtrl = mod.parts.accWrCtrl;
623 i.common.saturate = mod.parts.saturate;
624}
625
626static inline void encodeCommon12(Instruction12 &i, Opcode opcode, const InstructionModifier &mod, const RegData &dst, EncodingTagXeHPC tag)
627{
628 i.common.opcode = static_cast<unsigned>(opcode) | (mod.parts.autoSWSB << 7);
629 i.commonXeHPC.swsb = SWSBInfoXeHPC(mod.getSWSB(), opcode).raw();
630 i.commonXeHPC.execSize = mod.parts.eSizeField;
631 i.commonXeHPC.flagReg = (mod.parts.flagRegNum1 << 2) | (mod.parts.flagRegNum << 1) | mod.parts.flagSubRegNum;
632 i.commonXeHPC.execOffset = mod.parts.chanOff >> 1;
633 i.commonXeHPC.predCtrl = mod.parts.predCtrl;
634 i.common.predInv = mod.parts.predInv;
635 i.common.cmptCtrl = mod.parts.cmptCtrl;
636 i.common.debugCtrl = mod.parts.debugCtrl;
637 i.common.maskCtrl = mod.parts.maskCtrl;
638 i.common.atomicCtrl = mod.parts.threadCtrl;
639 i.commonXeHPC.dstExt = (dst.isIndirect() ? dst.getOffset() : dst.getByteOffset()) & 1;
640 i.common.saturate = mod.parts.saturate;
641}
642
643template <typename Tag>
644static inline void encodeCommon12(Instruction12 &i, Opcode opcode, const InstructionModifier &mod, const ExtendedReg &dst, Tag tag)
645{
646 encodeCommon12(i, opcode, mod, dst.getBase(), tag);
647}
648
649static inline unsigned encodeTernaryVS01(const RegData &rd)
650{
651 switch (rd.getVS()) {
652 case 0: return 0;
653 case 1: return 1;
654 case 4: return 2;
655 case 8: return 3;
656 default:
657#ifdef NGEN_SAFE
658 if (rd.getHS() == 0)
659 throw invalid_region_exception();
660#endif
661 return 3;
662 }
663}
664
665static inline unsigned encodeTernaryVS01(const ExtendedReg &reg)
666{
667 return encodeTernaryVS01(reg.getBase());
668}
669
670template <typename D, typename S0, typename S1, typename S2>
671static inline void encodeTernaryTypes(Instruction12 &i, D dst, S0 src0, S1 src1, S2 src2)
672{
673 auto dtype = getTypecode12(dst.getType());
674 auto s0type = getTypecode12(src0.getType());
675 auto s1type = getTypecode12(src1.getType());
676 auto s2type = getTypecode12(src2.getType());
677
678 i.ternary.execType = (dtype >> 3);
679 i.ternary.dstType = dtype;
680 i.ternary.src0Type = s0type;
681 i.ternary.src1Type = s1type;
682 i.ternary.src2Type = s2type;
683
684#ifdef NGEN_SAFE
685 if (((dtype & s0type & s1type & s2type) ^ (dtype | s0type | s1type | s2type)) & 8)
686 throw ngen::invalid_type_exception();
687#endif
688}
689
690template <typename S0, typename Tag>
691static inline void encodeTernarySrc0(Instruction12 &i, S0 src0, Tag tag)
692{
693 i.ternary.src0 = encodeTernaryOperand12<false>(src0, tag).bits;
694 i.ternary.src0Mods = src0.getMods();
695
696 auto vs0 = encodeTernaryVS01(src0);
697
698 i.ternary.src0VS0 = vs0;
699 i.ternary.src0VS1 = vs0 >> 1;
700}
701
702template <typename Tag>
703static inline void encodeTernarySrc0(Instruction12 &i, const Immediate &src0, Tag tag)
704{
705 i.ternary.src0Imm = true;
706 i.ternary.src0 = static_cast<uint64_t>(src0);
707}
708
709template <typename S1, typename Tag>
710static inline void encodeTernarySrc1(Instruction12 &i, S1 src1, Tag tag)
711{
712 i.ternary.src1 = encodeTernaryOperand12<false>(src1, tag).bits;
713 i.ternary.src1Mods = src1.getMods();
714
715 auto vs1 = encodeTernaryVS01(src1);
716
717 i.ternary.src1VS0 = vs1;
718 i.ternary.src1VS1 = vs1 >> 1;
719}
720
721template <typename S2, typename Tag>
722static inline void encodeTernarySrc2(Instruction12 &i, S2 src2, Tag tag)
723{
724 i.ternary.src2 = encodeTernaryOperand12<false>(src2, tag).bits;
725 i.ternary.src2Mods = src2.getMods();
726}
727
728template <typename Tag>
729static inline void encodeTernarySrc2(Instruction12 &i, const Immediate &src2, Tag tag)
730{
731 i.ternary.src2Imm = true;
732 i.ternary.src2 = static_cast<uint64_t>(src2);
733}
734
735static inline void encodeSendExDesc(Instruction12 &i, uint32_t exdesc)
736{
737 i.send.eot = (exdesc >> 5);
738 i.send.exDesc6_10 = (exdesc >> 6);
739 i.send.exDesc11_23 = (exdesc >> 11);
740 i.send.exDesc24_25 = (exdesc >> 24);
741 i.send.exDesc26_27 = (exdesc >> 26);
742 i.send.exDesc28_31 = (exdesc >> 28);
743}
744
745static inline void encodeSendExDesc(Instruction12 &i, RegData exdesc)
746{
747#ifdef NGEN_SAFE
748 // Only a0.x:ud is allowed for extended descriptor.
749 if (!exdesc.isARF() || exdesc.getARFType() != ARFType::a || exdesc.getARFBase() != 0 || exdesc.getType() != DataType::ud)
750 throw invalid_arf_exception();
751#endif
752 i.sendIndirect.exDescReg = exdesc.getOffset();
753 i.send.exDescIsReg = true;
754}
755
756static inline void encodeSendDesc(Instruction12 &i, uint32_t desc)
757{
758 i.send.desc0_10 = (desc >> 0);
759 i.send.desc11_19 = (desc >> 11);
760 i.send.desc20_24 = (desc >> 20);
761 i.send.desc25_29 = (desc >> 25);
762 i.send.desc30_31 = (desc >> 30);
763}
764
765static inline void encodeSendDesc(Instruction12 &i, RegData desc)
766{
767#ifdef NGEN_SAFE
768 // Only a0.0:ud is allowed for desc.
769 if (!desc.isARF() || desc.getARFType() != ARFType::a || desc.getARFBase() != 0 || desc.getOffset() != 0)
770 throw invalid_arf_exception();
771#endif
772 i.send.descIsReg = true;
773}
774
775/*********************/
776/* Decoding Routines */
777/*********************/
778
779static inline DataType decodeRegTypecode12(unsigned dt)
780{
781 static const DataType conversionTable[16] = {
782 DataType::ub, DataType::uw, DataType::ud, DataType::uq,
783 DataType::b, DataType::w, DataType::d, DataType::q,
784 DataType::invalid, DataType::hf, DataType::f, DataType::df,
785 DataType::invalid, DataType::bf, DataType::tf32, DataType::bf8
786 };
787 return conversionTable[dt & 0xF];
788}
789
// Element size in bytes for a dpas type code: 2 raised to the low two bits
// of the code.
static inline int decodeDPASTypecodeBytes12(unsigned dt)
{
    const unsigned log2Bytes = dt & 3;
    return 1 << log2Bytes;
}
794
// Decodes the register region touched by one operand of this instruction,
// for use by the automatic SWSB dependency tracker.
//
//   xeHPC   - selects the XeHPC sub-register/vertical-stride decoding.
//   region  - in: region.hw supplies the hardware generation;
//             out: the decoded region when the function returns true.
//   opNum   - -1 for the destination, 0/1/2 for src0/src1/src2.
//
// Returns false when there is nothing to record for this operand (no such
// operand, an immediate, an ARF that autoswsb::trackableARF rejects, a
// zero-length send payload, ...). Returns true after storing `region`.
template <bool xeHPC>
bool Instruction12::getOperandRegion(autoswsb::DependencyRegion &region, int opNum) const
{
    using namespace autoswsb;

    auto hw = region.hw;
    auto op = opcode();
    RegData rd;

    switch (op) {
        case Opcode::nop_gen12:
        case Opcode::illegal:
            return false;
        case Opcode::wrdep:
            // Only operand 0 is reported: the GRF range running from the
            // register in src0 to the register in src1.
            if (opNum != 0) return false;
            BinaryOperand12 o0, o1;
            o0.bits = binary.src0;
            o1.bits = binary.src1;
            region = DependencyRegion(hw, GRF(o0.direct.regNum)-GRF(o1.direct.regNum));
            return true;
        case Opcode::dpas:
        case Opcode::dpasw: {
            // dpas operands are whole-register ranges whose length is
            // computed from the systolic depth and repeat count.
            unsigned sdepth = 1 << dpas.sdepth;
            unsigned rcount = 1 + dpas.rcount;
            unsigned len;
            TernaryOperand12 o;

            switch (opNum) {
                case -1:
                    len = (rcount * decodeDPASTypecodeBytes12(ternary.dstType) + 3) >> 2;
                    o.bits = ternary.dst;
                    break;
                case 0:
                    len = (rcount * decodeDPASTypecodeBytes12(ternary.src0Type) + 3) >> 2;
                    o.bits = ternary.src0;
                    break;
                case 1: len = sdepth; o.bits = ternary.src1; break;
                case 2: {
                    // dpasw uses half the repeat count (rounded up) for src2.
                    if (op == Opcode::dpasw) rcount = (rcount + 1) >> 1;
                    o.bits = ternary.src2;
                    auto sr = o.direct.subRegNum;
                    // The stored sub-register number is doubled on XeHPC
                    // before being added; the divisor is 64 there and 32
                    // otherwise.
                    if (xeHPC)
                        len = ((sr << 1) + sdepth * rcount * 4 + 63) >> 6;
                    else
                        len = (sr + sdepth * rcount * 4 + 31) >> 5;
                    break;
                }
                default: return false;
            }

            region = DependencyRegion(hw, GRFRange(o.direct.regNum, len));
            return true;
        }
        case Opcode::send:
        case Opcode::sendc: {
            // Payload lengths are read from descriptor fields; a length of
            // -1 marks "descriptor is in a register, length not available".
            int base = 0, len = 0;
            switch (opNum) {
                case -1:
                    if (send.dstRegFile == RegFileARF) return false;
                    base = send.dstReg;
                    len = send.descIsReg ? -1 : send.desc20_24;
                    if (len == 31) len++;       // a field value of 31 is treated as 32
                    break;
                case 0:
                    if (send.src0RegFile == RegFileARF) return false;
                    base = send.src0Reg;
                    len = send.descIsReg ? -1 : (send.desc25_29 & 0xF);
                    break;
                case 1:
                    if (send.src1RegFile == RegFileARF) return false;
                    base = send.src1Reg;
                    len = send.exDescIsReg ? -1 : send.exDesc6_10;
                    break;
                case 2:
                case 3: // TODO: May need to track indirect acc usage
                default: return false;
            }

            if (len == 0)
                return false;
            else if (len == -1)
                // NOTE(review): DependencyRegion(hw) with no range is
                // presumably a conservative "unknown extent" region --
                // confirm against DependencyRegion.
                region = DependencyRegion(hw);
            else
                region = DependencyRegion(hw, GRFRange(base, len));
            return true;
        }
        case Opcode::dp4a:
        case Opcode::add3:
        case Opcode::bfn:
        case Opcode::bfe_gen12:
        case Opcode::bfi2_gen12:
        case Opcode::csel_gen12:
        case Opcode::mad:
        case Opcode::madm: { // ternary
            TernaryOperand12 o;
            unsigned dt = 0, vs = 0;
            switch (opNum) {
                case -1:
                    o.bits = ternary.dst;
                    dt = ternary.dstType;
                    break;
                case 0:
                    if (ternary.src0Imm) return false;
                    o.bits = ternary.src0;
                    dt = ternary.src0Type;
                    // Rebuild the stride from its two split bits; the result
                    // feeds (1 << vs) >> 1 below, giving strides 0, 1, 4, 8.
                    vs = ternary.src0VS0 + (ternary.src0VS1 * 3);
                    break;
                case 1:
                    o.bits = ternary.src1;
                    dt = ternary.src1Type;
                    vs = ternary.src1VS0 + (ternary.src1VS1 * 3);
                    break;
                case 2:
                    if (ternary.src2Imm) return false;
                    o.bits = ternary.src2;
                    dt = ternary.src2Type;
                    break;
                default: return false;
            }
            // execType supplies bit 3 of the 4-bit type code.
            dt |= (ternary.execType << 3);
            // For madm the sub-register field holds an mme number, not an offset.
            if (op == Opcode::madm) o.direct.subRegNum = 0;
            auto base = GRF(o.direct.regNum).retype(decodeRegTypecode12(dt));
            auto sr = o.direct.subRegNum;
            if (xeHPC) sr <<= 1;            // stored halved on XeHPC
            auto sub = base[sr / getBytes(base.getType())];
            // Destination hs is stored as log2; source hs as 1 + log2 (0 = stride 0).
            auto hs = (1 << o.direct.hs);
            if (opNum >= 0) hs >>= 1;
            if ((opNum < 0) || (opNum == 2))
                rd = sub(hs);
            else
                rd = sub((1 << vs) >> 1, hs);

            if (o.direct.regFile == RegFileARF) {
                rd.setARF(true);
                if (!autoswsb::trackableARF(rd.getARFType()))
                    return false;
            }
            break;
        }
        default: { // unary/binary
            BinaryOperand12 o;
            unsigned dt;
            switch (opNum) {
                case -1:
                    o.bits = binary.dst;
                    dt = binary.dstType;
                    break;
                case 0:
                    if (binary.src0Imm) return false;
                    o.bits = binary.src0;
                    dt = binary.src0Type;
                    break;
                case 1:
                    // src1 is skipped when either source is an immediate.
                    if (binary.src0Imm || binary.src1Imm) return false;
                    o.bits = binary.src1;
                    dt = binary.src1Type;
                    break;
                default: return false;
            }
            if (o.direct.addrMode) { region = DependencyRegion(hw); return true; }  // indirect
            if (isMathMacro())
                o.direct.subRegNum = 0;
            // On XeHPC the byte offset is split: upper bits in subRegNum,
            // lowest bit in directXeHPC.subRegNum0.
            auto sr = xeHPC ? ((o.direct.subRegNum << 1) | o.directXeHPC.subRegNum0)
                            : o.direct.subRegNum;
            auto vs = xeHPC ? o.directXeHPC.vs : o.direct.vs;
            auto base = GRF(o.direct.regNum).retype(decodeRegTypecode12(dt));
            auto sub = base[sr / getBytes(base.getType())];
            // Strides are stored as 1 + log2 (0 = stride 0).
            auto hs = (1 << o.direct.hs) >> 1;
            if (opNum < 0)
                rd = sub(hs);
            else
                rd = sub((1 << vs) >> 1, 1 << o.direct.width, hs);

            if (o.direct.regFile == RegFileARF) {
                rd.setARF(true);
                if (!autoswsb::trackableARF(rd.getARFType()))
                    return false;
            }
            break;
        }
    }

    // Ternary and unary/binary operands end up here: expand the decoded
    // operand to the instruction's execution size.
    auto esize = 1 << ((hw >= HW::XeHPC) ? commonXeHPC.execSize : common.execSize);
    rd.fixup(hw, esize, DataType::invalid, opNum < 0, 2);
    region = DependencyRegion(hw, esize, rd);
    return true;
}
982
983unsigned Instruction12::srcTypecode(int opNum) const
984{
985 auto op = opcode();
986
987 switch (op) {
988 case Opcode::nop_gen12:
989 case Opcode::illegal:
990 case Opcode::send:
991 case Opcode::sendc:
992 case Opcode::dp4a:
993 return 0;
994 case Opcode::dpas:
995 case Opcode::dpasw:
996 // This method is only used for checking for long pipe types.
997 return 0;
998 case Opcode::add3:
999 case Opcode::bfn:
1000 case Opcode::bfe_gen12:
1001 case Opcode::bfi2_gen12:
1002 case Opcode::csel_gen12:
1003 case Opcode::mad:
1004 case Opcode::madm: // ternary
1005 switch (opNum) {
1006 case 0: return ternary.src0Type | (ternary.execType << 3);
1007 case 1: return ternary.src1Type | (ternary.execType << 3);
1008 case 2: return ternary.src2Type | (ternary.execType << 3);
1009 default: return 0;
1010 }
1011 default: // unary/binary
1012 switch (opNum) {
1013 case 0: return binary.src0Type;
1014 case 1: return binary.src1Type;
1015 default: return 0;
1016 }
1017 }
1018
1019 return 0;
1020}
1021
1022bool Instruction12::getImm32(uint32_t &imm) const
1023{
1024 // Only need to support sync.allrd/wr.
1025 if (binary.src0Imm)
1026 imm = imm32.value;
1027 return binary.src0Imm;
1028}
1029
1030bool Instruction12::getSendDesc(MessageDescriptor &desc) const
1031{
1032 if (!send.descIsReg)
1033 desc.all = send.desc0_10 | (send.desc11_19 << 11) | (send.desc20_24 << 20)
1034 | (send.desc25_29 << 25) | (send.desc30_31 << 30);
1035 return !send.descIsReg;
1036}
1037
1038bool Instruction12::getARFType(ARFType &arfType, int opNum) const
1039{
1040 if (opNum > 1) return false;
1041
1042 // Only need to support unary/binary, for detecting ce/cr/sr usage.
1043 switch (opcode()) {
1044 case Opcode::nop:
1045 case Opcode::illegal:
1046 case Opcode::send:
1047 case Opcode::sendc:
1048 case Opcode::bfe:
1049 case Opcode::bfi2:
1050 case Opcode::csel:
1051 case Opcode::mad:
1052 case Opcode::madm:
1053 case Opcode::dp4a:
1054 case Opcode::add3:
1055 case Opcode::bfn:
1056 case Opcode::dpas:
1057 case Opcode::dpasw:
1058 return false;
1059 default: {
1060 BinaryOperand12 o;
1061 switch (opNum) {
1062 case -1:
1063 o.bits = binary.dst;
1064 break;
1065 case 0:
1066 if (binary.src0Imm) return false;
1067 o.bits = binary.src0;
1068 break;
1069 case 1:
1070 if (binary.src0Imm || binary.src1Imm) return false;
1071 o.bits = binary.src1;
1072 break;
1073 default: return false;
1074 }
1075 if (o.direct.addrMode) return false;
1076 if (o.direct.regFile != RegFileARF) return false;
1077 arfType = static_cast<ARFType>(o.direct.regNum >> 4);
1078 return true;
1079 }
1080 }
1081}
1082
1083autoswsb::DestinationMask Instruction12::destinations(int &jip, int &uip) const
1084{
1085 using namespace autoswsb;
1086
1087 if (!isBranch(opcode())) {
1088 if (opcode() == Opcode::send || opcode() == Opcode::sendc)
1089 if (send.eot)
1090 return DestNone;
1091 return DestNextIP;
1092 }
1093
1094 DestinationMask mask = DestNextIP;
1095 switch (opcode()) {
1096 case Opcode::ret:
1097 case Opcode::endif:
1098 case Opcode::while_:
1099 case Opcode::call:
1100 case Opcode::calla:
1101 case Opcode::join:
1102 case Opcode::jmpi:
1103 case Opcode::brd:
1104 mask = binary.src0Imm ? (DestNextIP | DestJIP) : DestUnknown; break;
1105 case Opcode::goto_:
1106 case Opcode::if_:
1107 case Opcode::else_:
1108 case Opcode::break_:
1109 case Opcode::cont:
1110 case Opcode::halt:
1111 case Opcode::brc:
1112 mask = binary.src0Imm ? (DestNextIP | DestJIP | DestUIP) : DestUnknown; break;
1113 default: break;
1114 }
1115
1116 if ((opcode() == Opcode::jmpi) && !predicated())
1117 mask &= ~DestNextIP;
1118
1119 if (mask & DestJIP) jip = branches.jip / sizeof(Instruction12);
1120 if (mask & DestUIP) uip = branches.uip / sizeof(Instruction12);
1121
1122 return mask;
1123}
1124