1/*******************************************************************************
2* Copyright 2019-2021 Intel Corporation
3*
4* Licensed under the Apache License, Version 2.0 (the "License");
5* you may not use this file except in compliance with the License.
6* You may obtain a copy of the License at
7*
8* http://www.apache.org/licenses/LICENSE-2.0
9*
10* Unless required by applicable law or agreed to in writing, software
11* distributed under the License is distributed on an "AS IS" BASIS,
12* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13* See the License for the specific language governing permissions and
14* limitations under the License.
15*******************************************************************************/
16
17/*
18 * Do not #include this file directly; ngen uses it internally.
19 */
20
21// Gen8-11 binary encoding implementation.
22
// Register operand encoding shared by src0 and src1: 25 significant bits.
// Four overlays describe the same bits: direct vs. indirect addressing
// (addrMode 0 / 1), each in a "1" and a "16" flavour. The instruction's dst
// slot holds only the low 16 bits of this layout.
// Bit ranges quoted below assume bit-fields are allocated LSB-first.
union BinaryOperand8 {
    uint32_t bits;                  // raw view of the whole operand

    struct {                        // direct addressing, "16" layout
        unsigned chanSel03 : 4;     // [3:0]   chanEn for dst
        unsigned subRegNum4 : 1;    // [4]
        unsigned regNum : 8;        // [12:5]
        unsigned srcMod : 2;        // [14:13]
        unsigned addrMode : 1;      // [15]
        unsigned chanSel47 : 4;     // [19:16]
        unsigned _ : 1;             // [20]
        unsigned vs : 4;            // [24:21]
        unsigned : 7;               // padding up to 32 bits
    } direct16;

    struct {                        // direct addressing, "1" layout
        unsigned subRegNum : 5;     // [4:0]
        unsigned regNum : 8;        // [12:5]
        unsigned srcMod : 2;        // [14:13] encodeBinaryOperand8<true> stores the encoded dst hs here
        unsigned addrMode : 1;      // [15]
        unsigned hs : 2;            // [17:16] written for source operands only
        unsigned width : 3;         // [20:18]
        unsigned vs : 4;            // [24:21]
        unsigned : 7;               // padding up to 32 bits
    } direct1;

    struct {                        // indirect addressing, "16" layout
        unsigned chanSel03 : 4;     // [3:0]   chanEn for dst
        unsigned addrImm48 : 5;     // [8:4]
        unsigned addrSubreg : 4;    // [12:9]
        unsigned srcMod : 2;        // [14:13]
        unsigned addrMode : 1;      // [15]
        unsigned chanSel47 : 4;     // [19:16]
        unsigned _ : 1;             // [20]
        unsigned vs : 4;            // [24:21]
        unsigned : 7;               // padding up to 32 bits
    } indirect16;

    struct {                        // indirect addressing, "1" layout
        unsigned addrImm08 : 9;     // [8:0]
        unsigned addrSubreg : 4;    // [12:9]
        unsigned srcMod : 2;        // [14:13] same bits that carry the encoded dst hs in direct1
        unsigned addrMode : 1;      // [15]
        unsigned hs : 2;            // [17:16]
        unsigned width : 3;         // [20:18]
        unsigned vs : 4;            // [24:21]
        unsigned : 7;               // padding up to 32 bits
    } indirect1;
};
69
// Source operand of a ternary instruction: 21 significant bits per operand.
// Three overlays are provided: a register in the "1" layout, a register in
// the "16" layout, and a 16-bit immediate.
// Bit ranges quoted below assume bit-fields are allocated LSB-first.
union TernaryOperand8 {
    uint32_t bits;                  // raw view of the whole operand

    struct {                        // register operand, "1" layout
        unsigned type : 3;          // [2:0]
        unsigned vs : 2;            // [4:3]
        unsigned hs : 2;            // [6:5]
        unsigned subRegNum : 5;     // [11:7]
        unsigned regNum : 8;        // [19:12]
        unsigned : 1;               // [20]
        // Everything above bit 20 is padding.
        unsigned : 11;
    } direct1;

    struct {                        // register operand, "16" layout
        unsigned repCtrl : 1;       // [0]
        unsigned chanSel : 8;       // [8:1]
        unsigned subReg2_4 : 3;     // [11:9]  bits 2-4 of the subregister offset
        unsigned regNum : 8;        // [19:12]
        unsigned subReg1 : 1;       // [20]    bit 1 of the subregister offset
        unsigned : 11;              // padding
    } direct16;

    struct {                        // immediate operand
        unsigned type : 3;          // [2:0]
        unsigned value : 16;        // [18:3]
        unsigned : 2;               // [20:19]
        unsigned : 11;              // padding
    } immediate1;
};
98
// A single 16-byte instruction, accessible as two raw qwords or through one
// of several format-specific bit-field overlays. Unnamed bit-fields reserve
// bits that are described by another overlay or that this overlay does not use.
union Instruction8 {
    // Fields shared across formats; essentially the low 35 bits.
    struct {
        // dword 0
        unsigned opcode : 8;
        unsigned accessMode : 1;
        unsigned depCtrl : 2;
        unsigned execOffset : 3;
        unsigned threadCtrl : 2;
        unsigned predCtrl : 4;
        unsigned predInv : 1;
        unsigned execSize : 3;
        unsigned cmod : 4;              // math: FC; send: SFID; branches: zero
        unsigned accWrCtrl : 1;         // also known as branchCtrl / noSrcDepSet
        unsigned cmptCtrl : 1;
        unsigned debugCtrl : 1;
        unsigned saturate : 1;
        // dword 1
        unsigned flagSubRegNum : 1;
        unsigned flagRegNum : 1;
        unsigned maskCtrl : 1;
        unsigned : 29;
        // dwords 2-3
        unsigned : 32;
        unsigned : 32;
    } common;

    // One- and two-source instructions.
    struct {
        // dword 0 (see common)
        unsigned : 32;
        // dword 1
        unsigned : 3;
        unsigned dstRegFile : 2;
        unsigned dstType : 4;
        unsigned src0RegFile : 2;
        unsigned src0Type : 4;
        unsigned dstAddrImm9 : 1;       // used with indirect addressing only
        unsigned dst : 16;              // low 16 bits of a BinaryOperand8
        // dword 2
        unsigned src0 : 25;
        unsigned src1RegFile : 2;
        unsigned src1Type : 4;
        unsigned src0AddrImm9 : 1;
        // dword 3
        unsigned src1 : 25;
        unsigned src1AddrImm9 : 1;      // used with indirect addressing only
        unsigned _ : 6;
    } binary;

    // 32-bit immediate occupying the last dword.
    struct {
        uint64_t _;
        uint32_t __;
        uint32_t value;
    } imm32;

    // 64-bit immediate occupying the upper qword.
    struct {
        uint64_t _;
        uint64_t value;
    } imm64;

    // Three-source instructions, "1" layout.
    struct {
        // dword 0 (see common)
        unsigned : 32;
        // dword 1
        unsigned : 3;
        unsigned execDataType : 1;
        unsigned dstRegFile : 1;
        unsigned src0Mod : 2;
        unsigned src1Mod : 2;
        unsigned src2Mod : 2;
        unsigned src0RegFile : 1;
        unsigned src1RegFile : 1;
        unsigned src2RegFile : 1;
        unsigned dstType : 3;
        unsigned dstHS : 1;
        unsigned : 2;
        unsigned dstSubRegNum : 4;
        unsigned dstRegNum : 8;
        // dword 2
        unsigned src0 : 21;
        unsigned src1L : 11;            // low part of src1
        // dword 3
        unsigned src1H : 10;            // high part of src1
        unsigned src2 : 21;
        unsigned _ : 1;
    } ternary1;

    // Three-source instructions, "16" layout.
    struct {
        // dword 0 (see common)
        unsigned : 32;
        // dword 1
        unsigned : 3;
        unsigned src2Type : 1;
        unsigned src1Type : 1;
        unsigned src0Mod : 2;
        unsigned src1Mod : 2;
        unsigned src2Mod : 2;
        unsigned srcType : 3;
        unsigned dstType : 3;
        unsigned dstChanEn : 4;
        unsigned dstSubregNum2_4 : 3;
        unsigned dstRegNum : 8;
        // dword 2
        unsigned src0 : 21;
        unsigned src1L : 11;            // low part of src1
        // dword 3
        unsigned src1H : 10;            // high part of src1
        unsigned src2 : 21;
        unsigned _ : 1;
    } ternary16;

    // Split send (sends) layout.
    struct {
        // dword 0
        unsigned : 24;                  // see common
        unsigned sfid : 4;
        unsigned noSrcDepSet : 1;
        unsigned : 3;                   // see common
        // dword 1
        unsigned : 3;
        unsigned dstRegFile : 1;
        unsigned src1RegFile : 1;
        unsigned : 7;                   // see common
        unsigned src1RegNum : 8;
        unsigned : 9;                   // see common
        unsigned selReg32ExDesc : 1;
        unsigned dstAddrImm9 : 1;
        unsigned : 1;
        // dword 2
        unsigned exDesc6_9 : 4;
        unsigned : 9;                   // see common
        unsigned selReg32Desc : 1;
        unsigned src0AddrImm9 : 1;
        unsigned : 1;
        unsigned exDesc16_31 : 16;      // register exdesc: address subregister
        // dword 3
        unsigned desc : 31;             // reg?
        unsigned eot : 1;
    } sendsGen9;

    // Fields where send differs from sends.
    struct {
        uint64_t _;
        // dword 2
        unsigned exDesc16_19 : 4;
        unsigned : 12;
        unsigned exDesc20_23 : 4;
        unsigned zero : 1;
        unsigned exDesc24_27 : 4;
        unsigned : 2;
        unsigned exDesc28_31 : 4;
        unsigned : 1;
        // dword 3
        unsigned : 32;
    } sendGen8;

    // Branch instructions.
    struct {
        // dword 0
        unsigned : 28;                  // see common
        unsigned branchCtrl : 1;
        unsigned : 3;                   // see common
        // dword 1
        unsigned : 32;
        // dwords 2-3
        unsigned uip : 32;
        unsigned jip : 32;
    } branches;

    // Raw storage.
    uint64_t qword[2];

    // A default-constructed instruction is all zero bits.
    constexpr Instruction8() : qword{0, 0} {}
};

// Every overlay must fit the 16-byte instruction exactly.
static_assert(sizeof(Instruction8) == 16, "Internal error: Instruction8 has been padded by the compiler.");
251
252// Encoding routines.
253
254static inline unsigned getImmediateTypecode8(DataType type)
255{
256 static const uint8_t conversionTable[16] = {0,1,2,3,2,3,10,7,8,9,11,0,0,4,6,5};
257 return conversionTable[static_cast<unsigned>(type) & 0xF];
258}
259
260static inline unsigned getTernary16Typecode8(DataType type)
261{
262 // 0-4: :f, :d, :ud, :df, :hf
263 static const uint8_t conversionTable[16] = {2,1,2,1,2,1,3,0,2,1,4,2,2,2,2,2};
264 return conversionTable[static_cast<unsigned>(type) & 0xF];
265}
266
267static inline unsigned getTypecode11(DataType type)
268{
269 static const uint8_t conversionTable[16] = {0,1,2,3,4,5,10,9,6,7,8,0,0,4,5,11};
270 return conversionTable[static_cast<unsigned>(type) & 0xF];
271}
272
273template <HW hw>
274static inline unsigned getTypecode(DataType type)
275{
276 return static_cast<int>(type) & 0xF;
277}
278
279template <>
280inline unsigned getTypecode<HW::Gen11>(DataType type)
281{
282 return getTypecode11(type);
283}
284
285template <HW hw>
286static inline unsigned getImmediateTypecode(DataType type)
287{
288 return getImmediateTypecode8(type);
289}
290
291template <>
292inline unsigned getImmediateTypecode<HW::Gen11>(DataType type)
293{
294 return getTypecode11(type);
295}
296
297template <bool dest>
298static inline constexpr14 BinaryOperand8 encodeBinaryOperand8(const RegData &rd)
299{
300 BinaryOperand8 result{0};
301
302#ifdef NGEN_SAFE
303 if (rd.isInvalid()) throw invalid_object_exception();
304#endif
305
306 if (rd.isIndirect()) {
307 result.indirect1.addrImm08 = rd.getOffset() & 0x1FF;
308 result.indirect1.addrMode = 1;
309 result.indirect1.addrSubreg = rd.getIndirectOff();
310 if (!dest) {
311 result.indirect1.vs = (rd.isVxIndirect()) ? 0xFFFF :
312 (rd.getVS() == 0) ? 0 :
313 (1 + utils::log2(rd.getVS()));
314 }
315 } else {
316 result.direct1.subRegNum = rd.getByteOffset();
317 result.direct1.regNum = rd.getBase();
318 result.direct1.addrMode = 0; // direct
319 if (!dest)
320 result.direct1.vs = (rd.getVS() == 0) ? 0 : (1 + utils::log2(rd.getVS()));
321 }
322
323 int hsEncoded = (rd.getHS() == 0) ? 0 : (1 + utils::log2(rd.getHS()));
324
325 if (dest)
326 result.direct1.srcMod = hsEncoded;
327 else {
328 result.direct1.hs = hsEncoded;
329 result.direct1.width = utils::log2(rd.getWidth());
330 result.direct1.srcMod = rd.getMods();
331 }
332
333 return result;
334}
335
336template <bool dest>
337static inline constexpr14 BinaryOperand8 encodeBinaryOperand8(const ExtendedReg &reg)
338{
339 BinaryOperand8 result{0};
340
341#ifdef NGEN_SAFE
342 if (reg.isInvalid()) throw invalid_object_exception();
343 if (reg.isIndirect()) throw invalid_operand_exception();
344#endif
345
346 RegData rd = reg.getBase();
347
348 result.direct1.subRegNum = reg.getMMENum();
349 result.direct1.regNum = rd.getBase();
350 result.direct1.addrMode = 0;
351
352 int hsEncoded = (rd.getHS() == 0) ? 0 : (1 + utils::log2(rd.getHS()));
353
354 if (dest)
355 result.direct1.srcMod = hsEncoded;
356 else {
357 result.direct1.hs = hsEncoded;
358 result.direct1.width = utils::log2(rd.getWidth());
359 result.direct1.srcMod = rd.getMods();
360 result.direct1.vs = (rd.getVS() == 0) ? 0 : (1 + utils::log2(rd.getVS()));
361 }
362
363 return result;
364}
365
366template <bool dest>
367static inline constexpr14 BinaryOperand8 encodeBinaryOperand8(const Align16Operand &op)
368{
369 BinaryOperand8 result{0};
370 auto &rd = op.getReg();
371
372#ifdef NGEN_SAFE
373 if (op.isInvalid()) throw invalid_object_exception();
374#endif
375
376 if (op.getReg().isIndirect()) {
377 result.indirect16.addrImm48 = rd.getOffset() >> 4;
378 result.indirect16.addrSubreg = rd.getIndirectOff();
379 result.indirect16.addrMode = 1; // indirect
380 } else {
381 result.direct16.subRegNum4 = rd.getByteOffset() >> 4;
382 result.direct16.regNum = rd.getBase();
383 result.direct16.addrMode = 0; // direct
384 }
385
386 if (dest)
387 result.direct16.chanSel03 = op.getChanEn();
388 else {
389 result.direct16.srcMod = rd.getMods();
390 result.direct16.chanSel03 = op.getChanSel() & 0xF;
391 result.direct16.chanSel47 = op.getChanSel() >> 4;
392 result.direct16.vs = (rd.getVS() == 0) ? 0 :
393 (rd.getBytes() == 8) ? 2 : 3;
394 }
395
396 return result;
397}
398
399template <bool src2>
400static inline constexpr14 TernaryOperand8 encodeTernarySrcOperand8(const RegData &rd)
401{
402#ifdef NGEN_SAFE
403 if (rd.isInvalid()) throw invalid_object_exception();
404 if (rd.isIndirect()) throw invalid_operand_exception();
405#endif
406
407 TernaryOperand8 result{0};
408
409 result.direct1.hs = (rd.getHS() == 0) ? 0 : (1 + utils::log2(rd.getHS()));
410 if (!src2)
411 result.direct1.vs = (rd.getVS() == 0) ? 0 : utils::log2(rd.getVS());
412 result.direct1.regNum = rd.getBase();
413 result.direct1.subRegNum = rd.getByteOffset();
414 result.direct1.type = getTypecode11(rd.getType());
415
416 return result;
417}
418
419template <bool src2>
420static inline constexpr14 TernaryOperand8 encodeTernarySrcOperand8(const ExtendedReg &reg)
421{
422#ifdef NGEN_SAFE
423 if (reg.isInvalid()) throw invalid_object_exception();
424 if (reg.isIndirect()) throw invalid_operand_exception();
425#endif
426
427 TernaryOperand8 result{0};
428
429 RegData rd = reg.getBase();
430
431 result.direct1.hs = (rd.getHS() == 0) ? 0 : (1 + utils::log2(rd.getHS()));
432 if (!src2)
433 result.direct1.vs = (rd.getVS() == 0) ? 0 : utils::log2(rd.getVS());
434 result.direct1.regNum = rd.getBase();
435 result.direct1.subRegNum = reg.getMMENum() << 1;
436 result.direct1.type = getTypecode11(rd.getType());
437
438 return result;
439}
440
441template <bool src2>
442static inline constexpr14 TernaryOperand8 encodeTernarySrcOperand8(const Align16Operand &rd)
443{
444#ifdef NGEN_SAFE
445 if (rd.getReg().isInvalid()) throw invalid_object_exception();
446 if (rd.isIndirect()) throw invalid_operand_exception();
447#endif
448
449 TernaryOperand8 result{0};
450
451 result.direct16.chanSel = rd.getChanSel();
452 result.direct16.regNum = rd.getReg().getBase();
453 result.direct16.repCtrl = rd.isRep();
454
455 int sr = rd.getReg().getByteOffset();
456 result.direct16.subReg2_4 = sr >> 2;
457 result.direct16.subReg1 = (sr >> 1) & 1;
458
459 return result;
460}
461
462template <bool src2>
463static inline constexpr14 TernaryOperand8 encodeTernarySrcOperand8(const Immediate &imm)
464{
465#ifdef NGEN_SAFE
466 if (getBytes(imm.getType()) != 2)
467 throw invalid_operand_exception();
468#endif
469
470 TernaryOperand8 result{0};
471
472 result.immediate1.type = getTypecode11(imm.getType());
473 result.immediate1.value = static_cast<uint64_t>(imm);
474
475 return result;
476}
477
478template <typename S0, typename S1, typename S2>
479static inline void encodeTernaryCommon8(Instruction8 &i, S0 src0, S1 src1, S2 src2)
480{
481 i.ternary16.src0Mod = src0.getMods();
482 i.ternary16.src1Mod = src1.getMods();
483 i.ternary16.src2Mod = src2.getMods();
484
485 uint64_t src0bits = encodeTernarySrcOperand8<false>(src0).bits;
486 uint64_t src1bits = encodeTernarySrcOperand8<false>(src1).bits;
487 uint64_t src2bits = encodeTernarySrcOperand8<true>(src2).bits;
488
489 // Manually encode upper qword because src1 crosses 32-bit boundary.
490 i.qword[1] = (src2bits << 42) | (src1bits << 21) | src0bits;
491}
492
493static inline void encodeTernary1Dst10(Instruction8 &i, const RegData &dst)
494{
495 int dtype = getTypecode11(dst.getType());
496 i.ternary1.execDataType = dtype >> 3;
497 i.ternary1.dstType = dtype;
498 i.ternary1.dstRegFile = dst.isARF();
499 i.ternary1.dstRegNum = dst.getBase();
500 i.ternary1.dstSubRegNum = dst.getByteOffset() >> 1;
501}
502
503static inline void encodeTernary1Dst10(Instruction8 &i, const ExtendedReg &dst)
504{
505 int dtype = getTypecode11(dst.getType());
506 i.ternary1.execDataType = dtype >> 3;
507 i.ternary1.dstType = dtype;
508 i.ternary1.dstRegFile = dst.isARF();
509 i.ternary1.dstRegNum = dst.getBase().getBase();
510 i.ternary1.dstSubRegNum = dst.getMMENum();
511}
512
513static inline void encodeCommon8(Instruction8 &i, Opcode opcode, const InstructionModifier &mod)
514{
515 i.qword[0] = (mod.getAll() & ~0xFF) | static_cast<unsigned>(opcode);
516}
517
518static inline void encodeSendsExDesc(Instruction8 &i, uint32_t exdesc)
519{
520 i.sendsGen9.sfid = exdesc & 0xF;
521 i.sendsGen9.exDesc6_9 = (exdesc >> 6) & 0xF;
522 i.sendsGen9.exDesc16_31 = (exdesc >> 16) & 0xFFFF;
523
524 i.sendsGen9.selReg32ExDesc = false;
525 i.sendsGen9.eot = (exdesc >> 5) & 1;
526}
527
528static inline void encodeSendsExDesc(Instruction8 &i, RegData exdesc)
529{
530#ifdef NGEN_SAFE
531 // Only a0.x:ud is allowed for extended descriptor.
532 if (!exdesc.isARF() || exdesc.getARFType() != ARFType::a || exdesc.getARFBase() != 0 || exdesc.getType() != DataType::ud)
533 throw invalid_arf_exception();
534#endif
535 i.sendsGen9.selReg32ExDesc = true;
536 i.sendsGen9.eot = false; // No support for EOT with register exdesc currently.
537 i.sendsGen9.exDesc16_31 = exdesc.getOffset();
538}
539
540static inline void encodeSendsDesc(Instruction8 &i, uint32_t desc)
541{
542 i.sendsGen9.desc = desc;
543 i.sendsGen9.selReg32Desc = false;
544}
545
546static inline void encodeSendsDesc(Instruction8 &i, RegData desc)
547{
548#ifdef NGEN_SAFE
549 // Only a0.0:ud is allowed for desc.
550 if (!desc.isARF() || desc.getARFType() != ARFType::a || desc.getARFBase() != 0 || desc.getOffset() != 0)
551 throw invalid_arf_exception();
552#endif
553 i.sendsGen9.desc = desc.getOffset();
554 i.sendsGen9.selReg32Desc = true;
555}
556
557static inline constexpr14 Align16Operand emulateAlign16Dst(const RegData &rd)
558{
559#ifdef NGEN_SAFE
560 if (rd.getHS() != 1 || (rd.getVS() != rd.getWidth()))
561 throw invalid_region_exception();
562#endif
563 return Align16Operand(rd, 0xF);
564}
565
566static inline constexpr14 Align16Operand emulateAlign16Src(const RegData &rd)
567{
568 // Try to emulate Align1 regioning with Align16. Fun stuff!
569 auto hs = rd.getHS(), vs = rd.getVS(), width = rd.getWidth();
570
571 if (hs == 0 && vs == 0) {
572 // Broadcast, using RepCtrl. DF doesn't support repCtrl though;
573 // use swizzles to "emulate", like IGA does.
574 if (rd.getType() == DataType::df) {
575 auto shift = (rd.getOffset() & 1) << 1;
576 RegData rdmod = rd;
577 rdmod.setOffset(rdmod.getOffset() & ~1);
578
579 return Align16Operand(rdmod, shift, shift + 1, shift, shift + 1);
580 } else
581 return Align16Operand::createBroadcast(rd);
582 } else if (hs == 1 && vs == width) {
583 // Unit stride. Trivial swizzle.
584 return Align16Operand(rd, 0, 1, 2, 3);
585 } else {
586#ifdef NGEN_SAFE
587 throw invalid_region_exception();
588#else
589 return Align16Operand(rd, 0, 1, 2, 3);
590#endif
591 }
592}
593
594static inline constexpr14 Align16Operand extToAlign16(const ExtendedReg &reg)
595{
596 return Align16Operand::createWithMME(reg.getBase(), reg.getMMENum());
597}
598