1 | /******************************************************************************* |
2 | * Copyright 2019-2022 Intel Corporation |
3 | * |
4 | * Licensed under the Apache License, Version 2.0 (the "License"); |
5 | * you may not use this file except in compliance with the License. |
6 | * You may obtain a copy of the License at |
7 | * |
8 | * http://www.apache.org/licenses/LICENSE-2.0 |
9 | * |
10 | * Unless required by applicable law or agreed to in writing, software |
11 | * distributed under the License is distributed on an "AS IS" BASIS, |
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
13 | * See the License for the specific language governing permissions and |
14 | * limitations under the License. |
15 | *******************************************************************************/ |
16 | |
17 | /* |
18 | * Do not #include this file directly; ngen uses it internally. |
19 | */ |
20 | |
21 | // Gen12 binary encoding. |
22 | |
// Empty tag types used to select between the Gen12 and XeHPC overloads of
// the encoding routines in this file.
struct EncodingTag12 {};
struct EncodingTagXeHPC {};
// Maps a hardware generation to its encoding tag: EncodingTag12 for every
// HW value except HW::XeHPC, which is specialized to EncodingTagXeHPC.
template <HW hw> struct EncodingTag12Dispatch { using tag = EncodingTag12; };
template <> struct EncodingTag12Dispatch<HW::XeHPC> { using tag = EncodingTagXeHPC; };
27 | |
28 | class SWSBInfo12 |
29 | { |
30 | friend class InstructionModifier; |
31 | protected: |
32 | union { |
33 | struct { |
34 | unsigned dist : 3; |
35 | unsigned pipe : 4; |
36 | unsigned combined : 1; |
37 | } pipeline; |
38 | struct { |
39 | unsigned sbid : 4; |
40 | unsigned mode : 3; |
41 | unsigned combined : 1; |
42 | } scoreboard; |
43 | struct { |
44 | unsigned sbid : 4; |
45 | unsigned dist : 3; |
46 | unsigned combined : 1; |
47 | } combined; |
48 | uint8_t all; |
49 | }; |
50 | |
51 | constexpr SWSBInfo12(uint8_t all_, bool dummy) : all{all_} {} |
52 | |
53 | constexpr bool isPipeline() const { |
54 | return !combined.combined && ((scoreboard.mode < 2) || (scoreboard.mode > 4)); |
55 | } |
56 | |
57 | public: |
58 | constexpr SWSBInfo12() : all{0} {} |
59 | |
60 | SWSBInfo12(SWSBInfo info, Opcode op) { |
61 | if (info.hasDist() && info.hasToken()) { |
62 | combined.sbid = info.parts.token; |
63 | combined.dist = info.parts.dist; |
64 | combined.combined = true; |
65 | } else if (info.hasDist()) { |
66 | combined.combined = false; |
67 | uint8_t pipeMap[8] = {0, 1, 2, 3, 10, 0, 0, 0}; |
68 | pipeline.dist = info.parts.dist; |
69 | pipeline.pipe = pipeMap[info.parts.pipe & 7]; |
70 | } else if (info.hasToken()) { |
71 | combined.combined = false; |
72 | combined.sbid = info.parts.token; |
73 | scoreboard.mode = 1 + info.tokenMode(); |
74 | } else |
75 | all = 0; |
76 | } |
77 | |
78 | SWSBInfo decode(Opcode op) const { |
79 | if (combined.combined) { |
80 | bool vl = isVariableLatency(HW::Gen12LP, op); |
81 | auto pipe = (op == Opcode::send || op == Opcode::sendc) ? Pipe::A : Pipe::Default; |
82 | return SWSBInfo(combined.sbid, vl, true) | SWSBInfo(pipe, combined.dist); |
83 | } else if (isPipeline()) { |
84 | static const Pipe pipeMap[4] = {Pipe::Default, Pipe::A, Pipe::F, Pipe::I}; |
85 | auto pipe = (pipeline.pipe == 10) ? Pipe::L : pipeMap[pipeline.pipe & 3]; |
86 | return SWSBInfo(pipe, pipeline.dist); |
87 | } else |
88 | return SWSBInfo(scoreboard.sbid, scoreboard.mode != 2, scoreboard.mode != 3); |
89 | } |
90 | |
91 | constexpr bool empty() const { return all == 0; } |
92 | constexpr uint8_t raw() const { return all; } |
93 | static constexpr14 SWSBInfo12 createFromRaw(uint8_t all_) { return SWSBInfo12(all_, false); } |
94 | }; |
95 | |
96 | class SWSBInfoXeHPC |
97 | { |
98 | friend class InstructionModifier; |
99 | protected: |
100 | union { |
101 | struct { |
102 | unsigned dist : 3; |
103 | unsigned pipe : 4; |
104 | unsigned sb : 1; |
105 | unsigned mode : 2; |
106 | unsigned : 6; |
107 | } pipeline; |
108 | struct { |
109 | unsigned sbid : 5; |
110 | unsigned type : 2; // .dst: 0, .src: 1, .set: 2 |
111 | unsigned sb : 1; |
112 | unsigned mode : 2; |
113 | unsigned : 6; |
114 | } scoreboard; |
115 | struct { |
116 | unsigned sbid : 5; |
117 | unsigned dist : 3; |
118 | unsigned mode : 2; |
119 | unsigned : 6; |
120 | } combined; |
121 | uint16_t all; |
122 | }; |
123 | |
124 | constexpr SWSBInfoXeHPC(uint16_t all_, bool dummy) : all{all_} {} |
125 | |
126 | static constexpr14 unsigned combinedMode(SWSBInfo info, Opcode op) { |
127 | auto pipe = info.getPipe(); |
128 | if (info.parts.src && info.parts.dst) |
129 | return (pipe == Pipe::F) ? 2 : (pipe == Pipe::I) ? 3 : 1; |
130 | if (info.parts.src) return 2; |
131 | if (info.parts.dst) return (pipe == Pipe::A || op == Opcode::dpas) ? 3 : 1; |
132 | return 0; |
133 | } |
134 | |
135 | public: |
136 | constexpr SWSBInfoXeHPC() : all{0} {} |
137 | |
138 | SWSBInfoXeHPC(SWSBInfo info, Opcode op) { |
139 | if (info.hasDist() && info.hasToken()) { |
140 | combined.sbid = info.parts.token; |
141 | combined.dist = info.parts.dist; |
142 | combined.mode = combinedMode(info, op); |
143 | } else if (info.hasDist()) { |
144 | pipeline.dist = info.parts.dist; |
145 | pipeline.pipe = info.parts.pipe; |
146 | pipeline.sb = false; |
147 | pipeline.mode = 0; |
148 | } else if (info.hasToken()) { |
149 | scoreboard.sbid = info.parts.token; |
150 | scoreboard.type = info.tokenMode() - 1; |
151 | scoreboard.sb = true; |
152 | scoreboard.mode = 0; |
153 | } else if (info.parts.noacc) |
154 | all = 0xF0; |
155 | else |
156 | all = 0; |
157 | } |
158 | |
159 | SWSBInfo decode(Opcode op) const { |
160 | if (all == 0xF0) |
161 | return SWSBInfo::createNoAccSBSet(); |
162 | |
163 | auto result = SWSBInfo(pipe(op), dist()); |
164 | if (combined.mode) { |
165 | bool src, dst; |
166 | if (op == Opcode::send || op == Opcode::sendc) |
167 | src = dst = true; |
168 | else if (op == Opcode::dpas) { |
169 | src = (combined.mode <= 2); |
170 | dst = combined.mode & 1; |
171 | } else { |
172 | dst = combined.mode & 1; |
173 | src = !dst; |
174 | } |
175 | result = result | SWSBInfo(combined.sbid, src, dst); |
176 | } else if (scoreboard.sb) |
177 | result = result | SWSBInfo(scoreboard.sbid, scoreboard.type != 0, scoreboard.type != 1); |
178 | |
179 | return result; |
180 | } |
181 | |
182 | constexpr bool empty() const { return all == 0; } |
183 | constexpr14 int dist() const { |
184 | if (combined.mode) |
185 | return combined.dist; |
186 | else if (!scoreboard.sb) |
187 | return pipeline.dist; |
188 | else |
189 | return 0; |
190 | } |
191 | constexpr14 Pipe pipe(Opcode op) const { |
192 | if (combined.mode) { |
193 | if (op == Opcode::send || op == Opcode::sendc) |
194 | return (combined.mode == 1) ? Pipe::A : (combined.mode == 2) ? Pipe::F : Pipe::I; |
195 | if (op == Opcode::dpas) |
196 | return Pipe::Default; |
197 | return (combined.mode == 3) ? Pipe::A : Pipe::Default; |
198 | } else if (!scoreboard.sb) { |
199 | const Pipe table[8] = {Pipe::Default, Pipe::A, Pipe::F, Pipe::I, Pipe::L, Pipe::M, Pipe::A, Pipe::A}; |
200 | return table[pipeline.pipe]; |
201 | } else |
202 | return Pipe::Default; |
203 | } |
204 | |
205 | constexpr uint16_t raw() const { return all; } |
206 | static constexpr14 SWSBInfoXeHPC createFromRaw(uint16_t all_) { return SWSBInfoXeHPC(all_, false); } |
207 | }; |
208 | |
// 24 bits of data common between src0 and src1 (lower 16 bits common with dst)
//
// The views overlay the same bits:
//   direct / indirect         - register-direct vs. address-register-indirect
//                               layouts, distinguished by addrMode.
//   directXeHPC / indirectXeHPC - XeHPC reinterpretation of the top 4 bits:
//                               a 3-bit vs plus one extra low-order bit of
//                               subRegNum (direct) or addrOff (indirect).
union BinaryOperand12 {
    uint32_t bits;
    struct {
        unsigned hs : 2;
        unsigned regFile : 1;
        unsigned subRegNum : 5;
        unsigned regNum : 8;
        unsigned addrMode : 1;          // = 0 (direct)
        unsigned width : 3;
        unsigned vs : 4;
    } direct;
    struct {
        unsigned hs : 2;
        unsigned addrOff : 10;
        unsigned addrReg : 4;
        unsigned addrMode : 1;          // = 1 (indirect)
        unsigned width : 3;
        unsigned vs : 4;
    } indirect;
    struct {
        unsigned : 20;                  // same as `direct` up to and including width
        unsigned vs : 3;
        unsigned subRegNum0 : 1;        // low bit of the byte offset on XeHPC
    } directXeHPC;
    struct {
        unsigned : 20;                  // same as `indirect` up to and including width
        unsigned vs : 3;
        unsigned addrOff0 : 1;          // low bit of the address offset on XeHPC
    } indirectXeHPC;
};
240 | |
// 16 bits of data common between dst, src0/1/2 for 3-source instructions
//
// Only a register-direct layout is defined here; the encoders in this file
// reject indirect operands for ternary instructions when NGEN_SAFE is set.
union TernaryOperand12 {
    uint16_t bits;
    struct {
        unsigned hs : 2;
        unsigned regFile : 1;
        unsigned subRegNum : 5;         // mme# for math
        unsigned regNum : 8;
    } direct;
};
251 | |
252 | struct Instruction12 { |
253 | union { |
254 | struct { // Lower 35 bits are essentially common. |
255 | unsigned opcode : 8; // High bit reserved, used for auto-SWSB flag. |
256 | unsigned swsb : 8; |
257 | unsigned execSize : 3; |
258 | unsigned execOffset : 3; |
259 | unsigned flagReg : 2; |
260 | unsigned predCtrl : 4; |
261 | unsigned predInv : 1; |
262 | unsigned cmptCtrl : 1; |
263 | unsigned debugCtrl : 1; |
264 | unsigned maskCtrl : 1; |
265 | // |
266 | unsigned atomicCtrl : 1; |
267 | unsigned accWrCtrl : 1; |
268 | unsigned saturate : 1; |
269 | unsigned : 29; |
270 | // |
271 | unsigned : 32; |
272 | unsigned : 32; |
273 | } common; |
274 | struct { |
275 | unsigned : 8; |
276 | unsigned swsb : 10; |
277 | unsigned execSize : 3; |
278 | unsigned flagReg : 3; |
279 | unsigned execOffset : 2; |
280 | unsigned predCtrl : 2; |
281 | unsigned : 4; |
282 | // |
283 | unsigned : 1; |
284 | unsigned dstExt : 1; // Low bit of subRegNum [direct] or addrOff [indirect] |
285 | unsigned : 30; |
286 | // |
287 | unsigned : 32; |
288 | unsigned : 32; |
289 | } commonXeHPC; |
290 | struct { |
291 | unsigned : 32; |
292 | // |
293 | unsigned : 3; |
294 | unsigned dstAddrMode : 1; |
295 | unsigned dstType : 4; |
296 | unsigned src0Type : 4; |
297 | unsigned src0Mods : 2; |
298 | unsigned src0Imm : 1; |
299 | unsigned src1Imm : 1; |
300 | unsigned dst : 16; // first 16 bits of BinaryOperand12 |
301 | // |
302 | unsigned src0 : 24; // BinaryOperand12 |
303 | unsigned src1Type : 4; |
304 | unsigned cmod : 4; |
305 | // |
306 | unsigned src1 : 24; // BinaryOperand12 |
307 | unsigned src1Mods : 2; |
308 | unsigned _ : 6; |
309 | } binary; |
310 | struct { |
311 | uint64_t _; |
312 | uint32_t __; |
313 | uint32_t value; |
314 | } imm32; |
315 | struct { |
316 | uint64_t _; |
317 | uint32_t high; |
318 | uint32_t low; |
319 | } imm64; |
320 | struct { |
321 | unsigned : 32; // common |
322 | unsigned : 3; |
323 | unsigned src0VS0 : 1; |
324 | unsigned dstType : 3; |
325 | unsigned execType : 1; |
326 | unsigned src0Type : 3; |
327 | unsigned src0VS1 : 1; |
328 | unsigned src0Mods : 2; |
329 | unsigned src0Imm : 1; |
330 | unsigned src2Imm : 1; |
331 | unsigned dst : 16; // TernaryOperand12 or immediate |
332 | // |
333 | unsigned src0 : 16; |
334 | unsigned src2Type : 3; |
335 | unsigned src1VS0 : 1; |
336 | unsigned src2Mods : 2; |
337 | unsigned src1Mods : 2; |
338 | unsigned src1Type : 3; |
339 | unsigned src1VS1 : 1; |
340 | unsigned cmod : 4; // same location as binary |
341 | // |
342 | unsigned src1 : 16; // TernaryOperand12 |
343 | unsigned src2 : 16; // TernaryOperand12 or immediate |
344 | } ternary; |
345 | struct { |
346 | unsigned : 32; |
347 | unsigned : 32; |
348 | unsigned : 20; |
349 | unsigned bfnCtrl03 : 4; |
350 | unsigned : 4; |
351 | unsigned bfnCtrl47 : 4; |
352 | unsigned : 32; |
353 | } bfn; |
354 | struct { |
355 | unsigned : 32; |
356 | // |
357 | unsigned : 11; |
358 | unsigned rcount : 3; |
359 | unsigned : 2; |
360 | unsigned sdepth : 2; |
361 | unsigned : 14; |
362 | // |
363 | unsigned : 20; |
364 | unsigned src2SubBytePrecision : 2; |
365 | unsigned src1SubBytePrecision : 2; |
366 | unsigned : 8; |
367 | // |
368 | unsigned : 32; |
369 | } dpas; |
370 | struct { |
371 | unsigned : 32; |
372 | // |
373 | unsigned : 1; |
374 | unsigned fusionCtrl : 1; |
375 | unsigned eot : 1; |
376 | unsigned exDesc11_23 : 13; |
377 | unsigned descIsReg : 1; |
378 | unsigned exDescIsReg : 1; |
379 | unsigned dstRegFile : 1; |
380 | unsigned desc20_24 : 5; |
381 | unsigned dstReg : 8; |
382 | // |
383 | unsigned exDesc24_25 : 2; |
384 | unsigned src0RegFile : 1; |
385 | unsigned desc25_29 : 5; |
386 | unsigned src0Reg : 8; |
387 | unsigned : 1; |
388 | unsigned desc0_10 : 11; |
389 | unsigned sfid : 4; |
390 | // |
391 | unsigned exDesc26_27 : 2; |
392 | unsigned src1RegFile : 1; |
393 | unsigned exDesc6_10 : 5; |
394 | unsigned src1Reg : 8; |
395 | unsigned : 1; |
396 | unsigned desc11_19 : 9; |
397 | unsigned desc30_31 : 2; |
398 | unsigned exDesc28_31 : 4; |
399 | } send; |
400 | struct { |
401 | unsigned : 32; |
402 | unsigned : 8; |
403 | unsigned exDescReg : 3; |
404 | unsigned : 21; |
405 | unsigned : 32; |
406 | unsigned : 32; |
407 | } sendIndirect; |
408 | struct { |
409 | unsigned : 32; // common |
410 | unsigned : 1; |
411 | unsigned branchCtrl : 1; |
412 | unsigned : 30; |
413 | int32_t uip; |
414 | int32_t jip; |
415 | } branches; |
416 | uint64_t qword[2]; |
417 | }; |
418 | |
419 | constexpr Instruction12() : qword{0,0} {}; |
420 | |
421 | // Decoding routines for auto-SWSB. |
422 | bool autoSWSB() const { return (common.opcode & 0x80); } |
423 | SWSBInfo swsb() const { return SWSBInfo12::createFromRaw(common.swsb).decode(opcode()); } |
424 | void setSWSB(SWSBInfo swsb) { common.swsb = SWSBInfo12(swsb, opcode()).raw(); } |
425 | void clearAutoSWSB() { common.opcode &= 0x7F; } |
426 | Opcode opcode() const { return static_cast<Opcode>(common.opcode & 0x7F); } |
427 | SyncFunction syncFC() const { return static_cast<SyncFunction>(binary.cmod); } |
428 | SharedFunction sfid() const { return static_cast<SharedFunction>(send.sfid); } |
429 | bool eot() const { return (opcode() == Opcode::send || opcode() == Opcode::sendc) && send.eot; } |
430 | bool predicated() const { return !common.maskCtrl || (static_cast<PredCtrl>(common.predCtrl) != PredCtrl::None); } |
431 | bool atomic() const { return common.atomicCtrl; } |
432 | unsigned dstTypecode() const { return binary.dstType; } |
433 | unsigned src0Typecode() const { return srcTypecode(0); } |
434 | unsigned src1Typecode() const { return srcTypecode(1); } |
435 | void shiftJIP(int32_t shift) { branches.jip += shift * sizeof(Instruction12); } |
436 | void shiftUIP(int32_t shift) { branches.uip += shift * sizeof(Instruction12); } |
437 | |
438 | inline autoswsb::DestinationMask destinations(int &jip, int &uip) const; |
439 | template <bool xeHPC = false> |
440 | inline bool getOperandRegion(autoswsb::DependencyRegion ®ion, int opNum) const; |
441 | inline bool getImm32(uint32_t &imm) const; |
442 | inline bool getSendDesc(MessageDescriptor &desc) const; |
443 | inline bool getARFType(ARFType &arfType, int opNum) const; |
444 | |
445 | bool isMathMacro() const { |
446 | if (opcode() != Opcode::math) return false; |
447 | auto fc = static_cast<MathFunction>(binary.cmod); |
448 | return (fc == MathFunction::invm || fc == MathFunction::rsqtm); |
449 | } |
450 | |
451 | protected: |
452 | inline unsigned srcTypecode(int opNum) const; |
453 | }; |
454 | |
455 | static_assert(sizeof(Instruction12) == 16, "Internal error: Instruction12 has been padded by the compiler." ); |
456 | |
457 | struct InstructionXeHPC : public Instruction12 { |
458 | SWSBInfo swsb() const { return SWSBInfoXeHPC::createFromRaw(commonXeHPC.swsb).decode(opcode()); } |
459 | void setSWSB(SWSBInfo swsb) { commonXeHPC.swsb = SWSBInfoXeHPC(swsb, opcode()).raw(); } |
460 | |
461 | template <bool xeHPC = true> |
462 | bool getOperandRegion(autoswsb::DependencyRegion ®ion, int opNum) const { |
463 | return Instruction12::getOperandRegion<true>(region, opNum); |
464 | } |
465 | }; |
466 | |
467 | static_assert(sizeof(InstructionXeHPC) == 16, "Internal error: InstructionXeHPC has been padded by the compiler." ); |
468 | |
469 | // Encoding routines. |
470 | |
471 | static inline unsigned getTypecode12(DataType type) |
472 | { |
473 | static const uint8_t conversionTable[32] = {2,6,1,5,0,4,11,10,3,7,9,13,8,0,4,8, |
474 | 14,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2}; |
475 | return conversionTable[static_cast<unsigned>(type) & 0x1F]; |
476 | } |
477 | |
478 | static inline unsigned pow2Encode(unsigned x) |
479 | { |
480 | return (x == 0) ? 0 : (1 + utils::log2(x)); |
481 | } |
482 | |
483 | template <bool dest, bool encodeHS = true> |
484 | static inline constexpr14 BinaryOperand12 encodeBinaryOperand12(const RegData &rd, EncodingTag12 tag) |
485 | { |
486 | BinaryOperand12 op{0}; |
487 | |
488 | #ifdef NGEN_SAFE |
489 | if (rd.isInvalid()) throw invalid_object_exception(); |
490 | #endif |
491 | |
492 | if (rd.isIndirect()) { |
493 | op.indirect.addrOff = rd.getOffset(); |
494 | op.indirect.addrReg = rd.getIndirectOff(); |
495 | op.indirect.addrMode = 1; |
496 | if (!dest) |
497 | op.indirect.vs = (rd.isVxIndirect()) ? 0xFFFF : pow2Encode(rd.getVS()); |
498 | } else { |
499 | op.direct.regFile = getRegFile(rd); |
500 | op.direct.subRegNum = rd.getByteOffset(); |
501 | op.direct.regNum = rd.getBase(); |
502 | op.direct.addrMode = 0; |
503 | if (!dest) |
504 | op.direct.vs = pow2Encode(rd.getVS()); |
505 | } |
506 | |
507 | if (encodeHS) |
508 | op.direct.hs = pow2Encode(rd.getHS()); |
509 | |
510 | if (!dest) op.direct.width = utils::log2(rd.getWidth()); |
511 | |
512 | return op; |
513 | } |
514 | |
515 | template <bool dest, bool encodeHS = true> |
516 | static inline constexpr14 BinaryOperand12 encodeBinaryOperand12(const RegData &rd, EncodingTagXeHPC tag) |
517 | { |
518 | BinaryOperand12 op{0}; |
519 | |
520 | #ifdef NGEN_SAFE |
521 | if (rd.isInvalid()) throw invalid_object_exception(); |
522 | #endif |
523 | |
524 | if (rd.isIndirect()) { |
525 | op.indirect.addrOff = (rd.getOffset() >> 1); |
526 | op.indirect.addrReg = rd.getIndirectOff(); |
527 | op.indirect.addrMode = 1; |
528 | if (!dest) { |
529 | op.indirect.vs = (rd.isVxIndirect()) ? 0xFFFF : pow2Encode(rd.getVS()); |
530 | op.indirectXeHPC.addrOff0 = (rd.getOffset() & 1); |
531 | } |
532 | } else { |
533 | op.direct.regFile = getRegFile(rd); |
534 | op.direct.subRegNum = (rd.getByteOffset() >> 1); |
535 | op.direct.regNum = rd.getBase(); |
536 | op.direct.addrMode = 0; |
537 | if (!dest) { |
538 | op.directXeHPC.vs = pow2Encode(rd.getVS()); |
539 | op.directXeHPC.subRegNum0 = rd.getByteOffset() & 1; |
540 | } |
541 | } |
542 | |
543 | if (encodeHS) |
544 | op.direct.hs = pow2Encode(rd.getHS()); |
545 | |
546 | if (!dest) op.direct.width = utils::log2(rd.getWidth()); |
547 | |
548 | return op; |
549 | } |
550 | |
551 | template <bool dest, typename Tag> |
552 | static inline constexpr14 BinaryOperand12 encodeBinaryOperand12(const ExtendedReg ®, Tag tag) |
553 | { |
554 | auto op = encodeBinaryOperand12<dest>(reg.getBase(), tag); |
555 | op.direct.subRegNum = reg.getMMENum(); |
556 | |
557 | return op; |
558 | } |
559 | |
560 | template <bool dest, bool encodeHS = true> |
561 | static inline constexpr14 TernaryOperand12 encodeTernaryOperand12(const RegData &rd, EncodingTag12 tag) |
562 | { |
563 | #ifdef NGEN_SAFE |
564 | if (rd.isInvalid()) throw invalid_object_exception(); |
565 | if (rd.isIndirect()) throw invalid_operand_exception(); |
566 | #endif |
567 | |
568 | TernaryOperand12 op{0}; |
569 | |
570 | if (encodeHS) |
571 | op.direct.hs = dest ? utils::log2(rd.getHS()) : pow2Encode(rd.getHS()); |
572 | |
573 | op.direct.regFile = getRegFile(rd); |
574 | op.direct.subRegNum = rd.getByteOffset(); |
575 | op.direct.regNum = rd.getBase(); |
576 | |
577 | return op; |
578 | } |
579 | |
580 | template <bool dest, bool encodeHS = true> |
581 | static inline constexpr14 TernaryOperand12 encodeTernaryOperand12(const RegData &rd, EncodingTagXeHPC tag) |
582 | { |
583 | #ifdef NGEN_SAFE |
584 | if (rd.isInvalid()) throw invalid_object_exception(); |
585 | if (rd.isIndirect()) throw invalid_operand_exception(); |
586 | #endif |
587 | |
588 | TernaryOperand12 op{0}; |
589 | |
590 | if (encodeHS) |
591 | op.direct.hs = dest ? utils::log2(rd.getHS()) : pow2Encode(rd.getHS()); |
592 | |
593 | op.direct.regFile = getRegFile(rd); |
594 | op.direct.subRegNum = rd.getByteOffset() >> 1; |
595 | op.direct.regNum = rd.getBase(); |
596 | |
597 | return op; |
598 | } |
599 | |
600 | template <bool dest, typename Tag> |
601 | static inline constexpr14 TernaryOperand12 encodeTernaryOperand12(const ExtendedReg ®, Tag tag) |
602 | { |
603 | auto op = encodeTernaryOperand12<dest>(reg.getBase(), tag); |
604 | op.direct.subRegNum = reg.getMMENum(); |
605 | |
606 | return op; |
607 | } |
608 | |
609 | static inline void encodeCommon12(Instruction12 &i, Opcode opcode, const InstructionModifier &mod, const RegData &dst, EncodingTag12 tag) |
610 | { |
611 | i.common.opcode = static_cast<unsigned>(opcode) | (mod.parts.autoSWSB << 7); |
612 | i.common.swsb = SWSBInfo12(mod.getSWSB(), opcode).raw(); |
613 | i.common.execSize = mod.parts.eSizeField; |
614 | i.common.execOffset = mod.parts.chanOff; |
615 | i.common.flagReg = (mod.parts.flagRegNum << 1) | mod.parts.flagSubRegNum; |
616 | i.common.predCtrl = mod.parts.predCtrl; |
617 | i.common.predInv = mod.parts.predInv; |
618 | i.common.cmptCtrl = mod.parts.cmptCtrl; |
619 | i.common.debugCtrl = mod.parts.debugCtrl; |
620 | i.common.maskCtrl = mod.parts.maskCtrl; |
621 | i.common.atomicCtrl = mod.parts.threadCtrl; |
622 | i.common.accWrCtrl = mod.parts.accWrCtrl; |
623 | i.common.saturate = mod.parts.saturate; |
624 | } |
625 | |
626 | static inline void encodeCommon12(Instruction12 &i, Opcode opcode, const InstructionModifier &mod, const RegData &dst, EncodingTagXeHPC tag) |
627 | { |
628 | i.common.opcode = static_cast<unsigned>(opcode) | (mod.parts.autoSWSB << 7); |
629 | i.commonXeHPC.swsb = SWSBInfoXeHPC(mod.getSWSB(), opcode).raw(); |
630 | i.commonXeHPC.execSize = mod.parts.eSizeField; |
631 | i.commonXeHPC.flagReg = (mod.parts.flagRegNum1 << 2) | (mod.parts.flagRegNum << 1) | mod.parts.flagSubRegNum; |
632 | i.commonXeHPC.execOffset = mod.parts.chanOff >> 1; |
633 | i.commonXeHPC.predCtrl = mod.parts.predCtrl; |
634 | i.common.predInv = mod.parts.predInv; |
635 | i.common.cmptCtrl = mod.parts.cmptCtrl; |
636 | i.common.debugCtrl = mod.parts.debugCtrl; |
637 | i.common.maskCtrl = mod.parts.maskCtrl; |
638 | i.common.atomicCtrl = mod.parts.threadCtrl; |
639 | i.commonXeHPC.dstExt = (dst.isIndirect() ? dst.getOffset() : dst.getByteOffset()) & 1; |
640 | i.common.saturate = mod.parts.saturate; |
641 | } |
642 | |
643 | template <typename Tag> |
644 | static inline void encodeCommon12(Instruction12 &i, Opcode opcode, const InstructionModifier &mod, const ExtendedReg &dst, Tag tag) |
645 | { |
646 | encodeCommon12(i, opcode, mod, dst.getBase(), tag); |
647 | } |
648 | |
649 | static inline unsigned encodeTernaryVS01(const RegData &rd) |
650 | { |
651 | switch (rd.getVS()) { |
652 | case 0: return 0; |
653 | case 1: return 1; |
654 | case 4: return 2; |
655 | case 8: return 3; |
656 | default: |
657 | #ifdef NGEN_SAFE |
658 | if (rd.getHS() == 0) |
659 | throw invalid_region_exception(); |
660 | #endif |
661 | return 3; |
662 | } |
663 | } |
664 | |
665 | static inline unsigned encodeTernaryVS01(const ExtendedReg ®) |
666 | { |
667 | return encodeTernaryVS01(reg.getBase()); |
668 | } |
669 | |
670 | template <typename D, typename S0, typename S1, typename S2> |
671 | static inline void encodeTernaryTypes(Instruction12 &i, D dst, S0 src0, S1 src1, S2 src2) |
672 | { |
673 | auto dtype = getTypecode12(dst.getType()); |
674 | auto s0type = getTypecode12(src0.getType()); |
675 | auto s1type = getTypecode12(src1.getType()); |
676 | auto s2type = getTypecode12(src2.getType()); |
677 | |
678 | i.ternary.execType = (dtype >> 3); |
679 | i.ternary.dstType = dtype; |
680 | i.ternary.src0Type = s0type; |
681 | i.ternary.src1Type = s1type; |
682 | i.ternary.src2Type = s2type; |
683 | |
684 | #ifdef NGEN_SAFE |
685 | if (((dtype & s0type & s1type & s2type) ^ (dtype | s0type | s1type | s2type)) & 8) |
686 | throw ngen::invalid_type_exception(); |
687 | #endif |
688 | } |
689 | |
690 | template <typename S0, typename Tag> |
691 | static inline void encodeTernarySrc0(Instruction12 &i, S0 src0, Tag tag) |
692 | { |
693 | i.ternary.src0 = encodeTernaryOperand12<false>(src0, tag).bits; |
694 | i.ternary.src0Mods = src0.getMods(); |
695 | |
696 | auto vs0 = encodeTernaryVS01(src0); |
697 | |
698 | i.ternary.src0VS0 = vs0; |
699 | i.ternary.src0VS1 = vs0 >> 1; |
700 | } |
701 | |
702 | template <typename Tag> |
703 | static inline void encodeTernarySrc0(Instruction12 &i, const Immediate &src0, Tag tag) |
704 | { |
705 | i.ternary.src0Imm = true; |
706 | i.ternary.src0 = static_cast<uint64_t>(src0); |
707 | } |
708 | |
709 | template <typename S1, typename Tag> |
710 | static inline void encodeTernarySrc1(Instruction12 &i, S1 src1, Tag tag) |
711 | { |
712 | i.ternary.src1 = encodeTernaryOperand12<false>(src1, tag).bits; |
713 | i.ternary.src1Mods = src1.getMods(); |
714 | |
715 | auto vs1 = encodeTernaryVS01(src1); |
716 | |
717 | i.ternary.src1VS0 = vs1; |
718 | i.ternary.src1VS1 = vs1 >> 1; |
719 | } |
720 | |
721 | template <typename S2, typename Tag> |
722 | static inline void encodeTernarySrc2(Instruction12 &i, S2 src2, Tag tag) |
723 | { |
724 | i.ternary.src2 = encodeTernaryOperand12<false>(src2, tag).bits; |
725 | i.ternary.src2Mods = src2.getMods(); |
726 | } |
727 | |
728 | template <typename Tag> |
729 | static inline void encodeTernarySrc2(Instruction12 &i, const Immediate &src2, Tag tag) |
730 | { |
731 | i.ternary.src2Imm = true; |
732 | i.ternary.src2 = static_cast<uint64_t>(src2); |
733 | } |
734 | |
735 | static inline void encodeSendExDesc(Instruction12 &i, uint32_t exdesc) |
736 | { |
737 | i.send.eot = (exdesc >> 5); |
738 | i.send.exDesc6_10 = (exdesc >> 6); |
739 | i.send.exDesc11_23 = (exdesc >> 11); |
740 | i.send.exDesc24_25 = (exdesc >> 24); |
741 | i.send.exDesc26_27 = (exdesc >> 26); |
742 | i.send.exDesc28_31 = (exdesc >> 28); |
743 | } |
744 | |
745 | static inline void encodeSendExDesc(Instruction12 &i, RegData exdesc) |
746 | { |
747 | #ifdef NGEN_SAFE |
748 | // Only a0.x:ud is allowed for extended descriptor. |
749 | if (!exdesc.isARF() || exdesc.getARFType() != ARFType::a || exdesc.getARFBase() != 0 || exdesc.getType() != DataType::ud) |
750 | throw invalid_arf_exception(); |
751 | #endif |
752 | i.sendIndirect.exDescReg = exdesc.getOffset(); |
753 | i.send.exDescIsReg = true; |
754 | } |
755 | |
756 | static inline void encodeSendDesc(Instruction12 &i, uint32_t desc) |
757 | { |
758 | i.send.desc0_10 = (desc >> 0); |
759 | i.send.desc11_19 = (desc >> 11); |
760 | i.send.desc20_24 = (desc >> 20); |
761 | i.send.desc25_29 = (desc >> 25); |
762 | i.send.desc30_31 = (desc >> 30); |
763 | } |
764 | |
765 | static inline void encodeSendDesc(Instruction12 &i, RegData desc) |
766 | { |
767 | #ifdef NGEN_SAFE |
768 | // Only a0.0:ud is allowed for desc. |
769 | if (!desc.isARF() || desc.getARFType() != ARFType::a || desc.getARFBase() != 0 || desc.getOffset() != 0) |
770 | throw invalid_arf_exception(); |
771 | #endif |
772 | i.send.descIsReg = true; |
773 | } |
774 | |
775 | /*********************/ |
776 | /* Decoding Routines */ |
777 | /*********************/ |
778 | |
779 | static inline DataType decodeRegTypecode12(unsigned dt) |
780 | { |
781 | static const DataType conversionTable[16] = { |
782 | DataType::ub, DataType::uw, DataType::ud, DataType::uq, |
783 | DataType::b, DataType::w, DataType::d, DataType::q, |
784 | DataType::invalid, DataType::hf, DataType::f, DataType::df, |
785 | DataType::invalid, DataType::bf, DataType::tf32, DataType::bf8 |
786 | }; |
787 | return conversionTable[dt & 0xF]; |
788 | } |
789 | |
// Size in bytes implied by a DPAS type code: 2 raised to the code's low two bits.
static inline int decodeDPASTypecodeBytes12(unsigned dt)
{
    const unsigned log2Bytes = dt & 3u;
    return 1 << log2Bytes;
}
794 | |
795 | template <bool xeHPC> |
796 | bool Instruction12::getOperandRegion(autoswsb::DependencyRegion ®ion, int opNum) const |
797 | { |
798 | using namespace autoswsb; |
799 | |
800 | auto hw = region.hw; |
801 | auto op = opcode(); |
802 | RegData rd; |
803 | |
804 | switch (op) { |
805 | case Opcode::nop_gen12: |
806 | case Opcode::illegal: |
807 | return false; |
808 | case Opcode::wrdep: |
809 | if (opNum != 0) return false; |
810 | BinaryOperand12 o0, o1; |
811 | o0.bits = binary.src0; |
812 | o1.bits = binary.src1; |
813 | region = DependencyRegion(hw, GRF(o0.direct.regNum)-GRF(o1.direct.regNum)); |
814 | return true; |
815 | case Opcode::dpas: |
816 | case Opcode::dpasw: { |
817 | unsigned sdepth = 1 << dpas.sdepth; |
818 | unsigned rcount = 1 + dpas.rcount; |
819 | unsigned len; |
820 | TernaryOperand12 o; |
821 | |
822 | switch (opNum) { |
823 | case -1: |
824 | len = (rcount * decodeDPASTypecodeBytes12(ternary.dstType) + 3) >> 2; |
825 | o.bits = ternary.dst; |
826 | break; |
827 | case 0: |
828 | len = (rcount * decodeDPASTypecodeBytes12(ternary.src0Type) + 3) >> 2; |
829 | o.bits = ternary.src0; |
830 | break; |
831 | case 1: len = sdepth; o.bits = ternary.src1; break; |
832 | case 2: { |
833 | if (op == Opcode::dpasw) rcount = (rcount + 1) >> 1; |
834 | o.bits = ternary.src2; |
835 | auto sr = o.direct.subRegNum; |
836 | if (xeHPC) |
837 | len = ((sr << 1) + sdepth * rcount * 4 + 63) >> 6; |
838 | else |
839 | len = (sr + sdepth * rcount * 4 + 31) >> 5; |
840 | break; |
841 | } |
842 | default: return false; |
843 | } |
844 | |
845 | region = DependencyRegion(hw, GRFRange(o.direct.regNum, len)); |
846 | return true; |
847 | } |
848 | case Opcode::send: |
849 | case Opcode::sendc: { |
850 | int base = 0, len = 0; |
851 | switch (opNum) { |
852 | case -1: |
853 | if (send.dstRegFile == RegFileARF) return false; |
854 | base = send.dstReg; |
855 | len = send.descIsReg ? -1 : send.desc20_24; |
856 | if (len == 31) len++; |
857 | break; |
858 | case 0: |
859 | if (send.src0RegFile == RegFileARF) return false; |
860 | base = send.src0Reg; |
861 | len = send.descIsReg ? -1 : (send.desc25_29 & 0xF); |
862 | break; |
863 | case 1: |
864 | if (send.src1RegFile == RegFileARF) return false; |
865 | base = send.src1Reg; |
866 | len = send.exDescIsReg ? -1 : send.exDesc6_10; |
867 | break; |
868 | case 2: |
869 | case 3: // TODO: May need to track indirect acc usage |
870 | default: return false; |
871 | } |
872 | |
873 | if (len == 0) |
874 | return false; |
875 | else if (len == -1) |
876 | region = DependencyRegion(hw); |
877 | else |
878 | region = DependencyRegion(hw, GRFRange(base, len)); |
879 | return true; |
880 | } |
881 | case Opcode::dp4a: |
882 | case Opcode::add3: |
883 | case Opcode::bfn: |
884 | case Opcode::bfe_gen12: |
885 | case Opcode::bfi2_gen12: |
886 | case Opcode::csel_gen12: |
887 | case Opcode::mad: |
888 | case Opcode::madm: { // ternary |
889 | TernaryOperand12 o; |
890 | unsigned dt = 0, vs = 0; |
891 | switch (opNum) { |
892 | case -1: |
893 | o.bits = ternary.dst; |
894 | dt = ternary.dstType; |
895 | break; |
896 | case 0: |
897 | if (ternary.src0Imm) return false; |
898 | o.bits = ternary.src0; |
899 | dt = ternary.src0Type; |
900 | vs = ternary.src0VS0 + (ternary.src0VS1 * 3); |
901 | break; |
902 | case 1: |
903 | o.bits = ternary.src1; |
904 | dt = ternary.src1Type; |
905 | vs = ternary.src1VS0 + (ternary.src1VS1 * 3); |
906 | break; |
907 | case 2: |
908 | if (ternary.src2Imm) return false; |
909 | o.bits = ternary.src2; |
910 | dt = ternary.src2Type; |
911 | break; |
912 | default: return false; |
913 | } |
914 | dt |= (ternary.execType << 3); |
915 | if (op == Opcode::madm) o.direct.subRegNum = 0; |
916 | auto base = GRF(o.direct.regNum).retype(decodeRegTypecode12(dt)); |
917 | auto sr = o.direct.subRegNum; |
918 | if (xeHPC) sr <<= 1; |
919 | auto sub = base[sr / getBytes(base.getType())]; |
920 | auto hs = (1 << o.direct.hs); |
921 | if (opNum >= 0) hs >>= 1; |
922 | if ((opNum < 0) || (opNum == 2)) |
923 | rd = sub(hs); |
924 | else |
925 | rd = sub((1 << vs) >> 1, hs); |
926 | |
927 | if (o.direct.regFile == RegFileARF) { |
928 | rd.setARF(true); |
929 | if (!autoswsb::trackableARF(rd.getARFType())) |
930 | return false; |
931 | } |
932 | break; |
933 | } |
934 | default: { // unary/binary |
935 | BinaryOperand12 o; |
936 | unsigned dt; |
937 | switch (opNum) { |
938 | case -1: |
939 | o.bits = binary.dst; |
940 | dt = binary.dstType; |
941 | break; |
942 | case 0: |
943 | if (binary.src0Imm) return false; |
944 | o.bits = binary.src0; |
945 | dt = binary.src0Type; |
946 | break; |
947 | case 1: |
948 | if (binary.src0Imm || binary.src1Imm) return false; |
949 | o.bits = binary.src1; |
950 | dt = binary.src1Type; |
951 | break; |
952 | default: return false; |
953 | } |
954 | if (o.direct.addrMode) { region = DependencyRegion(hw); return true; } // indirect |
955 | if (isMathMacro()) |
956 | o.direct.subRegNum = 0; |
957 | auto sr = xeHPC ? ((o.direct.subRegNum << 1) | o.directXeHPC.subRegNum0) |
958 | : o.direct.subRegNum; |
959 | auto vs = xeHPC ? o.directXeHPC.vs : o.direct.vs; |
960 | auto base = GRF(o.direct.regNum).retype(decodeRegTypecode12(dt)); |
961 | auto sub = base[sr / getBytes(base.getType())]; |
962 | auto hs = (1 << o.direct.hs) >> 1; |
963 | if (opNum < 0) |
964 | rd = sub(hs); |
965 | else |
966 | rd = sub((1 << vs) >> 1, 1 << o.direct.width, hs); |
967 | |
968 | if (o.direct.regFile == RegFileARF) { |
969 | rd.setARF(true); |
970 | if (!autoswsb::trackableARF(rd.getARFType())) |
971 | return false; |
972 | } |
973 | break; |
974 | } |
975 | } |
976 | |
977 | auto esize = 1 << ((hw >= HW::XeHPC) ? commonXeHPC.execSize : common.execSize); |
978 | rd.fixup(hw, esize, DataType::invalid, opNum < 0, 2); |
979 | region = DependencyRegion(hw, esize, rd); |
980 | return true; |
981 | } |
982 | |
983 | unsigned Instruction12::srcTypecode(int opNum) const |
984 | { |
985 | auto op = opcode(); |
986 | |
987 | switch (op) { |
988 | case Opcode::nop_gen12: |
989 | case Opcode::illegal: |
990 | case Opcode::send: |
991 | case Opcode::sendc: |
992 | case Opcode::dp4a: |
993 | return 0; |
994 | case Opcode::dpas: |
995 | case Opcode::dpasw: |
996 | // This method is only used for checking for long pipe types. |
997 | return 0; |
998 | case Opcode::add3: |
999 | case Opcode::bfn: |
1000 | case Opcode::bfe_gen12: |
1001 | case Opcode::bfi2_gen12: |
1002 | case Opcode::csel_gen12: |
1003 | case Opcode::mad: |
1004 | case Opcode::madm: // ternary |
1005 | switch (opNum) { |
1006 | case 0: return ternary.src0Type | (ternary.execType << 3); |
1007 | case 1: return ternary.src1Type | (ternary.execType << 3); |
1008 | case 2: return ternary.src2Type | (ternary.execType << 3); |
1009 | default: return 0; |
1010 | } |
1011 | default: // unary/binary |
1012 | switch (opNum) { |
1013 | case 0: return binary.src0Type; |
1014 | case 1: return binary.src1Type; |
1015 | default: return 0; |
1016 | } |
1017 | } |
1018 | |
1019 | return 0; |
1020 | } |
1021 | |
1022 | bool Instruction12::getImm32(uint32_t &imm) const |
1023 | { |
1024 | // Only need to support sync.allrd/wr. |
1025 | if (binary.src0Imm) |
1026 | imm = imm32.value; |
1027 | return binary.src0Imm; |
1028 | } |
1029 | |
1030 | bool Instruction12::getSendDesc(MessageDescriptor &desc) const |
1031 | { |
1032 | if (!send.descIsReg) |
1033 | desc.all = send.desc0_10 | (send.desc11_19 << 11) | (send.desc20_24 << 20) |
1034 | | (send.desc25_29 << 25) | (send.desc30_31 << 30); |
1035 | return !send.descIsReg; |
1036 | } |
1037 | |
1038 | bool Instruction12::getARFType(ARFType &arfType, int opNum) const |
1039 | { |
1040 | if (opNum > 1) return false; |
1041 | |
1042 | // Only need to support unary/binary, for detecting ce/cr/sr usage. |
1043 | switch (opcode()) { |
1044 | case Opcode::nop: |
1045 | case Opcode::illegal: |
1046 | case Opcode::send: |
1047 | case Opcode::sendc: |
1048 | case Opcode::bfe: |
1049 | case Opcode::bfi2: |
1050 | case Opcode::csel: |
1051 | case Opcode::mad: |
1052 | case Opcode::madm: |
1053 | case Opcode::dp4a: |
1054 | case Opcode::add3: |
1055 | case Opcode::bfn: |
1056 | case Opcode::dpas: |
1057 | case Opcode::dpasw: |
1058 | return false; |
1059 | default: { |
1060 | BinaryOperand12 o; |
1061 | switch (opNum) { |
1062 | case -1: |
1063 | o.bits = binary.dst; |
1064 | break; |
1065 | case 0: |
1066 | if (binary.src0Imm) return false; |
1067 | o.bits = binary.src0; |
1068 | break; |
1069 | case 1: |
1070 | if (binary.src0Imm || binary.src1Imm) return false; |
1071 | o.bits = binary.src1; |
1072 | break; |
1073 | default: return false; |
1074 | } |
1075 | if (o.direct.addrMode) return false; |
1076 | if (o.direct.regFile != RegFileARF) return false; |
1077 | arfType = static_cast<ARFType>(o.direct.regNum >> 4); |
1078 | return true; |
1079 | } |
1080 | } |
1081 | } |
1082 | |
1083 | autoswsb::DestinationMask Instruction12::destinations(int &jip, int &uip) const |
1084 | { |
1085 | using namespace autoswsb; |
1086 | |
1087 | if (!isBranch(opcode())) { |
1088 | if (opcode() == Opcode::send || opcode() == Opcode::sendc) |
1089 | if (send.eot) |
1090 | return DestNone; |
1091 | return DestNextIP; |
1092 | } |
1093 | |
1094 | DestinationMask mask = DestNextIP; |
1095 | switch (opcode()) { |
1096 | case Opcode::ret: |
1097 | case Opcode::endif: |
1098 | case Opcode::while_: |
1099 | case Opcode::call: |
1100 | case Opcode::calla: |
1101 | case Opcode::join: |
1102 | case Opcode::jmpi: |
1103 | case Opcode::brd: |
1104 | mask = binary.src0Imm ? (DestNextIP | DestJIP) : DestUnknown; break; |
1105 | case Opcode::goto_: |
1106 | case Opcode::if_: |
1107 | case Opcode::else_: |
1108 | case Opcode::break_: |
1109 | case Opcode::cont: |
1110 | case Opcode::halt: |
1111 | case Opcode::brc: |
1112 | mask = binary.src0Imm ? (DestNextIP | DestJIP | DestUIP) : DestUnknown; break; |
1113 | default: break; |
1114 | } |
1115 | |
1116 | if ((opcode() == Opcode::jmpi) && !predicated()) |
1117 | mask &= ~DestNextIP; |
1118 | |
1119 | if (mask & DestJIP) jip = branches.jip / sizeof(Instruction12); |
1120 | if (mask & DestUIP) uip = branches.uip / sizeof(Instruction12); |
1121 | |
1122 | return mask; |
1123 | } |
1124 | |