1/*******************************************************************************
2* Copyright 2019-2022 Intel Corporation
3*
4* Licensed under the Apache License, Version 2.0 (the "License");
5* you may not use this file except in compliance with the License.
6* You may obtain a copy of the License at
7*
8* http://www.apache.org/licenses/LICENSE-2.0
9*
10* Unless required by applicable law or agreed to in writing, software
11* distributed under the License is distributed on an "AS IS" BASIS,
12* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13* See the License for the specific language governing permissions and
14* limitations under the License.
15*******************************************************************************/
16
17#ifndef NGEN_INTERFACE_HPP
18#define NGEN_INTERFACE_HPP
19
20
21#include "ngen_core.hpp"
22#include <sstream>
23
24
25namespace ngen {
26
27template <HW hw> class OpenCLCodeGenerator;
28template <HW hw> class L0CodeGenerator;
29
30// Exceptions.
31#ifdef NGEN_SAFE
32class unknown_argument_exception : public std::runtime_error {
33public:
34 unknown_argument_exception() : std::runtime_error("Argument not found") {}
35};
36
37class bad_argument_type_exception : public std::runtime_error {
38public:
39 bad_argument_type_exception() : std::runtime_error("Bad argument type") {}
40};
41
42class interface_not_finalized : public std::runtime_error {
43public:
44 interface_not_finalized() : std::runtime_error("Interface has not been finalized") {}
45};
46
47class use_simd1_local_id_exception : public std::runtime_error {
48public:
49 use_simd1_local_id_exception() : std::runtime_error("Use getSIMD1LocalID for SIMD1 kernels") {}
50};
51#endif
52
53enum class ExternalArgumentType { Scalar, GlobalPtr, LocalPtr, Hidden };
54enum class GlobalAccessType { None = 0, Stateless = 1, Surface = 2, All = 3 };
55
56static inline GlobalAccessType operator|(GlobalAccessType access1, GlobalAccessType access2)
57{
58 return static_cast<GlobalAccessType>(static_cast<int>(access1) | static_cast<int>(access2));
59}
60
61class InterfaceHandler
62{
63 template <HW hw> friend class OpenCLCodeGenerator;
64 template <HW hw> friend class L0CodeGenerator;
65
66public:
67 InterfaceHandler(HW hw_) : hw(hw_), simd(GRF::bytes(hw_) >> 2)
68 , inlineGRFs(defaultInlineGRFs(hw))
69 {}
70
71 inline void externalName(const std::string &name) { kernelName = name; }
72
73 template <typename DT>
74 inline void newArgument(std::string name) { newArgument(name, getDataType<DT>()); }
75 inline void newArgument(std::string name, DataType type, ExternalArgumentType exttype = ExternalArgumentType::Scalar, GlobalAccessType access = GlobalAccessType::All);
76 inline void newArgument(std::string name, ExternalArgumentType exttype, GlobalAccessType access = GlobalAccessType::All);
77
78 inline Subregister getArgument(const std::string &name) const;
79 inline Subregister getArgumentIfExists(const std::string &name) const;
80 inline int getArgumentSurface(const std::string &name) const;
81 inline int getArgumentSurfaceIfExists(const std::string &name) const;
82 inline GRF getLocalID(int dim) const;
83 inline RegData getSIMD1LocalID(int dim) const;
84 inline Subregister getLocalSize(int dim) const;
85
86 const std::string &getExternalName() const { return kernelName; }
87 int getSIMD() const { return simd; }
88 int getGRFCount() const { return needGRF; }
89 size_t getSLMSize() const { return slmSize; }
90
91 void require32BitBuffers() { allow64BitBuffers = false; }
92 void requireBarrier() { barrierCount = 1; }
93 void requireBarriers(int nBarriers) { barrierCount = nBarriers; }
94 void requireDPAS() { needDPAS = true; }
95 void requireGlobalAtomics() { needGlobalAtomics = true; }
96 void requireGRF(int grfs) { needGRF = grfs; }
97 void requireLocalID(int dimensions) { needLocalID = dimensions; }
98 void requireLocalSize() { needLocalSize = true; }
99 void requireNonuniformWGs() { needNonuniformWGs = true; }
100 void requireNoPreemption() { needNoPreemption = true; }
101 void requireScratch(size_t bytes = 1) { scratchSize = bytes; }
102 void requireSIMD(int simd_) { simd = simd_; }
103 void requireSLM(size_t bytes) { slmSize = bytes; }
104 void requireStatelessWrites(bool req = true) { needStatelessWrites = req; }
105 inline void requireType(DataType type);
106 template <typename T> void requireType() { requireType(getDataType<T>()); }
107 void requireWalkOrder(int o1, int o2) { walkOrder[0] = o1; walkOrder[1] = o2; walkOrder[2] = -1; }
108 void requireWalkOrder(int o1, int o2, int o3) { walkOrder[0] = o1; walkOrder[1] = o2; walkOrder[2] = o3; }
109 void requireWorkgroup(size_t x, size_t y = 1,
110 size_t z = 1) { wg[0] = x; wg[1] = y; wg[2] = z; }
111
112 void setInlineGRFCount(int grfs) { inlineGRFs = grfs; }
113 void setSkipPerThreadOffset(int32_t offset) { offsetSkipPerThread = offset; }
114 void setSkipCrossThreadOffset(int32_t offset) { offsetSkipCrossThread = offset; }
115
116 inline GRF getCrossthreadBase(bool effective = true) const;
117 inline GRF getArgLoadBase() const;
118
119 inline void finalize();
120
121 template <typename CodeGenerator>
122 inline void generatePrologue(CodeGenerator &generator, const GRF &temp = GRF(127)) const;
123
124 inline void generateDummyCL(std::ostream &stream) const;
125 inline std::string generateZeInfo() const;
126
127#ifdef NGEN_ASM
128 inline void dumpAssignments(std::ostream &stream) const;
129#endif
130
131 static constexpr int noSurface = 0x80; // Returned by getArgumentSurfaceIfExists in case of no surface assignment
132
133protected:
134 struct Assignment {
135 std::string name;
136 DataType type;
137 ExternalArgumentType exttype;
138 GlobalAccessType access;
139 Subregister reg;
140 int surface;
141 int index;
142
143 bool globalSurfaceAccess() const { return (static_cast<int>(access) & static_cast<int>(GlobalAccessType::Surface)); }
144 bool globalStatelessAccess() const { return (static_cast<int>(access) & static_cast<int>(GlobalAccessType::Stateless)); }
145 };
146
147 HW hw;
148
149 std::vector<Assignment> assignments;
150 std::string kernelName = "default_kernel";
151
152 int nextArgIndex = 0;
153 bool finalized = false;
154
155 bool allow64BitBuffers = 0;
156 int barrierCount = 0;
157 bool needDPAS = false;
158 bool needGlobalAtomics = false;
159 int32_t needGRF = 128;
160 int needLocalID = 0;
161 bool needLocalSize = false;
162 bool needNonuniformWGs = false;
163 bool needNoPreemption = false;
164 bool needHalf = false;
165 bool needDouble = false;
166 bool needStatelessWrites = true;
167 int32_t offsetSkipPerThread = 0;
168 int32_t offsetSkipCrossThread = 0;
169 size_t scratchSize = 0;
170 int simd = 8;
171 size_t slmSize = 0;
172 int walkOrder[3] = {-1, -1, -1};
173 size_t wg[3] = {0, 0, 0};
174
175 int crossthreadBytes = 0;
176 int crossthreadGRFs = 0;
177 int inlineGRFs = 0;
178 inline int getCrossthreadGRFs() const;
179 inline int getCrossthreadBytes() const;
180 int grfsPerLID() const { return (simd > 16 && GRF::bytes(hw) < 64) ? 2 : 1; }
181
182 static inline int defaultInlineGRFs(HW hw);
183};
184
185using NEOInterfaceHandler = InterfaceHandler;
186
187void InterfaceHandler::newArgument(std::string name, DataType type, ExternalArgumentType exttype, GlobalAccessType access)
188{
189 if (exttype != ExternalArgumentType::GlobalPtr)
190 access = GlobalAccessType::None;
191 assignments.push_back({name, type, exttype, access, Subregister{}, noSurface, nextArgIndex++});
192}
193
194void InterfaceHandler::newArgument(std::string name, ExternalArgumentType exttype, GlobalAccessType access)
195{
196 DataType type = DataType::invalid;
197
198 switch (exttype) {
199 case ExternalArgumentType::GlobalPtr: type = DataType::uq; break;
200 case ExternalArgumentType::LocalPtr: type = DataType::ud; break;
201 default:
202#ifdef NGEN_SAFE
203 throw bad_argument_type_exception();
204#else
205 break;
206#endif
207 }
208
209 newArgument(name, type, exttype, access);
210}
211
212Subregister InterfaceHandler::getArgumentIfExists(const std::string &name) const
213{
214 for (auto &assignment : assignments) {
215 if (assignment.name == name)
216 return assignment.reg;
217 }
218
219 return Subregister{};
220}
221
222Subregister InterfaceHandler::getArgument(const std::string &name) const
223{
224 Subregister arg = getArgumentIfExists(name);
225
226#ifdef NGEN_SAFE
227 if (arg.isInvalid())
228 throw unknown_argument_exception();
229#endif
230
231 return arg;
232}
233
234int InterfaceHandler::getArgumentSurfaceIfExists(const std::string &name) const
235{
236 for (auto &assignment : assignments)
237 if (assignment.name == name)
238 return assignment.surface;
239 return noSurface;
240}
241
242int InterfaceHandler::getArgumentSurface(const std::string &name) const
243{
244 int surface = getArgumentSurfaceIfExists(name);
245
246#ifdef NGEN_SAFE
247 if (surface == noSurface)
248 throw unknown_argument_exception();
249#endif
250 return surface;
251}
252
253RegData InterfaceHandler::getSIMD1LocalID(int dim) const
254{
255#ifdef NGEN_SAFE
256 if (dim > needLocalID || simd != 1) throw unknown_argument_exception();
257#endif
258
259 return GRF(1).uw(dim);
260}
261
262GRF InterfaceHandler::getLocalID(int dim) const
263{
264#ifdef NGEN_SAFE
265 if (dim > needLocalID) throw unknown_argument_exception();
266 if (simd == 1) throw use_simd1_local_id_exception();
267#endif
268
269 if (simd == 1)
270 return GRF(1).uw();
271 else
272 return GRF(1 + dim * grfsPerLID()).uw();
273}
274
275void InterfaceHandler::requireType(DataType type)
276{
277 switch (type) {
278 case DataType::hf: needHalf = true; break;
279 case DataType::df: needDouble = true; break;
280 default: break;
281 }
282}
283
284static inline const char *getCLDataType(DataType type)
285{
286 static const char *names[16] = {"uint", "int", "ushort", "short", "uchar", "char", "double", "float", "ulong", "long", "half", "ushort", "INVALID", "INVALID", "INVALID", "INVALID"};
287 return names[static_cast<uint8_t>(type) & 0xF];
288}
289
290void InterfaceHandler::generateDummyCL(std::ostream &stream) const
291{
292#ifdef NGEN_SAFE
293 if (!finalized) throw interface_not_finalized();
294#endif
295 const char *dpasDummy = " int __builtin_IB_sub_group_idpas_s8_s8_8_1(int, int, int8) __attribute__((const));\n"
296 " int z = __builtin_IB_sub_group_idpas_s8_s8_8_1(0, ____[0], 1);\n"
297 " for (int i = 0; i < z; i++) (void) ____[0];\n";
298
299 if (needHalf) stream << "#pragma OPENCL EXTENSION cl_khr_fp16 : enable\n";
300 if (needDouble) stream << "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n";
301
302 if (wg[0] > 0 && wg[1] > 0 && wg[2] > 0)
303 stream << "__attribute__((reqd_work_group_size(" << wg[0] << ',' << wg[1] << ',' << wg[2] << ")))\n";
304 if (walkOrder[0] >= 0) {
305 stream << "__attribute__((intel_reqd_workgroup_walk_order(" << walkOrder[0] << ',' << walkOrder[1];
306 if (walkOrder[2] >= 0)
307 stream << ',' << walkOrder[2];
308 stream << ")))\n";
309 }
310 stream << "__attribute__((intel_reqd_sub_group_size(" << simd << ")))\n";
311 stream << "kernel void " << kernelName << '(';
312
313 bool firstArg = true;
314 for (const auto &assignment : assignments) {
315 if (assignment.exttype == ExternalArgumentType::Hidden) continue;
316
317 if (!firstArg) stream << ", ";
318
319 switch (assignment.exttype) {
320 case ExternalArgumentType::GlobalPtr: stream << "global void *"; break;
321 case ExternalArgumentType::LocalPtr: stream << "local void *"; break;
322 case ExternalArgumentType::Scalar: stream << getCLDataType(assignment.type) << ' '; break;
323 default: break;
324 }
325
326 stream << assignment.name;
327 firstArg = false;
328 }
329 stream << ") {\n";
330 stream << " global volatile int *____;\n";
331
332 if (needLocalID) stream << " (void) ____[get_local_id(0)];\n";
333 if (needLocalSize) stream << " (void) ____[get_enqueued_local_size(0)];\n";
334 if (barrierCount > 0) stream << " __asm__ volatile(\"barrier\");\n";
335 for (int i = 1; i < barrierCount; i++) {
336 stream << " local NamedBarrier_t *bar" << i << ";\n"
337 " bar" << i << " = named_barrier_init(1);\n"
338 " work_group_named_barrier(bar" << i << ", 0);\n";
339 }
340 if (needDPAS) stream << dpasDummy;
341 if (needGlobalAtomics) stream << " atomic_inc(____);\n";
342 if (scratchSize > 0) stream << " volatile char scratch[" << scratchSize << "] = {0};\n";
343 if (slmSize > 0) stream << " volatile local char slm[" << slmSize << "]; slm[0]++;\n";
344 if (needNoPreemption) {
345 if (hw == HW::Gen9)
346 stream << " volatile double *__df; *__df = 1.1 / *__df;\n"; // IEEE macro causes IGC to disable MTP.
347 /* To do: Gen11 */
348 }
349
350 if (hw >= HW::XeHP) for (const auto &assignment : assignments) {
351 // Force IGC to assume stateless accesses could occur if necessary.
352 if (assignment.exttype == ExternalArgumentType::GlobalPtr && assignment.globalStatelessAccess())
353 stream << " __asm__ volatile(\"\" :: \"rw.u\"(" << assignment.name << "));\n";
354 }
355
356 for (const auto &assignment : assignments) {
357 // Force IGC to assume surface accesses could occur if necessary.
358 if (assignment.exttype == ExternalArgumentType::GlobalPtr && assignment.globalSurfaceAccess())
359 stream << " { volatile uchar __load = ((global uchar *) " << assignment.name << ")[get_local_id(0)];}\n";
360 }
361
362 stream << "}\n";
363}
364
365inline Subregister InterfaceHandler::getLocalSize(int dim) const
366{
367 static const std::string localSizeArgs[3] = {"__local_size0", "__local_size1", "__local_size2"};
368 return getArgument(localSizeArgs[dim]);
369}
370
371void InterfaceHandler::finalize()
372{
373 // Make assignments, following NEO rules:
374 // - all inputs are naturally aligned
375 // - all sub-DWord inputs are DWord-aligned
376 // - first register is
377 // r3 (no local IDs)
378 // r5 (SIMD8/16, local IDs)
379 // r8 (SIMD32, local IDs)
380 // [- assign local ptr arguments left-to-right? not checked]
381 // - assign global pointer arguments left-to-right
382 // - assign scalar arguments left-to-right
383 // - assign surface indices left-to-right for global pointers
384 // - no arguments can cross a GRF boundary. Arrays like work size count
385 // as 1 argument for this rule.
386
387 static const std::string localSizeArgs[3] = {"__local_size0", "__local_size1", "__local_size2"};
388 static const std::string scratchSizeArg = "__scratch_size";
389
390 GRF base = getCrossthreadBase();
391 int offset = 32;
392 int nextSurface = 0;
393 const int grfSize = GRF::bytes(hw);
394
395 auto assignArgsOfType = [&](ExternalArgumentType exttype) {
396 for (auto &assignment : assignments) {
397 if (assignment.exttype != exttype) continue;
398
399 auto bytes = getBytes(assignment.type);
400 auto size = getDwords(assignment.type) << 2;
401
402 if (assignment.name == localSizeArgs[0]) {
403 // Move to next GRF if local size arguments won't fit in this one.
404 if (offset > grfSize - (3 * 4)) {
405 offset = 0;
406 base++;
407 }
408 }
409
410 offset = (offset + size - 1) & -size;
411 if (offset >= grfSize) {
412 offset = 0;
413 base++;
414 }
415
416 assignment.reg = base.sub(offset / bytes, assignment.type);
417
418 if (assignment.exttype == ExternalArgumentType::GlobalPtr) {
419 if (!assignment.globalStatelessAccess())
420 assignment.reg = Subregister{};
421 if (assignment.globalSurfaceAccess())
422 assignment.surface = nextSurface;
423 nextSurface++;
424 }
425 else if (assignment.exttype == ExternalArgumentType::Scalar)
426 requireType(assignment.type);
427
428 offset += size;
429 }
430 };
431
432 assignArgsOfType(ExternalArgumentType::LocalPtr);
433 assignArgsOfType(ExternalArgumentType::GlobalPtr);
434 assignArgsOfType(ExternalArgumentType::Scalar);
435
436 // Add private memory size arguments.
437 if (scratchSize > 0)
438 newArgument(scratchSizeArg, DataType::uq, ExternalArgumentType::Hidden);
439
440 // Add enqueued local size arguments.
441 if (needLocalSize && needNonuniformWGs)
442 for (int dim = 0; dim < 3; dim++)
443 newArgument(localSizeArgs[dim], DataType::ud, ExternalArgumentType::Hidden);
444
445 assignArgsOfType(ExternalArgumentType::Hidden);
446
447 crossthreadBytes = (base.getBase() - getCrossthreadBase().getBase()) * GRF::bytes(hw)
448 + ((offset + 31) & -32);
449 crossthreadGRFs = GRF::bytesToGRFs(hw, crossthreadBytes);
450
451 // Manually add regular local size arguments.
452 if (needLocalSize && !needNonuniformWGs)
453 for (int dim = 0; dim < 3; dim++)
454 assignments.push_back({localSizeArgs[dim], DataType::ud, ExternalArgumentType::Hidden,
455 GlobalAccessType::None, GRF(getCrossthreadBase()).ud(dim + 3), noSurface, -1});
456
457 finalized = true;
458}
459
460GRF InterfaceHandler::getCrossthreadBase(bool effective) const
461{
462 if (!needLocalID)
463 return GRF((!effective || (hw >= HW::XeHP)) ? 1 : 2);
464 else if (simd == 1)
465 return GRF(2);
466 else
467 return GRF(1 + 3 * grfsPerLID());
468}
469
470GRF InterfaceHandler::getArgLoadBase() const
471{
472 return getCrossthreadBase().advance(inlineGRFs);
473}
474
475int InterfaceHandler::getCrossthreadBytes() const
476{
477#ifdef NGEN_SAFE
478 if (!finalized) throw interface_not_finalized();
479#endif
480 return crossthreadBytes;
481}
482
483int InterfaceHandler::getCrossthreadGRFs() const
484{
485#ifdef NGEN_SAFE
486 if (!finalized) throw interface_not_finalized();
487#endif
488 return crossthreadGRFs;
489}
490
491int InterfaceHandler::defaultInlineGRFs(HW hw)
492{
493 if (hw == HW::XeHP) return 1;
494 if (hw == HW::XeHPG) return 1;
495 return 0;
496}
497
498template <typename CodeGenerator>
499void InterfaceHandler::generatePrologue(CodeGenerator &generator, const GRF &temp) const
500{
501#ifdef NGEN_INTERFACE_OLD_PROLOGUE
502 if (needLocalID)
503 generator.loadlid(getCrossthreadBytes(), needLocalID, simd, temp, 8*16);
504 if (getCrossthreadGRFs() > 1)
505 generator.loadargs(getCrossthreadBase(), getCrossthreadGRFs(), temp);
506#else
507 if (needLocalID)
508 generator.loadlid(getCrossthreadBytes(), needLocalID, simd, temp, 12*16);
509 if (getCrossthreadGRFs() > inlineGRFs)
510 generator.loadargs(getArgLoadBase(), getCrossthreadGRFs() - inlineGRFs, temp);
511#endif
512}
513
514std::string InterfaceHandler::generateZeInfo() const
515{
516#ifdef NGEN_SAFE
517 if (!finalized) throw interface_not_finalized();
518#endif
519
520 std::stringstream md;
521
522 md << "version: 1.8\n"
523 "kernels: \n"
524 " - name: \"" << kernelName << "\"\n"
525 " execution_env: \n"
526 " grf_count: " << needGRF << "\n"
527 " simd_size: " << simd << "\n";
528 if (simd > 1)
529 md << " required_sub_group_size: " << simd << "\n";
530 if (wg[0] > 0 && wg[1] > 0 && wg[2] > 0) {
531 md << " required_work_group_size:\n"
532 << " - " << wg[0] << "\n"
533 << " - " << wg[1] << "\n"
534 << " - " << wg[2] << "\n";
535 }
536 if (walkOrder[0] >= 0) {
537 md << " work_group_walk_order_dimensions:\n"
538 << " - " << walkOrder[0] << "\n"
539 << " - " << walkOrder[1] << "\n"
540 << " - " << std::max(walkOrder[2], 0) << "\n";
541 }
542 if (offsetSkipPerThread > 0)
543 md << " offset_to_skip_per_thread_data_load: " << offsetSkipPerThread << '\n';
544 if (barrierCount > 0)
545 md << " barrier_count: " << barrierCount << '\n';
546 if (allow64BitBuffers)
547 md << " has_4gb_buffers: true\n";
548 if (needDPAS)
549 md << " has_dpas: true\n";
550 if (needGlobalAtomics)
551 md << " has_global_atomics: true\n";
552 if (slmSize > 0)
553 md << " slm_size: " << slmSize << '\n';
554 if (!needStatelessWrites)
555 md << " has_no_stateless_write: true\n";
556 if (needNoPreemption)
557 md << " disable_mid_thread_preemption: true\n";
558 if (inlineGRFs > 0)
559 md << " inline_data_payload_size: " << inlineGRFs * GRF::bytes(hw) << "\n";
560 if (!assignments.empty()) {
561 md << "\n"
562 " payload_arguments: \n";
563 }
564 for (auto &assignment : assignments) {
565 uint32_t size = 0;
566 bool skipArg = false;
567 bool explicitArg = true;
568
569 if (assignment.globalSurfaceAccess()) {
570 md << " - arg_type: arg_bypointer\n"
571 " arg_index: " << assignment.index << "\n"
572 " offset: 0\n"
573 " size: 0\n"
574 " addrmode: stateful\n"
575 " addrspace: global\n"
576 " access_type: readwrite\n"
577 "\n";
578 }
579
580 switch (assignment.exttype) {
581 case ExternalArgumentType::Scalar:
582 md << " - arg_type: arg_byvalue\n";
583 size = (assignment.reg.getDwords() << 2);
584 break;
585 case ExternalArgumentType::GlobalPtr:
586 if (!assignment.globalStatelessAccess()) {
587 skipArg = true;
588 break;
589 }
590 // fall through
591 case ExternalArgumentType::LocalPtr:
592 md << " - arg_type: arg_bypointer\n";
593 size = (assignment.reg.getDwords() << 2);
594 break;
595 case ExternalArgumentType::Hidden: {
596 explicitArg = false;
597 if (assignment.name == "__local_size0") {
598 // from Zebin spec : local_size Argument size : int32x3
599 // may need refining to allow
600 // either int32x1, int32x2, int32x3 (x, xy, xyz)
601 // or fine grain : local_size_x, local_size_y, local_size_z
602 md << " - arg_type: "
603 << (needNonuniformWGs ? "enqueued_local_size\n" : "local_size\n");
604 size = (assignment.reg.getDwords() << 2) * 3;
605 } else
606 skipArg = true;
607 break;
608 }
609 }
610 if (skipArg)
611 continue;
612
613 auto offset = (assignment.reg.getBase() - getCrossthreadBase().getBase()) * GRF::bytes(hw) + assignment.reg.getByteOffset();
614 if (explicitArg)
615 md << " arg_index: " << assignment.index << "\n";
616 md << " offset: " << offset << "\n"
617 " size: " << size << '\n';
618
619 if (assignment.globalStatelessAccess()) {
620 md << " addrmode: stateless\n"
621 " addrspace: global\n"
622 " access_type: readwrite\n";
623 } else if (assignment.exttype == ExternalArgumentType::LocalPtr) {
624 md << " addrmode: slm\n"
625 " addrspace: local\n"
626 " access_type: readwrite\n";
627 }
628 md << "\n";
629 }
630
631 bool firstSurface = true;
632 for (auto &assignment : assignments) {
633 if (assignment.globalSurfaceAccess()) {
634 if (firstSurface) {
635 md << "\n"
636 " binding_table_indices: \n";
637 firstSurface = false;
638 }
639 md << " - bti_value: " << assignment.surface << "\n"
640 " arg_index: " << assignment.index << "\n"
641 " \n";
642 }
643 }
644
645 if (needLocalID) {
646 md << "\n"
647 " per_thread_payload_arguments: \n";
648
649 if (simd == 1) {
650 md << " - arg_type: packed_local_ids\n"
651 " offset: 0\n"
652 " size: 6\n"
653 " \n";
654 } else {
655 auto localIDBytes = grfsPerLID() * GRF::bytes(hw);
656 localIDBytes *= 3; // runtime currently supports 0 or 3 localId channels in per thread data
657 md << " - arg_type: local_id\n"
658 " offset: 0\n"
659 " size: " << localIDBytes << "\n"
660 " \n";
661 }
662 }
663
664 md << "\n"; // ensure file ends with newline
665
666#ifdef NGEN_DUMP_ZE_INFO
667 std::cerr << md.str();
668#endif
669
670 return md.str();
671}
672
673#ifdef NGEN_ASM
674void InterfaceHandler::dumpAssignments(std::ostream &stream) const
675{
676 LabelManager manager;
677
678 for (auto &assignment : assignments) {
679 stream << "// ";
680 if (assignment.reg.isValid())
681 assignment.reg.outputText(stream, PrintDetail::sub, manager);
682 else
683 stream << "(none)";
684 stream << '\t' << assignment.name;
685 if (assignment.surface != noSurface)
686 stream << "\t(BTI " << assignment.surface << ')';
687 stream << std::endl;
688 }
689}
690#endif
691
692} /* namespace ngen */
693
694#endif /* header guard */
695