1/*******************************************************************************
2* Copyright 2019-2022 Intel Corporation
3*
4* Licensed under the Apache License, Version 2.0 (the "License");
5* you may not use this file except in compliance with the License.
6* You may obtain a copy of the License at
7*
8* http://www.apache.org/licenses/LICENSE-2.0
9*
10* Unless required by applicable law or agreed to in writing, software
11* distributed under the License is distributed on an "AS IS" BASIS,
12* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13* See the License for the specific language governing permissions and
14* limitations under the License.
15*******************************************************************************/
16
17#ifndef NGEN_ELF_HPP
18#define NGEN_ELF_HPP
19
20#include "ngen.hpp"
21#include "ngen_interface.hpp"
22
23#include "npack/neo_packager.hpp"
24
25namespace ngen {
26
27// ELF binary format generator class.
28template <HW hw>
29class ELFCodeGenerator : public BinaryCodeGenerator<hw>
30{
31public:
32 inline std::vector<uint8_t> getBinary();
33 static inline HW getBinaryArch(const std::vector<uint8_t> &binary);
34 static inline void getBinaryHWInfo(const std::vector<uint8_t> &binary, HW &outHW, int &outStepping);
35
36 explicit ELFCodeGenerator(int stepping_ = 0) : BinaryCodeGenerator<hw>(stepping_) {}
37
38protected:
39 NEOInterfaceHandler interface_{hw};
40
41 void externalName(const std::string &name) { interface_.externalName(name); }
42
43 const std::string &getExternalName() const { return interface_.getExternalName(); }
44 int getSIMD() const { return interface_.getSIMD(); }
45 int getGRFCount() const { return interface_.getGRFCount(); }
46 size_t getSLMSize() const { return interface_.getSLMSize(); }
47
48 void require32BitBuffers() { interface_.require32BitBuffers(); }
49 void requireBarrier() { interface_.requireBarrier(); }
50 void requireBarriers(int nbarriers) { interface_.requireBarriers(nbarriers); }
51 void requireDPAS() { interface_.requireDPAS(); }
52 void requireGlobalAtomics() { interface_.requireGlobalAtomics(); }
53 void requireGRF(int grfs) { interface_.requireGRF(grfs); }
54 void requireLocalID(int dimensions) { interface_.requireLocalID(dimensions); }
55 void requireLocalSize() { interface_.requireLocalSize(); }
56 void requireNonuniformWGs() { interface_.requireNonuniformWGs(); }
57 void requireNoPreemption() { interface_.requireNoPreemption(); }
58 void requireScratch(size_t bytes = 1) { interface_.requireScratch(bytes); }
59 void requireSIMD(int simd_) { interface_.requireSIMD(simd_); }
60 void requireSLM(size_t bytes) { interface_.requireSLM(bytes); }
61 void requireStatelessWrites(bool req = true) { interface_.requireStatelessWrites(req); }
62 inline void requireType(DataType type) { interface_.requireType(type); }
63 template <typename T> void requireType() { interface_.requireType<T>(); }
64 void requireWalkOrder(int o1, int o2) { interface_.requireWalkOrder(o1, o2); }
65 void requireWalkOrder(int o1, int o2, int o3) { interface_.requireWalkOrder(o1, o2, o3); }
66 void requireWorkgroup(size_t x, size_t y = 1, size_t z = 1) { interface_.requireWorkgroup(x, y, z); }
67
68 void finalizeInterface() { interface_.finalize(); }
69
70 template <typename DT>
71 void newArgument(std::string name) { interface_.newArgument<DT>(name); }
72 void newArgument(std::string name, DataType type,
73 ExternalArgumentType exttype = ExternalArgumentType::Scalar,
74 GlobalAccessType access = GlobalAccessType::All)
75 {
76 interface_.newArgument(name, type, exttype, access);
77 }
78 void newArgument(std::string name, ExternalArgumentType exttype,
79 GlobalAccessType access = GlobalAccessType::All)
80 {
81 interface_.newArgument(name, exttype, access);
82 }
83
84 Subregister getArgument(const std::string &name) const { return interface_.getArgument(name); }
85 Subregister getArgumentIfExists(const std::string &name) const { return interface_.getArgumentIfExists(name); }
86 int getArgumentSurface(const std::string &name) const { return interface_.getArgumentSurface(name); }
87 int getArgumentSurfaceIfExists(const std::string &name) const { return interface_.getArgumentSurfaceIfExists(name); }
88 GRF getLocalID(int dim) const { return interface_.getLocalID(dim); }
89 RegData getSIMD1LocalID(int dim) const { return interface_.getSIMD1LocalID(dim); }
90 Subregister getLocalSize(int dim) const { return interface_.getLocalSize(dim); }
91
92 void prologue() { interface_.generatePrologue(*this); }
93 void epilogue(RegData r0_info = RegData())
94 {
95 if (r0_info.isInvalid()) r0_info = this->r0;
96 int GRFCount = interface_.getGRFCount();
97 bool hasSLM = (interface_.getSLMSize() > 0);
98 BinaryCodeGenerator<hw>::epilogue(GRFCount, hasSLM, r0_info);
99 }
100
101 inline std::vector<uint8_t> getBinary(const std::vector<uint8_t> &code);
102
103private:
104 using BinaryCodeGenerator<hw>::labelManager;
105 using BinaryCodeGenerator<hw>::rootStream;
106
107 struct ZebinELF {
108 enum {
109 ELFMagic = 0x464C457F, // '\x7FELF'
110 ELFClass64 = 2,
111 ELFLittleEndian = 1,
112 ELFVersion1 = 1,
113 ELFRelocatable = 1,
114 };
115 enum {
116 MachineIntelGT = 205,
117 ZebinExec = 0xFF12
118 };
119 union TargetMetadata {
120 uint32_t all;
121 struct {
122 unsigned genFlags : 8;
123 unsigned minHWRevision : 5;
124 unsigned validateRevision : 1;
125 unsigned disableExtValidation : 1;
126 unsigned useGfxCoreFamily : 1;
127 unsigned maxHWRevision : 5;
128 unsigned generator : 3;
129 unsigned reserved : 8;
130 } parts;
131 };
132 struct FileHeader {
133 uint32_t magic = ELFMagic;
134 uint8_t elfClass = ELFClass64;
135 uint8_t endian = ELFLittleEndian;
136 uint8_t version = ELFVersion1;
137 uint8_t osABI = 0;
138 uint64_t pad = 0;
139 uint16_t type = ELFRelocatable;
140 uint16_t machine = MachineIntelGT;
141 uint32_t version2 = 1;
142 uint64_t entrypoint = 0;
143 uint64_t programHeaderOff = 0;
144 uint64_t sectionTableOff;
145 TargetMetadata flags;
146 uint16_t size;
147 uint16_t programHeaderSize = 0;
148 uint16_t programTableEntries = 0;
149 uint16_t sectionHeaderSize;
150 uint16_t sectionCount;
151 uint16_t strTableIndex = 1;
152 } fileHeader;
153 struct SectionHeader {
154 uint32_t name;
155 enum Type : uint32_t {
156 Null = 0, Program = 1, SymbolTable = 2, StringTable = 3, Note = 7, ZeInfo = 0xFF000011
157 } type;
158 uint64_t flags = 0;
159 uint64_t addr = 0;
160 uint64_t offset;
161 uint64_t size;
162 uint32_t link = 0;
163 uint32_t info = 0;
164 uint64_t align = 0x10;
165 uint64_t entrySize = 0;
166 } sectionHeaders[5];
167 struct Note {
168 uint32_t nameSize = 8;
169 uint32_t descSize = 4;
170 enum Type : uint32_t {
171 ProductFamily = 1, GfxCoreFamily = 2, TargetMetadata = 3
172 } type = Type::GfxCoreFamily;
173 const char name[8] = "IntelGT";
174 uint32_t payload;
175 } noteGfxCore;
176 struct StringTable {
177 const char zero = '\0';
178 const char snStrTable[10] = ".shstrtab";
179 const char snMetadata[9] = ".ze_info";
180 const char snNote[21] = ".note.intelgt.compat";
181 const char snText[6] = {'.', 't', 'e', 'x', 't', '.'};
182 } stringTable;
183
184 static size_t align(size_t sz) {
185 return (sz + 0xF) & ~0xF;
186 }
187
188 ZebinELF(size_t szKernelName, size_t szMetadata, size_t szKernel) {
189 fileHeader.size = sizeof(fileHeader);
190 fileHeader.sectionHeaderSize = sizeof(SectionHeader);
191 fileHeader.sectionTableOff = offsetof(ZebinELF, sectionHeaders);
192 fileHeader.sectionCount = sizeof(sectionHeaders) / sizeof(SectionHeader);
193
194 fileHeader.flags.all = 0;
195
196 sectionHeaders[0].name = 0;
197 sectionHeaders[0].type = SectionHeader::Type::Null;
198 sectionHeaders[0].offset = 0;
199 sectionHeaders[0].size = 0;
200
201 sectionHeaders[1].name = offsetof(StringTable, snStrTable);
202 sectionHeaders[1].type = SectionHeader::Type::StringTable;
203 sectionHeaders[1].offset = offsetof(ZebinELF, stringTable);
204 sectionHeaders[1].size = sizeof(stringTable);
205
206 sectionHeaders[2].name = offsetof(StringTable, snMetadata);
207 sectionHeaders[2].type = SectionHeader::Type::ZeInfo;
208 sectionHeaders[2].offset = align(sizeof(ZebinELF) + szKernelName);
209 sectionHeaders[2].size = szMetadata;
210
211 sectionHeaders[3].name = offsetof(StringTable, snText);
212 sectionHeaders[3].type = SectionHeader::Type::Program;
213 sectionHeaders[3].offset = sectionHeaders[2].offset + align(szMetadata);
214 sectionHeaders[3].size = szKernel;
215
216 sectionHeaders[4].name = offsetof(StringTable, snNote);
217 sectionHeaders[4].type = SectionHeader::Type::Note;
218 sectionHeaders[4].offset = offsetof(ZebinELF, noteGfxCore);
219 sectionHeaders[4].size = sizeof(noteGfxCore);
220
221 noteGfxCore.payload = static_cast<uint32_t>(npack::encodeGfxCoreFamily(hw));
222 }
223
224 static size_t kernelNameOffset() {
225 return offsetof(ZebinELF, stringTable.snText) + sizeof(stringTable.snText);
226 }
227
228 bool valid() const {
229 if (fileHeader.magic != ELFMagic || fileHeader.elfClass != ELFClass64
230 || fileHeader.endian != ELFLittleEndian || fileHeader.sectionHeaderSize != sizeof(SectionHeader)
231 || (fileHeader.version != 0 && fileHeader.version != ELFVersion1)
232 || (fileHeader.type != ZebinExec && fileHeader.type != ELFRelocatable))
233 return false;
234 auto *base = reinterpret_cast<const uint8_t *>(&fileHeader);
235 auto *sheader = reinterpret_cast<const SectionHeader *>(base + fileHeader.sectionTableOff);
236 for (int s = 0; s < fileHeader.sectionCount; s++, sheader++)
237 if (sheader->type == SectionHeader::Type::ZeInfo)
238 return true;
239 return false;
240 }
241
242 void findNotes(const Note *&start, const Note *&end) const {
243 auto *base = reinterpret_cast<const uint8_t *>(&fileHeader);
244 auto *sheader0 = reinterpret_cast<const SectionHeader *>(base + fileHeader.sectionTableOff);
245 const char *strtab = nullptr;
246 uint64_t strtabSize = 0;
247
248 auto sheader = sheader0;
249 for (int s = 0; s < fileHeader.sectionCount; s++, sheader++) {
250 if (sheader->type == SectionHeader::Type::StringTable) {
251 strtab = reinterpret_cast<const char *>(base + sheader->offset);
252 strtabSize = sheader->size;
253 }
254 }
255
256 bool found = false;
257 sheader = sheader0;
258 for (int s = 0; s < fileHeader.sectionCount; s++, sheader++)
259 if (sheader->type == SectionHeader::Type::Note)
260 if (sheader->name < strtabSize)
261 if (!strcmp(strtab + sheader->name, ".note.intelgt.compat"))
262 { found = true; break; }
263
264 if (found) {
265 start = reinterpret_cast<const Note *>(base + sheader->offset);
266 end = reinterpret_cast<const Note *>(base + sheader->offset + sheader->size);
267 } else
268 start = end = nullptr;
269 }
270 };
271};
272
// Forwarding boilerplate intended for the body of a class derived from ngen::ELFCodeGenerator<hw>.
// It expands NGEN_FORWARD(hw) and then re-declares the interface helpers of ELFCodeGenerator as
// members that forward to ngen::ELFCodeGenerator<hw>.
// NGEN_FORWARD_ELF_EXTRA and NGEN_FORWARD_ELF_EXTRA2 are expanded at the point of use, so defining
// them after NGEN_FORWARD_ELF is fine.
// Every forwarder, including the templated requireType<DT>(), targets ELFCodeGenerator<hw>, which
// is the class that declares these helpers.
#define NGEN_FORWARD_ELF(hw) NGEN_FORWARD(hw) \
template <typename... Targs> void externalName(Targs&&... args) { ngen::ELFCodeGenerator<hw>::externalName(std::forward<Targs>(args)...); } \
const std::string &getExternalName() const { return ngen::ELFCodeGenerator<hw>::getExternalName(); } \
int getSIMD() const { return ngen::ELFCodeGenerator<hw>::getSIMD(); } \
int getGRFCount() const { return ngen::ELFCodeGenerator<hw>::getGRFCount(); } \
size_t getSLMSize() const { return ngen::ELFCodeGenerator<hw>::getSLMSize(); } \
template <typename... Targs> void require32BitBuffers(Targs&&... args) { ngen::ELFCodeGenerator<hw>::require32BitBuffers(std::forward<Targs>(args)...); } \
template <typename... Targs> void requireBarrier(Targs&&... args) { ngen::ELFCodeGenerator<hw>::requireBarrier(std::forward<Targs>(args)...); } \
template <typename... Targs> void requireGlobalAtomics(Targs&&... args) { ngen::ELFCodeGenerator<hw>::requireGlobalAtomics(std::forward<Targs>(args)...); } \
template <typename... Targs> void requireGRF(Targs&&... args) { ngen::ELFCodeGenerator<hw>::requireGRF(std::forward<Targs>(args)...); } \
template <typename... Targs> void requireLocalID(Targs&&... args) { ngen::ELFCodeGenerator<hw>::requireLocalID(std::forward<Targs>(args)...); } \
template <typename... Targs> void requireLocalSize(Targs&&... args) { ngen::ELFCodeGenerator<hw>::requireLocalSize(std::forward<Targs>(args)...); } \
template <typename... Targs> void requireNonuniformWGs(Targs&&... args) { ngen::ELFCodeGenerator<hw>::requireNonuniformWGs(std::forward<Targs>(args)...); } \
template <typename... Targs> void requireNoPreemption(Targs&&... args) { ngen::ELFCodeGenerator<hw>::requireNoPreemption(std::forward<Targs>(args)...); } \
template <typename... Targs> void requireScratch(Targs&&... args) { ngen::ELFCodeGenerator<hw>::requireScratch(std::forward<Targs>(args)...); } \
template <typename... Targs> void requireSIMD(Targs&&... args) { ngen::ELFCodeGenerator<hw>::requireSIMD(std::forward<Targs>(args)...); } \
template <typename... Targs> void requireSLM(Targs&&... args) { ngen::ELFCodeGenerator<hw>::requireSLM(std::forward<Targs>(args)...); } \
template <typename... Targs> void requireStatelessWrites(Targs&&... args) { ngen::ELFCodeGenerator<hw>::requireStatelessWrites(std::forward<Targs>(args)...); } \
void requireType(ngen::DataType type) { ngen::ELFCodeGenerator<hw>::requireType(type); } \
template <typename DT = void> void requireType() { ngen::ELFCodeGenerator<hw>::template requireType<DT>(); } \
template <typename... Targs> void requireWalkOrder(Targs&&... args) { ngen::ELFCodeGenerator<hw>::requireWalkOrder(std::forward<Targs>(args)...); } \
template <typename... Targs> void requireWorkgroup(Targs&&... args) { ngen::ELFCodeGenerator<hw>::requireWorkgroup(std::forward<Targs>(args)...); } \
template <typename... Targs> void finalizeInterface(Targs&&... args) { ngen::ELFCodeGenerator<hw>::finalizeInterface(std::forward<Targs>(args)...); } \
template <typename... Targs> void newArgument(Targs&&... args) { ngen::ELFCodeGenerator<hw>::newArgument(std::forward<Targs>(args)...); } \
template <typename... Targs> ngen::Subregister getArgument(Targs&&... args) { return ngen::ELFCodeGenerator<hw>::getArgument(std::forward<Targs>(args)...); } \
template <typename... Targs> ngen::Subregister getArgumentIfExists(Targs&&... args) { return ngen::ELFCodeGenerator<hw>::getArgumentIfExists(std::forward<Targs>(args)...); } \
template <typename... Targs> int getArgumentSurface(Targs&&... args) { return ngen::ELFCodeGenerator<hw>::getArgumentSurface(std::forward<Targs>(args)...); } \
template <typename... Targs> int getArgumentSurfaceIfExists(Targs&&... args) { return ngen::ELFCodeGenerator<hw>::getArgumentSurfaceIfExists(std::forward<Targs>(args)...); } \
template <typename... Targs> ngen::GRF getLocalID(Targs&&... args) { return ngen::ELFCodeGenerator<hw>::getLocalID(std::forward<Targs>(args)...); } \
template <typename... Targs> ngen::RegData getSIMD1LocalID(Targs&&... args) { return ngen::ELFCodeGenerator<hw>::getSIMD1LocalID(std::forward<Targs>(args)...); } \
template <typename... Targs> ngen::Subregister getLocalSize(Targs&&... args) { return ngen::ELFCodeGenerator<hw>::getLocalSize(std::forward<Targs>(args)...); } \
void epilogue(const ngen::RegData &r0_info = ngen::RegData()) { ngen::ELFCodeGenerator<hw>::epilogue(r0_info); } \
NGEN_FORWARD_ELF_EXTRA \
NGEN_FORWARD_ELF_EXTRA2

// Additional forwarders appended to NGEN_FORWARD_ELF: requireDPAS and prologue.
#define NGEN_FORWARD_ELF_EXTRA \
template <typename... Targs> void requireDPAS(Targs&&... args) { ngen::ELFCodeGenerator<hw>::requireDPAS(std::forward<Targs>(args)...); } \
void prologue() { ngen::ELFCodeGenerator<hw>::prologue(); }

// Additional forwarder appended to NGEN_FORWARD_ELF: requireBarriers.
#define NGEN_FORWARD_ELF_EXTRA2 \
template <typename... Targs> void requireBarriers(Targs&&... args) { ngen::ELFCodeGenerator<hw>::requireBarriers(std::forward<Targs>(args)...); }
314
315
316template <HW hw>
317std::vector<uint8_t> ELFCodeGenerator<hw>::getBinary()
318{
319 return getBinary(this->getCode());
320}
321
322template <HW hw>
323std::vector<uint8_t> ELFCodeGenerator<hw>::getBinary(const std::vector<uint8_t> &kernel)
324{
325 using super = BinaryCodeGenerator<hw>;
326 std::vector<uint8_t> binary;
327 std::string metadata;
328
329 // Locate entrypoints for XeHP+.
330 if (hw >= HW::XeHP) {
331 auto idPerThread = super::_labelLocalIDsLoaded.getID(labelManager);
332 auto idCrossThread = super::_labelArgsLoaded.getID(labelManager);
333
334 if (labelManager.hasTarget(idPerThread))
335 interface_.setSkipPerThreadOffset(labelManager.getTarget(idPerThread));
336 if (labelManager.hasTarget(idCrossThread))
337 interface_.setSkipCrossThreadOffset(labelManager.getTarget(idCrossThread));
338 }
339
340 // Generate metadata.
341 metadata = interface_.generateZeInfo();
342
343 // Construct ELF.
344 size_t szKernelName = interface_.getExternalName().length();
345 size_t szELF = ZebinELF::align(sizeof(ZebinELF) + szKernelName);
346 size_t szMetadata = ZebinELF::align(metadata.size());
347 size_t szKernel = ZebinELF::align(kernel.size());
348
349 binary.resize(szELF + szMetadata + szKernel);
350
351 (void) new(binary.data()) ZebinELF(szKernelName, szMetadata, szKernel);
352 utils::copy_into(binary, ZebinELF::kernelNameOffset(), interface_.getExternalName());
353 utils::copy_into(binary, szELF, metadata);
354 utils::copy_into(binary, szELF + szMetadata, kernel);
355
356 return binary;
357}
358
359template <HW hw>
360inline HW ELFCodeGenerator<hw>::getBinaryArch(const std::vector<uint8_t> &binary)
361{
362 HW outHW;
363 int outStepping;
364
365 getBinaryHWInfo(binary, outHW, outStepping);
366
367 return outHW;
368}
369
370template <HW hw>
371inline void ELFCodeGenerator<hw>::getBinaryHWInfo(const std::vector<uint8_t> &binary, HW &outHW, int &outStepping)
372{
373 using Note = typename ZebinELF::Note;
374
375 outHW = HW::Unknown;
376 outStepping = 0;
377
378 auto zebinELF = reinterpret_cast<const ZebinELF *>(binary.data());
379 if (zebinELF->valid()) {
380 // Check for .note.intelgt.compat section first. If not present, fall back to flags.
381 const Note *start, *end;
382 zebinELF->findNotes(start, end);
383 if (start && end) {
384 while (start < end) {
385 auto rstart = reinterpret_cast<const uint8_t *>(start);
386 if (start->descSize == sizeof(start->payload)) {
387 auto *actualPayload = reinterpret_cast<const uint32_t *>(
388 rstart + offsetof(Note, payload) - sizeof(Note::name) + utils::alignup_pow2(start->nameSize, 4)
389 );
390 switch (start->type) {
391 case Note::Type::ProductFamily: {
392 auto decodedHW = npack::decodeProductFamily(static_cast<npack::ProductFamily>(*actualPayload));
393 if (decodedHW != HW::Unknown)
394 outHW = decodedHW;
395 break;
396 }
397 case Note::Type::GfxCoreFamily:
398 if (outHW == HW::Unknown)
399 outHW = npack::decodeGfxCoreFamily(static_cast<npack::GfxCoreFamily>(*actualPayload));
400 break;
401 case Note::Type::TargetMetadata: {
402 typename ZebinELF::TargetMetadata metadata;
403 metadata.all = *actualPayload;
404 outStepping = metadata.parts.minHWRevision;
405 }
406 default: break;
407 }
408 }
409 start = reinterpret_cast<const Note *>(
410 rstart + offsetof(Note, payload)
411 + utils::alignup_pow2(start->nameSize, 4)
412 + utils::alignup_pow2(start->descSize, 4)
413 );
414 }
415 } else {
416 if (zebinELF->fileHeader.flags.parts.useGfxCoreFamily)
417 outHW = npack::decodeGfxCoreFamily(static_cast<npack::GfxCoreFamily>(zebinELF->fileHeader.machine));
418 else
419 outHW = npack::decodeProductFamily(static_cast<npack::ProductFamily>(zebinELF->fileHeader.machine));
420 outStepping = zebinELF->fileHeader.flags.parts.minHWRevision;
421 }
422 } else
423 npack::getBinaryHWInfo(binary, outHW, outStepping);
424}
425
426} /* namespace ngen */
427
428#endif /* NGEN_ELF_HPP */
429