1/*******************************************************************************
2* Copyright 2019-2022 Intel Corporation
3*
4* Licensed under the Apache License, Version 2.0 (the "License");
5* you may not use this file except in compliance with the License.
6* You may obtain a copy of the License at
7*
8* http://www.apache.org/licenses/LICENSE-2.0
9*
10* Unless required by applicable law or agreed to in writing, software
11* distributed under the License is distributed on an "AS IS" BASIS,
12* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13* See the License for the specific language governing permissions and
14* limitations under the License.
15*******************************************************************************/
16
17#ifndef NGEN_OPENCL_HPP
18#define NGEN_OPENCL_HPP
19
20#include "ngen_config.hpp"
21
22#include <CL/cl.h>
23
24#include <atomic>
25#include <sstream>
26
27#include "ngen_elf.hpp"
28#include "ngen_interface.hpp"
29
30#include "npack/neo_packager.hpp"
31
32namespace ngen {
33
34
35// Exceptions.
// Exception type whose what() text is the fixed string "Unsupported OpenCL runtime.".
class unsupported_opencl_runtime : public std::runtime_error {
public:
    unsupported_opencl_runtime()
        : std::runtime_error{"Unsupported OpenCL runtime."}
    {}
};
40class opencl_error : public std::runtime_error {
41public:
42 opencl_error(cl_int status_ = 0) : std::runtime_error("An OpenCL error occurred: " + std::to_string(status_)), status(status_) {}
43protected:
44 cl_int status;
45};
46
47// OpenCL program generator class.
48template <HW hw>
49class OpenCLCodeGenerator : public ELFCodeGenerator<hw>
50{
51public:
52 explicit OpenCLCodeGenerator(int stepping_ = 0) : ELFCodeGenerator<hw>(stepping_) {}
53
54 inline std::vector<uint8_t> getBinary(cl_context context, cl_device_id device, const std::string &options = "-cl-std=CL2.0");
55 inline cl_kernel getKernel(cl_context context, cl_device_id device, const std::string &options = "-cl-std=CL2.0");
56 static inline HW detectHW(cl_context context, cl_device_id device);
57 static inline void detectHWInfo(cl_context context, cl_device_id device, HW &outHW, int &outStepping);
58
59private:
60 inline std::vector<uint8_t> getPatchTokenBinary(cl_context context, cl_device_id device, const std::vector<uint8_t> *code = nullptr, const std::string &options = "-cl-std=CL2.0");
61};
62
// Alias for NGEN_FORWARD_ELF(hw); that macro is defined outside this header
// (presumably in ngen_elf.hpp -- confirm).
#define NGEN_FORWARD_OPENCL(hw) NGEN_FORWARD_ELF(hw)
64
65namespace detail {
66
67static inline void handleCL(cl_int result)
68{
69 if (result != CL_SUCCESS)
70 throw opencl_error{result};
71}
72
73static inline std::vector<uint8_t> getOpenCLCProgramBinary(cl_context context, cl_device_id device, const char *src, const char *options)
74{
75 cl_int status;
76
77 auto program = clCreateProgramWithSource(context, 1, &src, nullptr, &status);
78
79 detail::handleCL(status);
80 if (program == nullptr)
81 throw opencl_error();
82
83 detail::handleCL(clBuildProgram(program, 1, &device, options, nullptr, nullptr));
84 size_t nDevices = 0;
85 detail::handleCL(clGetProgramInfo(program, CL_PROGRAM_NUM_DEVICES, sizeof(size_t), &nDevices, nullptr));
86 std::vector<cl_device_id> devices(nDevices);
87 detail::handleCL(clGetProgramInfo(program, CL_PROGRAM_DEVICES, sizeof(cl_device_id) * nDevices, devices.data(), nullptr));
88 size_t deviceIdx = std::distance(devices.begin(), std::find(devices.begin(), devices.end(), device));
89
90 if (deviceIdx >= nDevices)
91 throw opencl_error();
92
93 std::vector<size_t> binarySize(nDevices);
94 std::vector<uint8_t *> binaryPointers(nDevices);
95 std::vector<std::vector<uint8_t>> binaries(nDevices);
96
97 detail::handleCL(clGetProgramInfo(program, CL_PROGRAM_BINARY_SIZES, sizeof(size_t) * nDevices, binarySize.data(), nullptr));
98 for (size_t i = 0; i < nDevices; i++) {
99 binaries[i].resize(binarySize[i]);
100 binaryPointers[i] = binaries[i].data();
101 }
102
103 detail::handleCL(clGetProgramInfo(program, CL_PROGRAM_BINARIES, sizeof(uint8_t *) * nDevices, binaryPointers.data(), nullptr));
104 detail::handleCL(clReleaseProgram(program));
105
106 return binaries[deviceIdx];
107}
108
109inline bool tryZebinFirst(cl_device_id device, bool setDefault = false, bool newDefault = false)
110{
111 static std::atomic<bool> hint(false);
112 if (setDefault) hint = newDefault;
113
114 return hint;
115}
116
117}; /* namespace detail */
118
119template <HW hw>
120std::vector<uint8_t> OpenCLCodeGenerator<hw>::getPatchTokenBinary(cl_context context, cl_device_id device, const std::vector<uint8_t> *code, const std::string &options)
121{
122 using super = ELFCodeGenerator<hw>;
123 std::ostringstream dummyCL;
124 auto modOptions = options;
125
126 if ((hw >= HW::XeHP) && (super::interface_.needGRF > 128))
127 modOptions.append(" -cl-intel-256-GRF-per-thread");
128
129 super::interface_.generateDummyCL(dummyCL);
130 auto dummyCLString = dummyCL.str();
131
132 auto binary = detail::getOpenCLCProgramBinary(context, device, dummyCLString.c_str(), modOptions.c_str());
133
134 npack::replaceKernel(binary, code ? *code : this->getCode());
135
136 return binary;
137}
138
139template <HW hw>
140std::vector<uint8_t> OpenCLCodeGenerator<hw>::getBinary(cl_context context, cl_device_id device, const std::string &options)
141{
142 using super = ELFCodeGenerator<hw>;
143 bool zebinFirst = detail::tryZebinFirst(device);
144
145 auto code = this->getCode();
146
147 for (bool defaultFormat : {true, false}) {
148 bool legacy = defaultFormat ^ zebinFirst;
149
150 if (legacy) {
151 try {
152 return getPatchTokenBinary(context, device, &code, options);
153 } catch (...) {
154 (void) detail::tryZebinFirst(device, true, true);
155 continue;
156 }
157 } else
158 return super::getBinary(code);
159 }
160
161 return std::vector<uint8_t>(); // Unreachable.
162}
163
164template <HW hw>
165cl_kernel OpenCLCodeGenerator<hw>::getKernel(cl_context context, cl_device_id device, const std::string &options)
166{
167 using super = ELFCodeGenerator<hw>;
168 cl_int status = CL_SUCCESS;
169 cl_program program = nullptr;
170 bool good = false;
171 bool zebinFirst = detail::tryZebinFirst(device);
172 std::vector<uint8_t> binary;
173
174 auto code = this->getCode();
175
176 for (bool defaultFormat : {true, false}) {
177 bool legacy = defaultFormat ^ zebinFirst;
178
179 if (legacy) {
180 try {
181 binary = getPatchTokenBinary(context, device, &code);
182 } catch (...) {
183 continue;
184 }
185 } else
186 binary = super::getBinary(code);
187
188 const auto *binaryPtr = binary.data();
189 size_t binarySize = binary.size();
190 status = CL_SUCCESS;
191 program = clCreateProgramWithBinary(context, 1, &device, &binarySize, &binaryPtr, nullptr, &status);
192
193 if ((program == nullptr) || (status != CL_SUCCESS))
194 continue;
195
196 status = clBuildProgram(program, 1, &device, options.c_str(), nullptr, nullptr);
197
198 good = (status == CL_SUCCESS);
199 if (good) {
200 (void) detail::tryZebinFirst(device, true, !legacy);
201 break;
202 } else
203 detail::handleCL(clReleaseProgram(program));
204 }
205
206 if (!good)
207 throw opencl_error(status);
208
209 auto kernel = clCreateKernel(program, super::interface_.getExternalName().c_str(), &status);
210 detail::handleCL(status);
211 if (kernel == nullptr)
212 throw opencl_error();
213
214 detail::handleCL(clReleaseProgram(program));
215
216 return kernel;
217}
218
219template <HW hw>
220HW OpenCLCodeGenerator<hw>::detectHW(cl_context context, cl_device_id device)
221{
222 HW outHW;
223 int outStepping;
224
225 detectHWInfo(context, device, outHW, outStepping);
226
227 return outHW;
228}
229
230template <HW hw>
231void OpenCLCodeGenerator<hw>::detectHWInfo(cl_context context, cl_device_id device, HW &outHW, int &outStepping)
232{
233 const char *dummyCL = "kernel void _(){}";
234 const char *dummyOptions = "";
235
236 auto binary = detail::getOpenCLCProgramBinary(context, device, dummyCL, dummyOptions);
237
238 ELFCodeGenerator<hw>::getBinaryHWInfo(binary, outHW, outStepping);
239}
240
241} /* namespace ngen */
242
243#endif
244