1 | /******************************************************************************* |
2 | * Copyright 2019-2022 Intel Corporation |
3 | * |
4 | * Licensed under the Apache License, Version 2.0 (the "License"); |
5 | * you may not use this file except in compliance with the License. |
6 | * You may obtain a copy of the License at |
7 | * |
8 | * http://www.apache.org/licenses/LICENSE-2.0 |
9 | * |
10 | * Unless required by applicable law or agreed to in writing, software |
11 | * distributed under the License is distributed on an "AS IS" BASIS, |
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
13 | * See the License for the specific language governing permissions and |
14 | * limitations under the License. |
15 | *******************************************************************************/ |
16 | |
17 | #ifndef NGEN_OPENCL_HPP |
18 | #define NGEN_OPENCL_HPP |
19 | |
20 | #include "ngen_config.hpp" |
21 | |
22 | #include <CL/cl.h> |
23 | |
24 | #include <atomic> |
25 | #include <sstream> |
26 | |
27 | #include "ngen_elf.hpp" |
28 | #include "ngen_interface.hpp" |
29 | |
30 | #include "npack/neo_packager.hpp" |
31 | |
32 | namespace ngen { |
33 | |
34 | |
35 | // Exceptions. |
// Exception type whose message reports an unsupported OpenCL runtime.
class unsupported_opencl_runtime : public std::runtime_error
{
public:
    unsupported_opencl_runtime()
        : std::runtime_error{"Unsupported OpenCL runtime."}
    {
    }
};
40 | class opencl_error : public std::runtime_error { |
41 | public: |
42 | opencl_error(cl_int status_ = 0) : std::runtime_error("An OpenCL error occurred: " + std::to_string(status_)), status(status_) {} |
43 | protected: |
44 | cl_int status; |
45 | }; |
46 | |
47 | // OpenCL program generator class. |
48 | template <HW hw> |
49 | class OpenCLCodeGenerator : public ELFCodeGenerator<hw> |
50 | { |
51 | public: |
52 | explicit OpenCLCodeGenerator(int stepping_ = 0) : ELFCodeGenerator<hw>(stepping_) {} |
53 | |
54 | inline std::vector<uint8_t> getBinary(cl_context context, cl_device_id device, const std::string &options = "-cl-std=CL2.0" ); |
55 | inline cl_kernel getKernel(cl_context context, cl_device_id device, const std::string &options = "-cl-std=CL2.0" ); |
56 | static inline HW detectHW(cl_context context, cl_device_id device); |
57 | static inline void detectHWInfo(cl_context context, cl_device_id device, HW &outHW, int &outStepping); |
58 | |
59 | private: |
60 | inline std::vector<uint8_t> getPatchTokenBinary(cl_context context, cl_device_id device, const std::vector<uint8_t> *code = nullptr, const std::string &options = "-cl-std=CL2.0" ); |
61 | }; |
62 | |
// Forwarding macro for OpenCL generators; expands to NGEN_FORWARD_ELF(hw).
#define NGEN_FORWARD_OPENCL(hw) NGEN_FORWARD_ELF(hw)
64 | |
65 | namespace detail { |
66 | |
67 | static inline void handleCL(cl_int result) |
68 | { |
69 | if (result != CL_SUCCESS) |
70 | throw opencl_error{result}; |
71 | } |
72 | |
73 | static inline std::vector<uint8_t> getOpenCLCProgramBinary(cl_context context, cl_device_id device, const char *src, const char *options) |
74 | { |
75 | cl_int status; |
76 | |
77 | auto program = clCreateProgramWithSource(context, 1, &src, nullptr, &status); |
78 | |
79 | detail::handleCL(status); |
80 | if (program == nullptr) |
81 | throw opencl_error(); |
82 | |
83 | detail::handleCL(clBuildProgram(program, 1, &device, options, nullptr, nullptr)); |
84 | size_t nDevices = 0; |
85 | detail::handleCL(clGetProgramInfo(program, CL_PROGRAM_NUM_DEVICES, sizeof(size_t), &nDevices, nullptr)); |
86 | std::vector<cl_device_id> devices(nDevices); |
87 | detail::handleCL(clGetProgramInfo(program, CL_PROGRAM_DEVICES, sizeof(cl_device_id) * nDevices, devices.data(), nullptr)); |
88 | size_t deviceIdx = std::distance(devices.begin(), std::find(devices.begin(), devices.end(), device)); |
89 | |
90 | if (deviceIdx >= nDevices) |
91 | throw opencl_error(); |
92 | |
93 | std::vector<size_t> binarySize(nDevices); |
94 | std::vector<uint8_t *> binaryPointers(nDevices); |
95 | std::vector<std::vector<uint8_t>> binaries(nDevices); |
96 | |
97 | detail::handleCL(clGetProgramInfo(program, CL_PROGRAM_BINARY_SIZES, sizeof(size_t) * nDevices, binarySize.data(), nullptr)); |
98 | for (size_t i = 0; i < nDevices; i++) { |
99 | binaries[i].resize(binarySize[i]); |
100 | binaryPointers[i] = binaries[i].data(); |
101 | } |
102 | |
103 | detail::handleCL(clGetProgramInfo(program, CL_PROGRAM_BINARIES, sizeof(uint8_t *) * nDevices, binaryPointers.data(), nullptr)); |
104 | detail::handleCL(clReleaseProgram(program)); |
105 | |
106 | return binaries[deviceIdx]; |
107 | } |
108 | |
109 | inline bool tryZebinFirst(cl_device_id device, bool setDefault = false, bool newDefault = false) |
110 | { |
111 | static std::atomic<bool> hint(false); |
112 | if (setDefault) hint = newDefault; |
113 | |
114 | return hint; |
115 | } |
116 | |
117 | }; /* namespace detail */ |
118 | |
119 | template <HW hw> |
120 | std::vector<uint8_t> OpenCLCodeGenerator<hw>::getPatchTokenBinary(cl_context context, cl_device_id device, const std::vector<uint8_t> *code, const std::string &options) |
121 | { |
122 | using super = ELFCodeGenerator<hw>; |
123 | std::ostringstream dummyCL; |
124 | auto modOptions = options; |
125 | |
126 | if ((hw >= HW::XeHP) && (super::interface_.needGRF > 128)) |
127 | modOptions.append(" -cl-intel-256-GRF-per-thread" ); |
128 | |
129 | super::interface_.generateDummyCL(dummyCL); |
130 | auto dummyCLString = dummyCL.str(); |
131 | |
132 | auto binary = detail::getOpenCLCProgramBinary(context, device, dummyCLString.c_str(), modOptions.c_str()); |
133 | |
134 | npack::replaceKernel(binary, code ? *code : this->getCode()); |
135 | |
136 | return binary; |
137 | } |
138 | |
139 | template <HW hw> |
140 | std::vector<uint8_t> OpenCLCodeGenerator<hw>::getBinary(cl_context context, cl_device_id device, const std::string &options) |
141 | { |
142 | using super = ELFCodeGenerator<hw>; |
143 | bool zebinFirst = detail::tryZebinFirst(device); |
144 | |
145 | auto code = this->getCode(); |
146 | |
147 | for (bool defaultFormat : {true, false}) { |
148 | bool legacy = defaultFormat ^ zebinFirst; |
149 | |
150 | if (legacy) { |
151 | try { |
152 | return getPatchTokenBinary(context, device, &code, options); |
153 | } catch (...) { |
154 | (void) detail::tryZebinFirst(device, true, true); |
155 | continue; |
156 | } |
157 | } else |
158 | return super::getBinary(code); |
159 | } |
160 | |
161 | return std::vector<uint8_t>(); // Unreachable. |
162 | } |
163 | |
164 | template <HW hw> |
165 | cl_kernel OpenCLCodeGenerator<hw>::getKernel(cl_context context, cl_device_id device, const std::string &options) |
166 | { |
167 | using super = ELFCodeGenerator<hw>; |
168 | cl_int status = CL_SUCCESS; |
169 | cl_program program = nullptr; |
170 | bool good = false; |
171 | bool zebinFirst = detail::tryZebinFirst(device); |
172 | std::vector<uint8_t> binary; |
173 | |
174 | auto code = this->getCode(); |
175 | |
176 | for (bool defaultFormat : {true, false}) { |
177 | bool legacy = defaultFormat ^ zebinFirst; |
178 | |
179 | if (legacy) { |
180 | try { |
181 | binary = getPatchTokenBinary(context, device, &code); |
182 | } catch (...) { |
183 | continue; |
184 | } |
185 | } else |
186 | binary = super::getBinary(code); |
187 | |
188 | const auto *binaryPtr = binary.data(); |
189 | size_t binarySize = binary.size(); |
190 | status = CL_SUCCESS; |
191 | program = clCreateProgramWithBinary(context, 1, &device, &binarySize, &binaryPtr, nullptr, &status); |
192 | |
193 | if ((program == nullptr) || (status != CL_SUCCESS)) |
194 | continue; |
195 | |
196 | status = clBuildProgram(program, 1, &device, options.c_str(), nullptr, nullptr); |
197 | |
198 | good = (status == CL_SUCCESS); |
199 | if (good) { |
200 | (void) detail::tryZebinFirst(device, true, !legacy); |
201 | break; |
202 | } else |
203 | detail::handleCL(clReleaseProgram(program)); |
204 | } |
205 | |
206 | if (!good) |
207 | throw opencl_error(status); |
208 | |
209 | auto kernel = clCreateKernel(program, super::interface_.getExternalName().c_str(), &status); |
210 | detail::handleCL(status); |
211 | if (kernel == nullptr) |
212 | throw opencl_error(); |
213 | |
214 | detail::handleCL(clReleaseProgram(program)); |
215 | |
216 | return kernel; |
217 | } |
218 | |
219 | template <HW hw> |
220 | HW OpenCLCodeGenerator<hw>::detectHW(cl_context context, cl_device_id device) |
221 | { |
222 | HW outHW; |
223 | int outStepping; |
224 | |
225 | detectHWInfo(context, device, outHW, outStepping); |
226 | |
227 | return outHW; |
228 | } |
229 | |
230 | template <HW hw> |
231 | void OpenCLCodeGenerator<hw>::detectHWInfo(cl_context context, cl_device_id device, HW &outHW, int &outStepping) |
232 | { |
233 | const char *dummyCL = "kernel void _(){}" ; |
234 | const char *dummyOptions = "" ; |
235 | |
236 | auto binary = detail::getOpenCLCProgramBinary(context, device, dummyCL, dummyOptions); |
237 | |
238 | ELFCodeGenerator<hw>::getBinaryHWInfo(binary, outHW, outStepping); |
239 | } |
240 | |
241 | } /* namespace ngen */ |
242 | |
243 | #endif |
244 | |