1 | /******************************************************************************* |
2 | * Copyright 2019-2022 Intel Corporation |
3 | * |
4 | * Licensed under the Apache License, Version 2.0 (the "License"); |
5 | * you may not use this file except in compliance with the License. |
6 | * You may obtain a copy of the License at |
7 | * |
8 | * http://www.apache.org/licenses/LICENSE-2.0 |
9 | * |
10 | * Unless required by applicable law or agreed to in writing, software |
11 | * distributed under the License is distributed on an "AS IS" BASIS, |
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
13 | * See the License for the specific language governing permissions and |
14 | * limitations under the License. |
15 | *******************************************************************************/ |
16 | |
17 | #include "gpu/ocl/ocl_gpu_device_info.hpp" |
18 | #include "gpu/ocl/ocl_gpu_engine.hpp" |
19 | #include "gpu/ocl/ocl_gpu_hw_info.hpp" |
20 | |
21 | namespace dnnl { |
22 | namespace impl { |
23 | namespace gpu { |
24 | namespace ocl { |
25 | |
26 | status_t ocl_gpu_device_info_t::init_arch(engine_t *engine) { |
27 | cl_int err = CL_SUCCESS; |
28 | auto device = utils::downcast<const ocl_gpu_engine_t *>(engine)->device(); |
29 | |
30 | // skip other vendors |
31 | const cl_uint intel_vendor_id = 0x8086; |
32 | cl_uint vendor_id; |
33 | err = clGetDeviceInfo( |
34 | device, CL_DEVICE_VENDOR_ID, sizeof(cl_uint), &vendor_id, nullptr); |
35 | OCL_CHECK(err); |
36 | if (vendor_id != intel_vendor_id) return status::success; |
37 | |
38 | cl_context context |
39 | = clCreateContext(nullptr, 1, &device, nullptr, nullptr, &err); |
40 | OCL_CHECK(err); |
41 | |
42 | init_gpu_hw_info(engine, device, context, gpu_arch_, stepping_id_, |
43 | mayiuse_ngen_kernels_); |
44 | |
45 | err = clReleaseContext(context); |
46 | OCL_CHECK(err); |
47 | |
48 | // XXX: temporary WA for different Xe_HP devices |
49 | if (gpu_arch_ == compute::gpu_arch_t::xe_hp) { |
50 | // query extensions |
51 | size_t param_size = 0; |
52 | err = clGetDeviceInfo( |
53 | device, CL_DEVICE_EXTENSIONS, 0, nullptr, ¶m_size); |
54 | OCL_CHECK(err); |
55 | |
56 | std::string extension_string(param_size, '\0'); |
57 | err = clGetDeviceInfo(device, CL_DEVICE_EXTENSIONS, param_size, |
58 | &extension_string[0], ¶m_size); |
59 | OCL_CHECK(err); |
60 | if (extension_string.find(ext2cl_str(compute::device_ext_t::khr_fp64)) |
61 | == std::string::npos) |
62 | gpu_arch_ = compute::gpu_arch_t::xe_hpg; |
63 | } |
64 | return status::success; |
65 | } |
66 | |
67 | status_t ocl_gpu_device_info_t::init_device_name(engine_t *engine) { |
68 | cl_int err = CL_SUCCESS; |
69 | auto device = utils::downcast<const ocl_gpu_engine_t *>(engine)->device(); |
70 | |
71 | size_t param_size = 0; |
72 | err = clGetDeviceInfo(device, CL_DEVICE_NAME, 0, nullptr, ¶m_size); |
73 | OCL_CHECK(err); |
74 | |
75 | name_ = std::string(param_size, '\0'); |
76 | err = clGetDeviceInfo( |
77 | device, CL_DEVICE_NAME, param_size, &name_[0], ¶m_size); |
78 | OCL_CHECK(err); |
79 | |
80 | return status::success; |
81 | } |
82 | |
83 | status_t ocl_gpu_device_info_t::init_runtime_version(engine_t *engine) { |
84 | cl_int err = CL_SUCCESS; |
85 | auto device = utils::downcast<const ocl_gpu_engine_t *>(engine)->device(); |
86 | |
87 | size_t param_size = 0; |
88 | err = clGetDeviceInfo(device, CL_DRIVER_VERSION, 0, nullptr, ¶m_size); |
89 | OCL_CHECK(err); |
90 | |
91 | std::string driver_version(param_size, '\0'); |
92 | err = clGetDeviceInfo( |
93 | device, CL_DRIVER_VERSION, param_size, &driver_version[0], nullptr); |
94 | OCL_CHECK(err); |
95 | |
96 | if (runtime_version_.set_from_string(&driver_version[0]) |
97 | != status::success) { |
98 | runtime_version_.major = 0; |
99 | runtime_version_.minor = 0; |
100 | runtime_version_.build = 0; |
101 | } |
102 | |
103 | return status::success; |
104 | } |
105 | |
106 | status_t ocl_gpu_device_info_t::init_extensions(engine_t *engine) { |
107 | cl_int err = CL_SUCCESS; |
108 | auto device = utils::downcast<const ocl_gpu_engine_t *>(engine)->device(); |
109 | |
110 | // query device for extensions |
111 | size_t param_size = 0; |
112 | err = clGetDeviceInfo( |
113 | device, CL_DEVICE_EXTENSIONS, 0, nullptr, ¶m_size); |
114 | OCL_CHECK(err); |
115 | |
116 | std::string extension_string(param_size, '\0'); |
117 | err = clGetDeviceInfo(device, CL_DEVICE_EXTENSIONS, param_size, |
118 | &extension_string[0], ¶m_size); |
119 | OCL_CHECK(err); |
120 | |
121 | // convert to ours |
122 | using namespace compute; |
123 | for (uint64_t i_ext = 1; i_ext < (uint64_t)device_ext_t::last; |
124 | i_ext <<= 1) { |
125 | const char *s_ext = ext2cl_str((device_ext_t)i_ext); |
126 | if (s_ext && extension_string.find(s_ext) != std::string::npos) { |
127 | extensions_ |= i_ext; |
128 | } |
129 | } |
130 | |
131 | // Handle future extensions, not yet supported by the OpenCL API |
132 | extensions_ |= (uint64_t)get_future_extensions(gpu_arch()); |
133 | |
134 | return status::success; |
135 | } |
136 | |
137 | status_t ocl_gpu_device_info_t::init_attributes(engine_t *engine) { |
138 | cl_int err = CL_SUCCESS; |
139 | auto device = utils::downcast<const ocl_gpu_engine_t *>(engine)->device(); |
140 | |
141 | CHECK(get_ocl_device_eu_count(device, &eu_count_)); |
142 | |
143 | size_t max_wg_size = 0; |
144 | err = clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_GROUP_SIZE, |
145 | sizeof(max_wg_size), &max_wg_size, nullptr); |
146 | OCL_CHECK(err); |
147 | max_wg_size_ = max_wg_size; |
148 | |
149 | return status::success; |
150 | } |
151 | |
152 | std::string ocl_gpu_device_info_t::get_cl_ext_options() const { |
153 | using namespace compute; |
154 | |
155 | std::string opts; |
156 | for (uint64_t i_ext = 1; i_ext < (uint64_t)device_ext_t::last; |
157 | i_ext <<= 1) { |
158 | auto ext = (device_ext_t)i_ext; |
159 | |
160 | // Use real GPU extensions |
161 | if (!has(ext)) continue; |
162 | |
163 | // These extensions are not handled properly by the OpenCL runtime. |
164 | // Pass macros for them manually. |
165 | if (utils::one_of(ext, device_ext_t::intel_global_float_atomics, |
166 | device_ext_t::intel_subgroup_matrix_multiply_accumulate, |
167 | device_ext_t:: |
168 | intel_subgroup_split_matrix_multiply_accumulate, |
169 | device_ext_t::intel_global_float_atomics, |
170 | device_ext_t::future_bf16_cvt, |
171 | device_ext_t::intel_dot_accumulate)) |
172 | opts += std::string("-D" ) + ext2cl_str(ext) + " " ; |
173 | } |
174 | if (!opts.empty()) { opts[opts.size() - 1] = '\0'; } |
175 | return opts; |
176 | } |
177 | |
178 | } // namespace ocl |
179 | } // namespace gpu |
180 | } // namespace impl |
181 | } // namespace dnnl |
182 | |