1 | /******************************************************************************* |
2 | * Copyright 2022 Intel Corporation |
3 | * |
4 | * Licensed under the Apache License, Version 2.0 (the "License"); |
5 | * you may not use this file except in compliance with the License. |
6 | * You may obtain a copy of the License at |
7 | * |
8 | * http://www.apache.org/licenses/LICENSE-2.0 |
9 | * |
10 | * Unless required by applicable law or agreed to in writing, software |
11 | * distributed under the License is distributed on an "AS IS" BASIS, |
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
13 | * See the License for the specific language governing permissions and |
14 | * limitations under the License. |
15 | *******************************************************************************/ |
16 | |
17 | #include "gpu/ocl/mdapi_utils.hpp" |
18 | |
19 | #include "oneapi/dnnl/dnnl_config.h" |
20 | |
21 | #if defined(__linux__) && (DNNL_GPU_RUNTIME == DNNL_RUNTIME_OCL) |
22 | #define DNNL_GPU_ENABLE_MDAPI |
23 | #endif |
24 | |
25 | #ifdef DNNL_GPU_ENABLE_MDAPI |
26 | #include <cassert> |
27 | #include <cstring> |
28 | #include <dlfcn.h> |
29 | #include <vector> |
30 | |
31 | #include "gpu/ocl/mdapi/metrics_discovery_api.h" |
32 | #include "gpu/ocl/ocl_utils.hpp" |
33 | |
34 | #ifndef CL_PROFILING_COMMAND_PERFCOUNTERS_INTEL |
35 | #define CL_PROFILING_COMMAND_PERFCOUNTERS_INTEL 0x407F |
36 | #endif |
37 | #endif |
38 | |
39 | namespace dnnl { |
40 | namespace impl { |
41 | namespace gpu { |
42 | namespace ocl { |
43 | |
44 | #ifdef DNNL_GPU_ENABLE_MDAPI |
45 | |
46 | static bool open_metrics_device(MetricsDiscovery::IMetricsDevice_1_5 **device) { |
47 | static MetricsDiscovery::OpenMetricsDevice_fn func; |
48 | if (!func) { |
49 | void *lib = dlopen("libmd.so.1" , RTLD_LAZY); |
50 | *(void **)(&func) = dlsym(lib, "OpenMetricsDevice" ); |
51 | } |
52 | if (!func) return false; |
53 | auto code = func(device); |
54 | return code == MetricsDiscovery::CC_OK; |
55 | } |
56 | |
57 | static bool close_metrics_device(MetricsDiscovery::IMetricsDevice_1_5 *device) { |
58 | static MetricsDiscovery::CloseMetricsDevice_fn func; |
59 | if (!func) { |
60 | void *lib = dlopen("libmd.so.1" , RTLD_LAZY); |
61 | *(void **)(&func) = dlsym(lib, "CloseMetricsDevice" ); |
62 | } |
63 | if (!func) return false; |
64 | auto code = func(device); |
65 | return code == MetricsDiscovery::CC_OK; |
66 | } |
67 | |
68 | class mdapi_helper_impl_t { |
69 | public: |
70 | mdapi_helper_impl_t() { |
71 | if (!open_metrics_device(&metric_device_)) return; |
72 | if (!activate_freq_metric()) return; |
73 | is_initialized_ = true; |
74 | } |
75 | |
76 | ~mdapi_helper_impl_t() { close_metrics_device(metric_device_); } |
77 | |
78 | cl_command_queue create_queue( |
79 | cl_context ctx, cl_device_id dev, cl_int *err) const { |
80 | if (!is_initialized_) { |
81 | *err = CL_INVALID_VALUE; |
82 | return nullptr; |
83 | } |
84 | using clCreatePerfCountersCommandQueueINTEL_func_t |
85 | = cl_command_queue (*)(cl_context, cl_device_id, |
86 | cl_command_queue_properties, cl_uint, cl_int *); |
87 | static ext_func_t<clCreatePerfCountersCommandQueueINTEL_func_t> |
88 | create_queue_with_perf_counters( |
89 | "clCreatePerfCountersCommandQueueINTEL" ); |
90 | auto func = create_queue_with_perf_counters.get_func( |
91 | get_ocl_platform(dev)); |
92 | if (!func) { |
93 | *err = CL_INVALID_VALUE; |
94 | return nullptr; |
95 | } |
96 | auto config = metric_set_->GetParams()->ApiSpecificId.OCL; |
97 | return func(ctx, dev, CL_QUEUE_PROFILING_ENABLE, config, err); |
98 | } |
99 | |
100 | double get_freq(cl_event event) const { |
101 | if (!is_initialized_) return 0; |
102 | |
103 | using namespace MetricsDiscovery; |
104 | auto mparams = metric_set_->GetParams(); |
105 | auto report_size = mparams->QueryReportSize; |
106 | size_t out_size = report_size; |
107 | std::vector<uint8_t> report(report_size); |
108 | |
109 | cl_int err; |
110 | err = clGetEventProfilingInfo(event, |
111 | CL_PROFILING_COMMAND_PERFCOUNTERS_INTEL, report_size, |
112 | report.data(), &out_size); |
113 | if (err != CL_SUCCESS) return 0; |
114 | if (out_size != report_size) return 0; |
115 | |
116 | std::vector<TTypedValue_1_0> results( |
117 | mparams->MetricsCount + mparams->InformationCount); |
118 | |
119 | uint32_t report_count = 0; |
120 | TCompletionCode code; |
121 | code = metric_set_->CalculateMetrics(report.data(), report_size, |
122 | results.data(), |
123 | (uint32_t)(results.size() * sizeof(TTypedValue_1_0)), |
124 | &report_count, false); |
125 | if (code != CC_OK) return 0; |
126 | if (report_count < 1) return 0; |
127 | |
128 | auto &value = results[freq_metric_idx_]; |
129 | assert(value.ValueType == EValueType::VALUE_TYPE_UINT64); |
130 | return value.ValueUInt64 * 1e6; |
131 | } |
132 | |
133 | private: |
134 | bool activate_freq_metric() { |
135 | using namespace MetricsDiscovery; |
136 | |
137 | auto *params = metric_device_->GetParams(); |
138 | |
139 | int major = params->Version.MajorNumber; |
140 | int minor = params->Version.MinorNumber; |
141 | auto _1 = 1; |
142 | if (std::tie(major, minor) < std::tie(_1, _1)) return false; |
143 | |
144 | auto api_mask = API_TYPE_OCL; |
145 | for (uint32_t i = 0; i < params->ConcurrentGroupsCount; i++) { |
146 | auto group = metric_device_->GetConcurrentGroup(i); |
147 | auto gparams = group->GetParams(); |
148 | for (uint32_t j = 0; j < gparams->MetricSetsCount; j++) { |
149 | auto set = group->GetMetricSet(j); |
150 | auto sparams = set->GetParams(); |
151 | if (!(sparams->ApiMask & api_mask)) continue; |
152 | if (!strcmp(sparams->SymbolName, "ComputeBasic" )) { |
153 | metric_set_ = set; |
154 | |
155 | for (uint32_t k = 0; k < sparams->MetricsCount; k++) { |
156 | auto metric = set->GetMetric(k); |
157 | auto mparams = metric->GetParams(); |
158 | if (!strcmp(mparams->SymbolName, |
159 | "AvgGpuCoreFrequencyMHz" )) |
160 | freq_metric_idx_ = k; |
161 | } |
162 | } |
163 | } |
164 | } |
165 | |
166 | if (freq_metric_idx_ < 0) return false; |
167 | |
168 | TCompletionCode code; |
169 | code = metric_set_->SetApiFiltering(api_mask); |
170 | if (code != CC_OK) return false; |
171 | |
172 | code = metric_set_->Activate(); |
173 | if (code != CC_OK) return false; |
174 | |
175 | return true; |
176 | } |
177 | |
178 | bool is_initialized_ = false; |
179 | MetricsDiscovery::IMetricsDevice_1_5 *metric_device_ = nullptr; |
180 | MetricsDiscovery::IMetricSet_1_1 *metric_set_ = nullptr; |
181 | int freq_metric_idx_ = -1; |
182 | }; |
183 | |
184 | static std::shared_ptr<mdapi_helper_impl_t> &mdapi_helper_impl() { |
185 | static auto instance = std::make_shared<mdapi_helper_impl_t>(); |
186 | return instance; |
187 | } |
188 | |
189 | mdapi_helper_t::mdapi_helper_t() : impl_(mdapi_helper_impl()) {} |
190 | |
191 | cl_command_queue mdapi_helper_t::create_queue( |
192 | cl_context ctx, cl_device_id dev, cl_int *err) const { |
193 | return impl_->create_queue(ctx, dev, err); |
194 | } |
195 | |
196 | double mdapi_helper_t::get_freq(cl_event event) const { |
197 | return impl_->get_freq(event); |
198 | } |
199 | |
200 | #else |
201 | |
// Stub used when DNNL_GPU_ENABLE_MDAPI is not defined: there is no
// implementation object to bind, so the constructor is defaulted.
mdapi_helper_t::mdapi_helper_t() = default;
203 | |
204 | cl_command_queue mdapi_helper_t::create_queue( |
205 | cl_context ctx, cl_device_id dev, cl_int *err) const { |
206 | *err = CL_INVALID_VALUE; |
207 | return nullptr; |
208 | } |
209 | |
210 | double mdapi_helper_t::get_freq(cl_event event) const { |
211 | return 0; |
212 | } |
213 | |
214 | #endif |
215 | |
216 | } // namespace ocl |
217 | } // namespace gpu |
218 | } // namespace impl |
219 | } // namespace dnnl |
220 | |