1/*******************************************************************************
2* Copyright 2022 Intel Corporation
3*
4* Licensed under the Apache License, Version 2.0 (the "License");
5* you may not use this file except in compliance with the License.
6* You may obtain a copy of the License at
7*
8* http://www.apache.org/licenses/LICENSE-2.0
9*
10* Unless required by applicable law or agreed to in writing, software
11* distributed under the License is distributed on an "AS IS" BASIS,
12* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13* See the License for the specific language governing permissions and
14* limitations under the License.
15*******************************************************************************/
16
17#include "gpu/ocl/mdapi_utils.hpp"
18
19#include "oneapi/dnnl/dnnl_config.h"
20
21#if defined(__linux__) && (DNNL_GPU_RUNTIME == DNNL_RUNTIME_OCL)
22#define DNNL_GPU_ENABLE_MDAPI
23#endif
24
25#ifdef DNNL_GPU_ENABLE_MDAPI
26#include <cassert>
27#include <cstring>
28#include <dlfcn.h>
29#include <vector>
30
31#include "gpu/ocl/mdapi/metrics_discovery_api.h"
32#include "gpu/ocl/ocl_utils.hpp"
33
34#ifndef CL_PROFILING_COMMAND_PERFCOUNTERS_INTEL
35#define CL_PROFILING_COMMAND_PERFCOUNTERS_INTEL 0x407F
36#endif
37#endif
38
39namespace dnnl {
40namespace impl {
41namespace gpu {
42namespace ocl {
43
44#ifdef DNNL_GPU_ENABLE_MDAPI
45
46static bool open_metrics_device(MetricsDiscovery::IMetricsDevice_1_5 **device) {
47 static MetricsDiscovery::OpenMetricsDevice_fn func;
48 if (!func) {
49 void *lib = dlopen("libmd.so.1", RTLD_LAZY);
50 *(void **)(&func) = dlsym(lib, "OpenMetricsDevice");
51 }
52 if (!func) return false;
53 auto code = func(device);
54 return code == MetricsDiscovery::CC_OK;
55}
56
57static bool close_metrics_device(MetricsDiscovery::IMetricsDevice_1_5 *device) {
58 static MetricsDiscovery::CloseMetricsDevice_fn func;
59 if (!func) {
60 void *lib = dlopen("libmd.so.1", RTLD_LAZY);
61 *(void **)(&func) = dlsym(lib, "CloseMetricsDevice");
62 }
63 if (!func) return false;
64 auto code = func(device);
65 return code == MetricsDiscovery::CC_OK;
66}
67
68class mdapi_helper_impl_t {
69public:
70 mdapi_helper_impl_t() {
71 if (!open_metrics_device(&metric_device_)) return;
72 if (!activate_freq_metric()) return;
73 is_initialized_ = true;
74 }
75
76 ~mdapi_helper_impl_t() { close_metrics_device(metric_device_); }
77
78 cl_command_queue create_queue(
79 cl_context ctx, cl_device_id dev, cl_int *err) const {
80 if (!is_initialized_) {
81 *err = CL_INVALID_VALUE;
82 return nullptr;
83 }
84 using clCreatePerfCountersCommandQueueINTEL_func_t
85 = cl_command_queue (*)(cl_context, cl_device_id,
86 cl_command_queue_properties, cl_uint, cl_int *);
87 static ext_func_t<clCreatePerfCountersCommandQueueINTEL_func_t>
88 create_queue_with_perf_counters(
89 "clCreatePerfCountersCommandQueueINTEL");
90 auto func = create_queue_with_perf_counters.get_func(
91 get_ocl_platform(dev));
92 if (!func) {
93 *err = CL_INVALID_VALUE;
94 return nullptr;
95 }
96 auto config = metric_set_->GetParams()->ApiSpecificId.OCL;
97 return func(ctx, dev, CL_QUEUE_PROFILING_ENABLE, config, err);
98 }
99
100 double get_freq(cl_event event) const {
101 if (!is_initialized_) return 0;
102
103 using namespace MetricsDiscovery;
104 auto mparams = metric_set_->GetParams();
105 auto report_size = mparams->QueryReportSize;
106 size_t out_size = report_size;
107 std::vector<uint8_t> report(report_size);
108
109 cl_int err;
110 err = clGetEventProfilingInfo(event,
111 CL_PROFILING_COMMAND_PERFCOUNTERS_INTEL, report_size,
112 report.data(), &out_size);
113 if (err != CL_SUCCESS) return 0;
114 if (out_size != report_size) return 0;
115
116 std::vector<TTypedValue_1_0> results(
117 mparams->MetricsCount + mparams->InformationCount);
118
119 uint32_t report_count = 0;
120 TCompletionCode code;
121 code = metric_set_->CalculateMetrics(report.data(), report_size,
122 results.data(),
123 (uint32_t)(results.size() * sizeof(TTypedValue_1_0)),
124 &report_count, false);
125 if (code != CC_OK) return 0;
126 if (report_count < 1) return 0;
127
128 auto &value = results[freq_metric_idx_];
129 assert(value.ValueType == EValueType::VALUE_TYPE_UINT64);
130 return value.ValueUInt64 * 1e6;
131 }
132
133private:
134 bool activate_freq_metric() {
135 using namespace MetricsDiscovery;
136
137 auto *params = metric_device_->GetParams();
138
139 int major = params->Version.MajorNumber;
140 int minor = params->Version.MinorNumber;
141 auto _1 = 1;
142 if (std::tie(major, minor) < std::tie(_1, _1)) return false;
143
144 auto api_mask = API_TYPE_OCL;
145 for (uint32_t i = 0; i < params->ConcurrentGroupsCount; i++) {
146 auto group = metric_device_->GetConcurrentGroup(i);
147 auto gparams = group->GetParams();
148 for (uint32_t j = 0; j < gparams->MetricSetsCount; j++) {
149 auto set = group->GetMetricSet(j);
150 auto sparams = set->GetParams();
151 if (!(sparams->ApiMask & api_mask)) continue;
152 if (!strcmp(sparams->SymbolName, "ComputeBasic")) {
153 metric_set_ = set;
154
155 for (uint32_t k = 0; k < sparams->MetricsCount; k++) {
156 auto metric = set->GetMetric(k);
157 auto mparams = metric->GetParams();
158 if (!strcmp(mparams->SymbolName,
159 "AvgGpuCoreFrequencyMHz"))
160 freq_metric_idx_ = k;
161 }
162 }
163 }
164 }
165
166 if (freq_metric_idx_ < 0) return false;
167
168 TCompletionCode code;
169 code = metric_set_->SetApiFiltering(api_mask);
170 if (code != CC_OK) return false;
171
172 code = metric_set_->Activate();
173 if (code != CC_OK) return false;
174
175 return true;
176 }
177
178 bool is_initialized_ = false;
179 MetricsDiscovery::IMetricsDevice_1_5 *metric_device_ = nullptr;
180 MetricsDiscovery::IMetricSet_1_1 *metric_set_ = nullptr;
181 int freq_metric_idx_ = -1;
182};
183
184static std::shared_ptr<mdapi_helper_impl_t> &mdapi_helper_impl() {
185 static auto instance = std::make_shared<mdapi_helper_impl_t>();
186 return instance;
187}
188
// Binds this helper to the implementation object returned by
// mdapi_helper_impl().
mdapi_helper_t::mdapi_helper_t() : impl_(mdapi_helper_impl()) {}
190
191cl_command_queue mdapi_helper_t::create_queue(
192 cl_context ctx, cl_device_id dev, cl_int *err) const {
193 return impl_->create_queue(ctx, dev, err);
194}
195
196double mdapi_helper_t::get_freq(cl_event event) const {
197 return impl_->get_freq(event);
198}
199
200#else
201
// Fallback used when DNNL_GPU_ENABLE_MDAPI is not defined: nothing to set up.
mdapi_helper_t::mdapi_helper_t() = default;
203
204cl_command_queue mdapi_helper_t::create_queue(
205 cl_context ctx, cl_device_id dev, cl_int *err) const {
206 *err = CL_INVALID_VALUE;
207 return nullptr;
208}
209
210double mdapi_helper_t::get_freq(cl_event event) const {
211 return 0;
212}
213
214#endif
215
216} // namespace ocl
217} // namespace gpu
218} // namespace impl
219} // namespace dnnl
220