1/*******************************************************************************
2* Copyright 2022 Intel Corporation
3*
4* Licensed under the Apache License, Version 2.0 (the "License");
5* you may not use this file except in compliance with the License.
6* You may obtain a copy of the License at
7*
8* http://www.apache.org/licenses/LICENSE-2.0
9*
10* Unless required by applicable law or agreed to in writing, software
11* distributed under the License is distributed on an "AS IS" BASIS,
12* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13* See the License for the specific language governing permissions and
14* limitations under the License.
15*******************************************************************************/
16
17#include <atomic>
18#include <limits>
19#include <mutex>
20#include <utility>
21#include <vector>
22#include <CL/cl.h>
23#include <unordered_map>
24
25#include "gpu/ocl/profile.hpp"
26#include "gpu/profile.hpp"
27
28#include "common/c_types_map.hpp"
29#include "common/utils.hpp"
30#include "gpu/ocl/mdapi_utils.hpp"
31#include "gpu/ocl/ocl_stream.hpp"
32#include "gpu/ocl/ocl_utils.hpp"
33
34using namespace dnnl::impl;
35using namespace dnnl::impl::gpu::ocl;
36
37namespace dnnl {
38namespace impl {
39namespace gpu {
40namespace ocl {
41
42struct profile_event_t {
43 profile_event_t(cl_event event, const ocl_stream_t *stream, uint64_t stamp)
44 : event(event), stream(stream), stamp(stamp) {}
45
46 cl_event event;
47 const ocl_stream_t *stream;
48 uint64_t stamp;
49};
50
51static std::vector<profile_event_t> events;
52static std::atomic<uint64_t> stamp(0);
53
54void notify_before_exec() {
55 stamp++;
56}
57
58void register_profile_event(cl_event event, const ocl_stream_t *ocl_stream) {
59 static std::mutex mutex;
60 std::lock_guard<std::mutex> lock(mutex);
61 events.emplace_back(event, ocl_stream, stamp);
62}
63
64status_t get_profile_info(uint64_t &nsec, double &freq, int mode) {
65 nsec = 0;
66 freq = 0;
67 std::unordered_map<uint64_t, profile_entry_t> stamp2entry;
68 for (auto &ev : events) {
69 cl_ulong beg, end;
70 OCL_CHECK(clGetEventProfilingInfo(ev.event, CL_PROFILING_COMMAND_START,
71 sizeof(beg), &beg, nullptr));
72 OCL_CHECK(clGetEventProfilingInfo(ev.event, CL_PROFILING_COMMAND_END,
73 sizeof(end), &end, nullptr));
74 auto &entry = stamp2entry[ev.stamp];
75 entry.nsec += (end - beg);
76 entry.freq += ev.stream->mdapi_helper().get_freq(ev.event);
77 entry.kernel_count++;
78 }
79 return get_profile_info_impl(nsec, freq, mode, stamp2entry);
80}
81
82status_t reset_profiling() {
83 for (auto &ev : events)
84 clReleaseEvent(ev.event);
85 events.clear();
86 return status::success;
87}
88
89} // namespace ocl
90} // namespace gpu
91} // namespace impl
92} // namespace dnnl
93