1 | /******************************************************************************* |
2 | * Copyright 2022 Intel Corporation |
3 | * |
4 | * Licensed under the Apache License, Version 2.0 (the "License"); |
5 | * you may not use this file except in compliance with the License. |
6 | * You may obtain a copy of the License at |
7 | * |
8 | * http://www.apache.org/licenses/LICENSE-2.0 |
9 | * |
10 | * Unless required by applicable law or agreed to in writing, software |
11 | * distributed under the License is distributed on an "AS IS" BASIS, |
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
13 | * See the License for the specific language governing permissions and |
14 | * limitations under the License. |
15 | *******************************************************************************/ |
16 | |
17 | #include <atomic> |
18 | #include <limits> |
19 | #include <mutex> |
20 | #include <utility> |
21 | #include <vector> |
22 | #include <CL/cl.h> |
23 | #include <unordered_map> |
24 | |
25 | #include "gpu/ocl/profile.hpp" |
26 | #include "gpu/profile.hpp" |
27 | |
28 | #include "common/c_types_map.hpp" |
29 | #include "common/utils.hpp" |
30 | #include "gpu/ocl/mdapi_utils.hpp" |
31 | #include "gpu/ocl/ocl_stream.hpp" |
32 | #include "gpu/ocl/ocl_utils.hpp" |
33 | |
34 | using namespace dnnl::impl; |
35 | using namespace dnnl::impl::gpu::ocl; |
36 | |
37 | namespace dnnl { |
38 | namespace impl { |
39 | namespace gpu { |
40 | namespace ocl { |
41 | |
42 | struct profile_event_t { |
43 | profile_event_t(cl_event event, const ocl_stream_t *stream, uint64_t stamp) |
44 | : event(event), stream(stream), stamp(stamp) {} |
45 | |
46 | cl_event event; |
47 | const ocl_stream_t *stream; |
48 | uint64_t stamp; |
49 | }; |
50 | |
51 | static std::vector<profile_event_t> events; |
52 | static std::atomic<uint64_t> stamp(0); |
53 | |
54 | void notify_before_exec() { |
55 | stamp++; |
56 | } |
57 | |
58 | void register_profile_event(cl_event event, const ocl_stream_t *ocl_stream) { |
59 | static std::mutex mutex; |
60 | std::lock_guard<std::mutex> lock(mutex); |
61 | events.emplace_back(event, ocl_stream, stamp); |
62 | } |
63 | |
64 | status_t get_profile_info(uint64_t &nsec, double &freq, int mode) { |
65 | nsec = 0; |
66 | freq = 0; |
67 | std::unordered_map<uint64_t, profile_entry_t> stamp2entry; |
68 | for (auto &ev : events) { |
69 | cl_ulong beg, end; |
70 | OCL_CHECK(clGetEventProfilingInfo(ev.event, CL_PROFILING_COMMAND_START, |
71 | sizeof(beg), &beg, nullptr)); |
72 | OCL_CHECK(clGetEventProfilingInfo(ev.event, CL_PROFILING_COMMAND_END, |
73 | sizeof(end), &end, nullptr)); |
74 | auto &entry = stamp2entry[ev.stamp]; |
75 | entry.nsec += (end - beg); |
76 | entry.freq += ev.stream->mdapi_helper().get_freq(ev.event); |
77 | entry.kernel_count++; |
78 | } |
79 | return get_profile_info_impl(nsec, freq, mode, stamp2entry); |
80 | } |
81 | |
82 | status_t reset_profiling() { |
83 | for (auto &ev : events) |
84 | clReleaseEvent(ev.event); |
85 | events.clear(); |
86 | return status::success; |
87 | } |
88 | |
89 | } // namespace ocl |
90 | } // namespace gpu |
91 | } // namespace impl |
92 | } // namespace dnnl |
93 | |