1 | /******************************************************************************* |
2 | * Copyright 2022 Intel Corporation |
3 | * |
4 | * Licensed under the Apache License, Version 2.0 (the "License"); |
5 | * you may not use this file except in compliance with the License. |
6 | * You may obtain a copy of the License at |
7 | * |
8 | * http://www.apache.org/licenses/LICENSE-2.0 |
9 | * |
10 | * Unless required by applicable law or agreed to in writing, software |
11 | * distributed under the License is distributed on an "AS IS" BASIS, |
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
13 | * See the License for the specific language governing permissions and |
14 | * limitations under the License. |
15 | *******************************************************************************/ |
16 | |
17 | #include <vector> |
18 | |
19 | #include "gpu/profile.hpp" |
20 | |
21 | #include "common/c_types_map.hpp" |
22 | #include "common/utils.hpp" |
23 | |
24 | #if DNNL_GPU_RUNTIME == DNNL_RUNTIME_OCL |
25 | #include "gpu/ocl/profile.hpp" |
26 | #endif |
27 | |
28 | #if DNNL_GPU_RUNTIME == DNNL_RUNTIME_SYCL |
29 | #include "sycl/profile.hpp" |
30 | #endif |
31 | |
32 | namespace dnnl { |
33 | namespace impl { |
34 | namespace gpu { |
35 | |
36 | static setting_t<bool> profile {false}; |
37 | |
38 | bool is_profiling_enabled() { |
39 | return profile.get(); |
40 | } |
41 | |
42 | status_t get_profile_info_impl(uint64_t &nsec, double &freq, int _mode, |
43 | const std::unordered_map<uint64_t, profile_entry_t> &stamp2entry) { |
44 | auto mode = static_cast<profile_mode_t>(_mode); |
45 | switch (mode) { |
46 | case profile_mode_t::sum: |
47 | nsec = 0; |
48 | freq = 0; |
49 | for (auto &kv : stamp2entry) { |
50 | auto &e = kv.second; |
51 | nsec += e.nsec; |
52 | freq += e.freq / e.kernel_count; |
53 | } |
54 | freq /= stamp2entry.size(); |
55 | break; |
56 | case profile_mode_t::min: |
57 | nsec = std::numeric_limits<uint64_t>::max(); |
58 | freq = 0; |
59 | for (auto &kv : stamp2entry) { |
60 | auto &e = kv.second; |
61 | if (e.nsec < nsec) { |
62 | nsec = e.nsec; |
63 | freq = e.freq / e.kernel_count; |
64 | } |
65 | } |
66 | break; |
67 | default: assert(!"Unexpected mode" ); |
68 | } |
69 | return status::success; |
70 | } |
71 | |
72 | } // namespace gpu |
73 | } // namespace impl |
74 | } // namespace dnnl |
75 | |
76 | using dnnl::impl::status_t; |
77 | |
78 | extern "C" status_t DNNL_API dnnl_impl_gpu_set_profiling(int flag) { |
79 | using namespace dnnl::impl; |
80 | dnnl::impl::gpu::profile.set((bool)flag); |
81 | return status::success; |
82 | } |
83 | |
84 | extern "C" status_t DNNL_API dnnl_impl_gpu_reset_profiling() { |
85 | using namespace dnnl::impl; |
86 | #if DNNL_GPU_RUNTIME == DNNL_RUNTIME_OCL |
87 | return dnnl::impl::gpu::ocl::reset_profiling(); |
88 | #endif |
89 | #if DNNL_GPU_RUNTIME == DNNL_RUNTIME_SYCL |
90 | return dnnl::impl::sycl::reset_profiling(); |
91 | #endif |
92 | return status::unimplemented; |
93 | } |
94 | |
95 | extern "C" status_t DNNL_API dnnl_impl_gpu_get_profile_info( |
96 | uint64_t &nsec, double &freq, int mode) { |
97 | using namespace dnnl::impl; |
98 | #if DNNL_GPU_RUNTIME == DNNL_RUNTIME_OCL |
99 | return dnnl::impl::gpu::ocl::get_profile_info(nsec, freq, mode); |
100 | #endif |
101 | #if DNNL_GPU_RUNTIME == DNNL_RUNTIME_SYCL |
102 | return dnnl::impl::sycl::get_profile_info(nsec, freq, mode); |
103 | #endif |
104 | return status::unimplemented; |
105 | } |
106 | |