1/*******************************************************************************
2* Copyright 2022 Intel Corporation
3*
4* Licensed under the Apache License, Version 2.0 (the "License");
5* you may not use this file except in compliance with the License.
6* You may obtain a copy of the License at
7*
8* http://www.apache.org/licenses/LICENSE-2.0
9*
10* Unless required by applicable law or agreed to in writing, software
11* distributed under the License is distributed on an "AS IS" BASIS,
12* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13* See the License for the specific language governing permissions and
14* limitations under the License.
15*******************************************************************************/
16
17#include <vector>
18
19#include "gpu/profile.hpp"
20
21#include "common/c_types_map.hpp"
22#include "common/utils.hpp"
23
24#if DNNL_GPU_RUNTIME == DNNL_RUNTIME_OCL
25#include "gpu/ocl/profile.hpp"
26#endif
27
28#if DNNL_GPU_RUNTIME == DNNL_RUNTIME_SYCL
29#include "sycl/profile.hpp"
30#endif
31
32namespace dnnl {
33namespace impl {
34namespace gpu {
35
36static setting_t<bool> profile {false};
37
38bool is_profiling_enabled() {
39 return profile.get();
40}
41
42status_t get_profile_info_impl(uint64_t &nsec, double &freq, int _mode,
43 const std::unordered_map<uint64_t, profile_entry_t> &stamp2entry) {
44 auto mode = static_cast<profile_mode_t>(_mode);
45 switch (mode) {
46 case profile_mode_t::sum:
47 nsec = 0;
48 freq = 0;
49 for (auto &kv : stamp2entry) {
50 auto &e = kv.second;
51 nsec += e.nsec;
52 freq += e.freq / e.kernel_count;
53 }
54 freq /= stamp2entry.size();
55 break;
56 case profile_mode_t::min:
57 nsec = std::numeric_limits<uint64_t>::max();
58 freq = 0;
59 for (auto &kv : stamp2entry) {
60 auto &e = kv.second;
61 if (e.nsec < nsec) {
62 nsec = e.nsec;
63 freq = e.freq / e.kernel_count;
64 }
65 }
66 break;
67 default: assert(!"Unexpected mode");
68 }
69 return status::success;
70}
71
72} // namespace gpu
73} // namespace impl
74} // namespace dnnl
75
76using dnnl::impl::status_t;
77
78extern "C" status_t DNNL_API dnnl_impl_gpu_set_profiling(int flag) {
79 using namespace dnnl::impl;
80 dnnl::impl::gpu::profile.set((bool)flag);
81 return status::success;
82}
83
84extern "C" status_t DNNL_API dnnl_impl_gpu_reset_profiling() {
85 using namespace dnnl::impl;
86#if DNNL_GPU_RUNTIME == DNNL_RUNTIME_OCL
87 return dnnl::impl::gpu::ocl::reset_profiling();
88#endif
89#if DNNL_GPU_RUNTIME == DNNL_RUNTIME_SYCL
90 return dnnl::impl::sycl::reset_profiling();
91#endif
92 return status::unimplemented;
93}
94
95extern "C" status_t DNNL_API dnnl_impl_gpu_get_profile_info(
96 uint64_t &nsec, double &freq, int mode) {
97 using namespace dnnl::impl;
98#if DNNL_GPU_RUNTIME == DNNL_RUNTIME_OCL
99 return dnnl::impl::gpu::ocl::get_profile_info(nsec, freq, mode);
100#endif
101#if DNNL_GPU_RUNTIME == DNNL_RUNTIME_SYCL
102 return dnnl::impl::sycl::get_profile_info(nsec, freq, mode);
103#endif
104 return status::unimplemented;
105}
106