1 | /******************************************************************************* |
2 | * Copyright 2018-2022 Intel Corporation |
3 | * |
4 | * Licensed under the Apache License, Version 2.0 (the "License"); |
5 | * you may not use this file except in compliance with the License. |
6 | * You may obtain a copy of the License at |
7 | * |
8 | * http://www.apache.org/licenses/LICENSE-2.0 |
9 | * |
10 | * Unless required by applicable law or agreed to in writing, software |
11 | * distributed under the License is distributed on an "AS IS" BASIS, |
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
13 | * See the License for the specific language governing permissions and |
14 | * limitations under the License. |
15 | *******************************************************************************/ |
16 | |
17 | #ifdef _WIN32 |
18 | #include <malloc.h> |
19 | #include <windows.h> |
20 | #endif |
21 | |
22 | #if defined __unix__ || defined __APPLE__ || defined __FreeBSD__ \ |
23 | || defined __Fuchsia__ |
24 | #include <unistd.h> |
25 | #endif |
26 | |
27 | #ifdef __unix__ |
28 | #include <sys/stat.h> |
29 | #include <sys/types.h> |
30 | #endif |
31 | |
32 | #include <algorithm> |
33 | #include <climits> |
34 | #include <cstdio> |
35 | #include <cstdlib> |
36 | #include <cstring> |
37 | #include <mutex> |
38 | #include <string> |
39 | |
40 | #include "oneapi/dnnl/dnnl.h" |
41 | |
42 | #include "memory_debug.hpp" |
43 | #include "utils.hpp" |
44 | |
45 | #if DNNL_CPU_RUNTIME != DNNL_RUNTIME_NONE |
46 | #include "cpu/platform.hpp" |
47 | #endif |
48 | |
49 | namespace dnnl { |
50 | namespace impl { |
51 | |
// Copies the value of environment variable `name` into `buffer`.
//
// Parameters:
//   name        - variable to look up; must not be nullptr.
//   buffer      - destination for the null-terminated value; may be nullptr
//                 only when `buffer_size` is 0.
//   buffer_size - capacity of `buffer` in bytes, terminating null included;
//                 must not be negative.
//
// Returns:
//   - the length of the value when it fits into `buffer` (0 when the variable
//     is unset or empty); `buffer` then holds the null-terminated value;
//   - a non-positive number (the negated length reported by the platform
//     query) when the value does not fit; `buffer`, if it has room for at
//     least one byte, is set to the empty string;
//   - INT_MIN when the arguments are invalid or the length exceeds INT_MAX.
//
// NOTE(review): per the Win32 documentation GetEnvironmentVariable reports the
// required size *including* the terminating null when the buffer is too small,
// so the magnitude of the negative result may differ by one from the
// non-Windows path - confirm callers only rely on the sign.
int getenv(const char *name, char *buffer, int buffer_size) {
    if (name == nullptr || buffer_size < 0
            || (buffer == nullptr && buffer_size > 0))
        return INT_MIN;

    int result = 0;
    int term_zero_idx = 0;
    size_t value_length = 0;

#ifdef _WIN32
    value_length = GetEnvironmentVariable(name, buffer, buffer_size);
#else
    const char *value = ::getenv(name);
    value_length = value == nullptr ? 0 : strlen(value);
#endif

    if (value_length > INT_MAX)
        result = INT_MIN;
    else {
        const int int_value_length = static_cast<int>(value_length);
        if (int_value_length >= buffer_size) {
            // Value (plus terminating null) does not fit: report it through
            // a negated length and leave the buffer empty.
            result = -int_value_length;
        } else {
            term_zero_idx = int_value_length;
            result = int_value_length;
#ifndef _WIN32
            if (value) strncpy(buffer, value, buffer_size - 1);
#endif
        }
    }

    // A non-null buffer with zero capacity has no room even for the
    // terminator, so only write it when at least one byte is available.
    if (buffer != nullptr && buffer_size > 0) buffer[term_zero_idx] = '\0';
    return result;
}
86 | |
87 | int getenv_int(const char *name, int default_value) { |
88 | int value = default_value; |
89 | // # of digits in the longest 32-bit signed int + sign + terminating null |
90 | const int len = 12; |
91 | char value_str[len]; |
92 | if (getenv(name, value_str, len) > 0) value = atoi(value_str); |
93 | return value; |
94 | } |
95 | |
96 | int getenv_int_user(const char *name, int default_value) { |
97 | int value = default_value; |
98 | // # of digits in the longest 32-bit signed int + sign + terminating null |
99 | const int len = 12; |
100 | char value_str[len]; |
101 | for (const auto &prefix : {"ONEDNN_" , "DNNL_" }) { |
102 | std::string name_str = std::string(prefix) + std::string(name); |
103 | if (getenv(name_str.c_str(), value_str, len) > 0) { |
104 | value = atoi(value_str); |
105 | break; |
106 | } |
107 | } |
108 | return value; |
109 | } |
110 | |
111 | std::string getenv_string_user(const char *name) { |
112 | // Random number to fit possible string input. |
113 | std::string value; |
114 | const int len = 32; |
115 | char value_str[len]; |
116 | for (const auto &prefix : {"ONEDNN_" , "DNNL_" }) { |
117 | std::string name_str = std::string(prefix) + std::string(name); |
118 | if (getenv(name_str.c_str(), value_str, len) > 0) { |
119 | value = value_str; |
120 | break; |
121 | } |
122 | } |
123 | std::transform(value.begin(), value.end(), value.begin(), ::tolower); |
124 | return value; |
125 | } |
126 | |
// Portable wrapper around the C runtime file-open call.
// Uses fopen_s on Windows and ::fopen elsewhere; returns the opened stream,
// or a null pointer when opening fails.
FILE *fopen(const char *filename, const char *mode) {
#ifndef _WIN32
    FILE *stream = ::fopen(filename, mode);
    return stream;
#else
    FILE *stream = NULL;
    // fopen_s reports failure through a non-zero return code.
    if (::fopen_s(&stream, filename, mode) != 0) return NULL;
    return stream;
#endif
}
135 | |
// Returns the page size reported by the operating system:
// SYSTEM_INFO::dwPageSize on Windows, ::getpagesize() elsewhere.
int getpagesize() {
#ifndef _WIN32
    return ::getpagesize();
#else
    SYSTEM_INFO sys_info;
    GetSystemInfo(&sys_info);
    return sys_info.dwPageSize;
#endif
}
145 | |
146 | void *malloc(size_t size, int alignment) { |
147 | void *ptr; |
148 | if (memory_debug::is_mem_debug()) |
149 | return memory_debug::malloc(size, alignment); |
150 | |
151 | #ifdef _WIN32 |
152 | ptr = _aligned_malloc(size, alignment); |
153 | int rc = ptr ? 0 : -1; |
154 | #else |
155 | int rc = ::posix_memalign(&ptr, alignment, size); |
156 | #endif |
157 | |
158 | return (rc == 0) ? ptr : nullptr; |
159 | } |
160 | |
161 | void free(void *p) { |
162 | |
163 | if (memory_debug::is_mem_debug()) return memory_debug::free(p); |
164 | |
165 | #ifdef _WIN32 |
166 | _aligned_free(p); |
167 | #else |
168 | ::free(p); |
169 | #endif |
170 | } |
171 | |
172 | // Atomic operations |
// Atomically adds `val` to `*dst` and returns the value `*dst` held before
// the addition. Implemented with InterlockedExchangeAdd on Windows and the
// __sync_fetch_and_add builtin elsewhere.
int32_t fetch_and_add(int32_t *dst, int32_t val) {
#ifndef _WIN32
    const int32_t previous = __sync_fetch_and_add(dst, val);
#else
    const int32_t previous
            = InterlockedExchangeAdd(reinterpret_cast<long *>(dst), val);
#endif
    return previous;
}
180 | |
181 | static setting_t<bool> jit_dump {false}; |
182 | bool get_jit_dump() { |
183 | if (!jit_dump.initialized()) { |
184 | static bool val = getenv_int_user("JIT_DUMP" , jit_dump.get()); |
185 | jit_dump.set(val); |
186 | } |
187 | return jit_dump.get(); |
188 | } |
189 | |
190 | #if defined(DNNL_AARCH64) && (DNNL_AARCH64 == 1) |
191 | static setting_t<unsigned> jit_profiling_flags {DNNL_JIT_PROFILE_LINUX_PERFMAP}; |
192 | #else |
193 | static setting_t<unsigned> jit_profiling_flags {DNNL_JIT_PROFILE_VTUNE}; |
194 | #endif |
195 | unsigned get_jit_profiling_flags() { |
196 | MAYBE_UNUSED(jit_profiling_flags); |
197 | unsigned flag = 0; |
198 | #if DNNL_CPU_RUNTIME != DNNL_RUNTIME_NONE |
199 | if (!jit_profiling_flags.initialized()) { |
200 | static unsigned val |
201 | = getenv_int_user("JIT_PROFILE" , jit_profiling_flags.get()); |
202 | jit_profiling_flags.set(val); |
203 | } |
204 | flag = jit_profiling_flags.get(); |
205 | #endif |
206 | return flag; |
207 | } |
208 | |
209 | static setting_t<std::string> jit_profiling_jitdumpdir; |
210 | dnnl_status_t init_jit_profiling_jitdumpdir( |
211 | const char *jitdumpdir, bool overwrite) { |
212 | #ifdef __linux__ |
213 | static std::mutex m; |
214 | std::lock_guard<std::mutex> g(m); |
215 | |
216 | if (jit_profiling_jitdumpdir.initialized() && !overwrite) |
217 | return status::success; |
218 | |
219 | if (!jitdumpdir) { |
220 | char buf[PATH_MAX]; |
221 | if (getenv("JITDUMPDIR" , buf, sizeof(buf)) > 0) |
222 | jit_profiling_jitdumpdir.set(buf); |
223 | else if (getenv("HOME" , buf, sizeof(buf)) > 0) |
224 | jit_profiling_jitdumpdir.set(buf); |
225 | else |
226 | jit_profiling_jitdumpdir.set("." ); |
227 | } else |
228 | jit_profiling_jitdumpdir.set(jitdumpdir); |
229 | |
230 | return status::success; |
231 | #else |
232 | UNUSED(jit_profiling_jitdumpdir); |
233 | return status::unimplemented; |
234 | #endif |
235 | } |
236 | |
// Returns the JIT dump directory, or an empty string when the library is
// built without a CPU runtime. If the setting is not yet marked initialized,
// init_jit_profiling_jitdumpdir(nullptr, false) is invoked first.
std::string get_jit_profiling_jitdumpdir() {
#if DNNL_CPU_RUNTIME != DNNL_RUNTIME_NONE
    if (!jit_profiling_jitdumpdir.initialized())
        init_jit_profiling_jitdumpdir(nullptr, false);
    return jit_profiling_jitdumpdir.get();
#else
    return std::string();
#endif
}
246 | |
247 | } // namespace impl |
248 | } // namespace dnnl |
249 | |
250 | dnnl_status_t dnnl_set_jit_dump(int enabled) { |
251 | using namespace dnnl::impl; |
252 | jit_dump.set(enabled); |
253 | return status::success; |
254 | } |
255 | |
256 | dnnl_status_t dnnl_set_jit_profiling_flags(unsigned flags) { |
257 | using namespace dnnl::impl; |
258 | #if DNNL_CPU_RUNTIME != DNNL_RUNTIME_NONE |
259 | unsigned mask = DNNL_JIT_PROFILE_VTUNE; |
260 | #ifdef __linux__ |
261 | mask |= DNNL_JIT_PROFILE_LINUX_PERF; |
262 | mask |= DNNL_JIT_PROFILE_LINUX_JITDUMP_USE_TSC; |
263 | #endif |
264 | if (flags & ~mask) return status::invalid_arguments; |
265 | jit_profiling_flags.set(flags); |
266 | return status::success; |
267 | #else |
268 | return status::unimplemented; |
269 | #endif |
270 | } |
271 | |
272 | dnnl_status_t dnnl_set_jit_profiling_jitdumpdir(const char *dir) { |
273 | auto status = dnnl::impl::status::unimplemented; |
274 | #if DNNL_CPU_RUNTIME != DNNL_RUNTIME_NONE |
275 | status = dnnl::impl::init_jit_profiling_jitdumpdir(dir, true); |
276 | #endif |
277 | return status; |
278 | } |
279 | |
280 | dnnl_status_t dnnl_set_max_cpu_isa(dnnl_cpu_isa_t isa) { |
281 | auto status = dnnl::impl::status::runtime_error; |
282 | #if DNNL_CPU_RUNTIME != DNNL_RUNTIME_NONE |
283 | status = dnnl::impl::cpu::platform::set_max_cpu_isa(isa); |
284 | #endif |
285 | return status; |
286 | } |
287 | |
288 | dnnl_cpu_isa_t dnnl_get_effective_cpu_isa() { |
289 | auto isa = dnnl_cpu_isa_default; |
290 | #if DNNL_CPU_RUNTIME != DNNL_RUNTIME_NONE |
291 | isa = dnnl::impl::cpu::platform::get_effective_cpu_isa(); |
292 | #endif |
293 | return isa; |
294 | } |
295 | |
296 | dnnl_status_t dnnl_set_cpu_isa_hints(dnnl_cpu_isa_hints_t isa_hints) { |
297 | auto status = dnnl::impl::status::runtime_error; |
298 | #if DNNL_CPU_RUNTIME != DNNL_RUNTIME_NONE |
299 | status = dnnl::impl::cpu::platform::set_cpu_isa_hints(isa_hints); |
300 | #endif |
301 | return status; |
302 | } |
303 | |
304 | dnnl_cpu_isa_hints_t dnnl_get_cpu_isa_hints() { |
305 | auto isa_hint = dnnl_cpu_isa_no_hints; |
306 | #if DNNL_CPU_RUNTIME != DNNL_RUNTIME_NONE |
307 | isa_hint = dnnl::impl::cpu::platform::get_cpu_isa_hints(); |
308 | #endif |
309 | return isa_hint; |
310 | } |
311 | |
312 | #if DNNL_CPU_THREADING_RUNTIME == DNNL_RUNTIME_THREADPOOL |
313 | #include "oneapi/dnnl/dnnl_threadpool_iface.hpp" |
314 | namespace dnnl { |
315 | namespace impl { |
316 | namespace threadpool_utils { |
317 | |
318 | namespace { |
319 | static thread_local dnnl::threadpool_interop::threadpool_iface |
320 | *active_threadpool |
321 | = nullptr; |
322 | } |
323 | |
324 | void DNNL_API activate_threadpool( |
325 | dnnl::threadpool_interop::threadpool_iface *tp) { |
326 | assert(!active_threadpool); |
327 | if (!active_threadpool) active_threadpool = tp; |
328 | } |
329 | |
330 | void DNNL_API deactivate_threadpool() { |
331 | active_threadpool = nullptr; |
332 | } |
333 | |
334 | dnnl::threadpool_interop::threadpool_iface *get_active_threadpool() { |
335 | return active_threadpool; |
336 | } |
337 | |
338 | int &get_threadlocal_max_concurrency() { |
339 | thread_local int max_concurrency |
340 | = (int)cpu::platform::get_max_threads_to_use(); |
341 | assert(max_concurrency > 0); |
342 | return max_concurrency; |
343 | } |
344 | |
345 | int DNNL_API get_max_concurrency() { |
346 | return get_threadlocal_max_concurrency(); |
347 | } |
348 | |
349 | } // namespace threadpool_utils |
350 | } // namespace impl |
351 | } // namespace dnnl |
352 | #endif |
353 | |