1 | /******************************************************************************* |
2 | * Copyright 2016-2022 Intel Corporation |
3 | * |
4 | * Licensed under the Apache License, Version 2.0 (the "License"); |
5 | * you may not use this file except in compliance with the License. |
6 | * You may obtain a copy of the License at |
7 | * |
8 | * http://www.apache.org/licenses/LICENSE-2.0 |
9 | * |
10 | * Unless required by applicable law or agreed to in writing, software |
11 | * distributed under the License is distributed on an "AS IS" BASIS, |
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
13 | * See the License for the specific language governing permissions and |
14 | * limitations under the License. |
15 | *******************************************************************************/ |
16 | |
17 | #ifndef COMMON_UTILS_HPP |
18 | #define COMMON_UTILS_HPP |
19 | |
20 | #include <atomic> |
21 | #include <cassert> |
22 | #include <climits> |
23 | #include <cmath> |
24 | #include <cstddef> |
25 | #include <cstdint> |
26 | #include <cstdio> |
27 | #include <cstdlib> |
28 | #include <string> |
29 | |
30 | #include <memory> |
31 | #include <string> |
32 | #include <tuple> |
33 | |
// Memory-sanitizer support. MSAN_ENABLED defaults to 0 and ATTR_NO_MSAN to
// nothing; both are redefined below only when the compiler provides
// __has_feature and reports the `memory_sanitizer` feature.
#define MSAN_ENABLED 0
#define ATTR_NO_MSAN
#if defined(__has_feature)
#if __has_feature(memory_sanitizer)
#undef MSAN_ENABLED
#define MSAN_ENABLED 1
#undef ATTR_NO_MSAN
// Attribute that switches memory-sanitizer instrumentation off for a function.
#define ATTR_NO_MSAN __attribute__((no_sanitize("memory")))
// Needed for __msan_unpoison() used further down in this header.
#include <sanitizer/msan_interface.h>
#endif
#endif
45 | |
46 | #include "c_types_map.hpp" |
47 | #include "nstl.hpp" |
48 | #include "z_magic.hpp" |
49 | |
50 | namespace dnnl { |
51 | namespace impl { |
52 | |
// Returns `*this` early when `other` is the very object being assigned to.
// Must be expanded inside a non-static member function whose return
// statement accepts `*this` (typically a copy/move assignment operator).
// The argument is parenthesized so that any lvalue expression, including a
// conditional expression, can be passed safely.
#define DNNL_SHORT_CIRCUIT_SELF_ASSIGN(other) \
    do { \
        if (this == &(other)) return *this; \
    } while (0)
57 | |
// Returns `true` early when `other` is the very object it is compared with.
// Must be expanded inside a non-static member function returning something
// constructible from `true` (typically an equality operator).
// The argument is parenthesized so that any lvalue expression, including a
// conditional expression, can be passed safely.
#define DNNL_SHORT_CIRCUIT_SELF_COMPARISON(other) \
    do { \
        if (this == &(other)) return true; \
    } while (0)
62 | |
// Declares the copy constructor and the copy assignment operator of class T
// as deleted. Expand it inside the definition of T. The expansion already
// ends with a semicolon.
#define DNNL_DISALLOW_COPY_AND_ASSIGN(T) \
    T(const T &) = delete; \
    T &operator=(const T &) = delete;
66 | |
67 | // Sanity check for 64 bits |
68 | static_assert(sizeof(void *) == 8, "oneDNN supports 64-bit architectures only" ); |
69 | |
// Evaluates `f` exactly once as a status_t and makes the enclosing function
// return that status when it differs from status::success. Usable as a
// single statement thanks to the do/while(0) wrapper.
#define CHECK(f) \
    do { \
        status_t _status_ = (f); \
        if (_status_ != status::success) return _status_; \
    } while (0)
75 | |
// Logical implication `cause => effect`: false only when `cause` is true and
// `effect` is false. `effect` is not evaluated when `cause` is false.
#define IMPLICATION(cause, effect) (!(cause) || !!(effect))
77 | |
78 | namespace utils { |
79 | |
/* a bunch of std:: analogues to be compliant with any msvs version
 *
 * Rationale: msvs c++ (and even some c) headers contain a special pragma that
 * injects an msvs-version check into object files in order to detect
 * abi-mismatches during static linking. This makes sense if e.g. std:: objects
 * are passed between the application and the library, which is not the case
 * for oneDNN (since, ideally, there is no c++-rt dependent stuff...). */
87 | |
/* SFINAE helper -- analogue to std::enable_if: the nested `type` exists only
 * in the specialization selected when the condition is true. */
template <bool condition, class T = void>
struct enable_if {};
template <class T>
struct enable_if<true, T> {
    using type = T;
};
95 | |
/* analogue to std::conditional: `type` is the first candidate when the flag
 * is true and the second one when it is false. */
template <bool, typename, typename>
struct conditional {};
template <typename if_true_t, typename if_false_t>
struct conditional<true, if_true_t, if_false_t> {
    using type = if_true_t;
};
template <typename if_true_t, typename if_false_t>
struct conditional<false, if_true_t, if_false_t> {
    using type = if_false_t;
};
107 | |
/* Three-way type selector driven by two flags:
 *   <true,  T, false, FT, FF> -> T
 *   <false, T, true,  FT, FF> -> FT
 *   <false, T, false, FT, FF> -> FF
 * The <true, ..., true, ...> combination has no `type` member. */
template <bool, typename, bool, typename, typename>
struct conditional3 {};
template <typename first_t, typename second_t, typename fallback_t>
struct conditional3<true, first_t, false, second_t, fallback_t> {
    using type = first_t;
};
template <typename first_t, typename second_t, typename fallback_t>
struct conditional3<false, first_t, true, second_t, fallback_t> {
    using type = second_t;
};
template <typename first_t, typename second_t, typename fallback_t>
struct conditional3<false, first_t, false, second_t, fallback_t> {
    using type = fallback_t;
};
122 | |
/* Value counterpart of `conditional`: `value` is the first constant when the
 * flag is true and the second constant when it is false. */
template <bool, typename value_t, value_t, value_t>
struct conditional_v {};
template <typename value_t, value_t if_true, value_t if_false>
struct conditional_v<true, value_t, if_true, if_false> {
    static constexpr value_t value = if_true;
};
template <typename value_t, value_t if_true, value_t if_false>
struct conditional_v<false, value_t, if_true, if_false> {
    static constexpr value_t value = if_false;
};
133 | |
/* analogue to std::remove_reference: strips an lvalue or rvalue reference
 * from the given type and leaves any other type untouched. */
template <typename T>
struct remove_reference {
    using type = T;
};
template <typename T>
struct remove_reference<T &> {
    using type = T;
};
template <typename T>
struct remove_reference<T &&> {
    using type = T;
};
146 | |
147 | template <typename T> |
148 | inline T &&forward(typename utils::remove_reference<T>::type &t) { |
149 | return static_cast<T &&>(t); |
150 | } |
151 | template <typename T> |
152 | inline T &&forward(typename utils::remove_reference<T>::type &&t) { |
153 | return static_cast<T &&>(t); |
154 | } |
155 | |
156 | template <typename T> |
157 | inline typename remove_reference<T>::type zero() { |
158 | auto zero = typename remove_reference<T>::type(); |
159 | return zero; |
160 | } |
161 | |
/* analogue to std::make_unique (single-object form): constructs a T on the
 * heap from the perfectly forwarded arguments and hands ownership to a
 * std::unique_ptr. */
template <typename T, typename... Args>
std::unique_ptr<T> make_unique(Args &&... args) {
    T *raw = new T(std::forward<Args>(args)...);
    return std::unique_ptr<T>(raw);
}
166 | |
/* Returns true when every listed item compares equal (operator==) to `val`.
 * Items are checked left to right and checking stops at the first mismatch. */
template <typename T, typename P>
constexpr bool everyone_is(T val, P item) {
    return val == item;
}
template <typename T, typename P, typename... Args>
constexpr bool everyone_is(T val, P item, Args... item_others) {
    return (val == item) ? everyone_is(val, item_others...) : false;
}
175 | |
/* Returns true when at least one listed item compares equal (operator==) to
 * `val`. Items are checked left to right and checking stops at the first
 * match. */
template <typename T, typename P>
constexpr bool one_of(T val, P item) {
    return val == item;
}
template <typename T, typename P, typename... Args>
constexpr bool one_of(T val, P item, Args... item_others) {
    return (val == item) ? true : one_of(val, item_others...);
}
184 | |
/* Compile-time friendly lookup over a flat list of (item, value) pairs:
 * returns the value paired with the first item equal to `pat`, or `def` when
 * no item matches. */
template <typename T, typename P>
constexpr P map(T pat, P def) {
    return def;
}
template <typename T, typename P, typename... Args>
constexpr P map(T pat, P def, T item, P ival, Args... item_others) {
    return !(pat == item) ? map(pat, def, item_others...) : ival;
}
193 | |
/* Returns true when at least one of the given pointers compares equal to
 * nullptr (delegates to one_of). */
template <typename... Args>
constexpr bool any_null(Args... ptrs) {
    return one_of(nullptr, ptrs...);
}
198 | |
/* Copies `size` elements from `src` to `dst`, one assignment at a time in
 * increasing index order. */
template <typename T>
inline void array_copy(T *dst, const T *src, size_t size) {
    const T *const src_end = src + size;
    while (src != src_end)
        *dst++ = *src++;
}
/* Returns true when the first `size` elements of both arrays are pairwise
 * equal, i.e. no pair satisfies operator!=. An empty range compares equal. */
template <typename T>
inline bool array_cmp(const T *a1, const T *a2, size_t size) {
    size_t idx = 0;
    while (idx < size && !(a1[idx] != a2[idx]))
        ++idx;
    return idx == size;
}
/* Assigns `val`, converted to T with static_cast, to each of the first
 * `size` elements of `arr`. */
template <typename T, typename U>
inline void array_set(T *arr, const U &val, size_t size) {
    for (T *it = arr, *const stop = arr + size; it != stop; ++it)
        *it = static_cast<T>(val);
}
215 | |
namespace product_impl {
// Tag type that carries the number of elements still to be multiplied.
template <size_t>
struct int2type {};

// Recursion terminator: the last remaining element. It returns T (not int)
// so that wide element types such as 64-bit dimensions are not truncated.
template <typename T>
constexpr T product_impl(const T *arr, int2type<0>) {
    return arr[0];
}

// Multiplies the current element by the product of the elements after it.
template <typename T, size_t num>
constexpr T product_impl(const T *arr, int2type<num>) {
    return arr[0] * product_impl(arr + 1, int2type<num - 1>());
}
} // namespace product_impl

/* Returns the product of the first `num` elements of `arr`, where `num` is a
 * compile-time constant that must be at least 1. */
template <size_t num, typename T>
constexpr T array_product(const T *arr) {
    static_assert(num > 0, "array_product requires at least one element");
    return product_impl::product_impl(arr, product_impl::int2type<num - 1>());
}
235 | |
/* Returns the product of the first `size` elements of `arr`, accumulated in
 * type R (T by default). The product of an empty range is 1. */
template <typename T, typename R = T>
inline R array_product(const T *arr, size_t size) {
    R result = 1;
    for (const T *it = arr, *const stop = arr + size; it != stop; ++it)
        result *= *it;
    return result;
}
243 | |
/* Returns the product of all elements of `v`, accumulated in type R (T by
 * default), by delegating to the pointer/size overload. */
template <typename T, typename R = T>
inline R array_product(const std::vector<T> &v) {
    return array_product<T, R>(v.data(), v.size());
}
248 | |
/* Returns the smallest of the first `size` elements of `arr` as a value of
 * type R (T by default). For an empty range the result is
 * std::numeric_limits<R>::max().
 *
 * Each element is converted to R before the comparison, so std::min always
 * sees two arguments of the same type and R may differ from T. */
template <typename T, typename R = T>
inline R array_min(const T *arr, size_t size) {
    R smallest = std::numeric_limits<R>::max();
    for (size_t i = 0; i < size; ++i)
        smallest = std::min(smallest, static_cast<R>(arr[i]));
    return smallest;
}
256 | |
/* Float equality that additionally treats two NaN values as equal. */
inline bool equal_with_nan(float v1, float v2) {
    if (v1 == v2) return true;
    return std::isnan(v1) && std::isnan(v2);
}
260 | |
261 | /* Sorts an array of @p vals using @p comparator. Uses @p vals_2nd_level as a |
262 | * second level comparing criteria in case comparator returns 0 (equal values) |
263 | * for @p vals elements. |
264 | * While sorting the array of @p vals, the function permutes an array of |
265 | * @p vals_2nd_level and @p keys accordingly. |
266 | */ |
267 | template <typename T, typename U, typename F> |
268 | inline void simultaneous_sort( |
269 | T *vals, T *vals_2nd_level, U *keys, size_t size, F comparator) { |
270 | if (size == 0) return; |
271 | |
272 | for (size_t i = 0; i < size - 1; ++i) { |
273 | bool swapped = false; |
274 | |
275 | for (size_t j = 0; j < size - i - 1; j++) { |
276 | auto res = comparator(vals[j], vals[j + 1]); |
277 | if (res == 0) |
278 | res = comparator(vals_2nd_level[j], vals_2nd_level[j + 1]); |
279 | |
280 | if (res > 0) { |
281 | nstl::swap(vals[j], vals[j + 1]); |
282 | nstl::swap(vals_2nd_level[j], vals_2nd_level[j + 1]); |
283 | nstl::swap(keys[j], keys[j + 1]); |
284 | swapped = true; |
285 | } |
286 | } |
287 | |
288 | if (swapped == false) break; |
289 | } |
290 | } |
291 | |
/* Returns nstl::max(low, nstl::min(upper, a)), i.e. `a` limited to the range
 * [low, upper] when low <= upper.
 * NOTE(review): the result is returned by const reference; presumably it
 * refers to one of the arguments, so it should not outlive temporaries passed
 * in -- confirm against nstl::min/nstl::max. */
template <typename T>
constexpr const T &saturate(const T &low, const T &upper, const T &a) {
    return nstl::max(low, nstl::min(upper, a));
}
296 | |
297 | template <typename T, typename U> |
298 | inline typename remove_reference<T>::type div_up(const T a, const U b) { |
299 | assert(b); |
300 | return static_cast<typename remove_reference<T>::type>((a + b - 1) / b); |
301 | } |
302 | |
303 | template <typename T, typename U> |
304 | inline typename remove_reference<T>::type rnd_up(const T a, const U b) { |
305 | return static_cast<typename remove_reference<T>::type>(div_up(a, b) * b); |
306 | } |
307 | |
308 | template <typename T, typename U> |
309 | constexpr typename remove_reference<T>::type rnd_dn(const T a, const U b) { |
310 | return static_cast<typename remove_reference<T>::type>((a / b) * b); |
311 | } |
312 | |
313 | template <typename T> |
314 | inline typename remove_reference<T>::type rnd_up_pow2(const T a) { |
315 | using R = typename remove_reference<T>::type; |
316 | if (a <= 0) |
317 | return static_cast<R>(1); |
318 | else { |
319 | T b = a - 1; |
320 | for (size_t v = 1; v < sizeof(T) * CHAR_BIT; v <<= 1) |
321 | b |= (b >> v); |
322 | return static_cast<R>(b + 1); |
323 | } |
324 | } |
325 | |
326 | template <typename T> |
327 | inline typename remove_reference<T>::type rnd_down_pow2(const T a) { |
328 | auto ret = rnd_up_pow2(a); |
329 | return ret == a ? ret : ret / 2; |
330 | } |
331 | |
332 | template <typename T, typename U> |
333 | inline typename remove_reference<T>::type max_div(const T a, const U b) { |
334 | U div = b; |
335 | while (div > 1) { |
336 | if (a % div == 0) return div; |
337 | div--; |
338 | } |
339 | return static_cast<typename remove_reference<T>::type>(div); |
340 | } |
341 | |
342 | template <typename T> |
343 | inline typename remove_reference<T>::type max_pow2_div(const T a) { |
344 | return static_cast<typename remove_reference<T>::type>(((a - 1) & ~a) + 1); |
345 | } |
346 | |
/* Rounds the address held in `ptr` up to a multiple of `alignment`.
 * The mask arithmetic only gives a multiple when `alignment` is a power of
 * two. */
template <typename T>
T *align_ptr(T *ptr, uintptr_t alignment) {
    const uintptr_t mask = alignment - 1;
    const uintptr_t bumped = reinterpret_cast<uintptr_t>(ptr) + mask;
    return reinterpret_cast<T *>(bumped & ~mask);
}
351 | |
352 | template <typename T, typename U, typename V> |
353 | inline typename remove_reference<U>::type this_block_size( |
354 | const T offset, const U max, const V block_size) { |
355 | assert(offset < max); |
356 | // TODO (Roma): can't use nstl::max() due to circular dependency... we |
357 | // need to fix this |
358 | const T block_boundary = offset + block_size; |
359 | if (block_boundary > max) |
360 | return max - offset; |
361 | else |
362 | return block_size; |
363 | } |
364 | |
365 | template <typename T> |
366 | inline T nd_iterator_init(T start) { |
367 | return start; |
368 | } |
369 | template <typename T, typename U, typename W, typename... Args> |
370 | inline T nd_iterator_init(T start, U &x, const W &X, Args &&... tuple) { |
371 | start = nd_iterator_init(start, utils::forward<Args>(tuple)...); |
372 | x = start % X; |
373 | return start / X; |
374 | } |
375 | |
376 | inline bool nd_iterator_step() { |
377 | return true; |
378 | } |
379 | template <typename U, typename W, typename... Args> |
380 | inline bool nd_iterator_step(U &x, const W &X, Args &&... tuple) { |
381 | if (nd_iterator_step(utils::forward<Args>(tuple)...)) { |
382 | if (++x - X == 0) { |
383 | x = 0; |
384 | return true; |
385 | } |
386 | } |
387 | return false; |
388 | } |
389 | |
/* Advances the linear position `cur` towards `end` along one dimension whose
 * coordinate is `x` and extent is `X`.
 * When the rest of the dimension (X - x) fits into the remaining range
 * (end - cur), `cur` moves by that amount, `x` wraps to 0 and true is
 * returned. Otherwise `cur` and `x` both move by the remaining range and
 * false is returned. */
template <typename U, typename W, typename Y>
inline bool nd_iterator_jump(U &cur, const U end, W &x, const Y &X) {
    const U remaining = end - cur;
    const U to_dim_end = X - x;
    if (to_dim_end > remaining) {
        cur += remaining;
        x += remaining;
        return false;
    }
    x = 0;
    cur += to_dim_end;
    return true;
}
404 | template <typename U, typename W, typename Y, typename... Args> |
405 | inline bool nd_iterator_jump( |
406 | U &cur, const U end, W &x, const Y &X, Args &&... tuple) { |
407 | if (nd_iterator_jump(cur, end, utils::forward<Args>(tuple)...)) { |
408 | if (++x - X == 0) { |
409 | x = 0; |
410 | return true; |
411 | } |
412 | } |
413 | return false; |
414 | } |
415 | |
416 | template <typename T> |
417 | constexpr T pick(size_t i, const T &x0) { |
418 | return x0; |
419 | } |
420 | template <typename T, typename... Args> |
421 | constexpr T pick(size_t i, const T &x0, Args &&... args) { |
422 | return i == 0 ? x0 : pick(i - 1, utils::forward<Args>(args)...); |
423 | } |
424 | |
425 | template <typename T> |
426 | T pick_by_prop_kind(prop_kind_t prop_kind, const T &val_fwd_inference, |
427 | const T &val_fwd_training, const T &val_bwd_d, const T &val_bwd_w) { |
428 | switch (prop_kind) { |
429 | case prop_kind::forward_inference: return val_fwd_inference; |
430 | case prop_kind::forward_training: return val_fwd_training; |
431 | case prop_kind::backward_data: return val_bwd_d; |
432 | case prop_kind::backward_weights: return val_bwd_w; |
433 | default: assert(!"unsupported prop_kind" ); |
434 | } |
435 | return T(); |
436 | } |
437 | |
438 | template <typename T> |
439 | T pick_by_prop_kind(prop_kind_t prop_kind, const T &val_fwd, const T &val_bwd_d, |
440 | const T &val_bwd_w) { |
441 | return pick_by_prop_kind(prop_kind, val_fwd, val_fwd, val_bwd_d, val_bwd_w); |
442 | } |
443 | |
/* Indexes a flat buffer of Telem as a Tdims-dimensional array.
 *
 * The constructor stores the base pointer and the dimension sizes;
 * operator() turns a list of indices into a linear offset and returns a
 * reference to that element. For indices (i0, i1, ..., ik) the offset is
 * (...((i0 * _dims[1] + i1) * _dims[2] + i2)...) + ik, i.e. the last index
 * varies fastest and _dims[0] is not read by the offset computation. */
template <typename Telem, size_t Tdims>
struct array_offset_calculator {
    // Stores `base` and initializes the dimension sizes from `Fargs`.
    template <typename... Targs>
    array_offset_calculator(Telem *base, Targs... Fargs) : _dims {Fargs...} {
        _base_ptr = base;
    }

    // Construction from a literal nullptr is rejected at compile time.
    template <typename... Targs>
    array_offset_calculator(std::nullptr_t, Targs... Fargs) = delete;

    // Returns a reference to the element addressed by the indices `Fargs`.
    // Asserts that the base pointer is not null.
    template <typename... Targs>
    inline Telem &operator()(Targs... Fargs) const {
        assert(static_cast<bool>(_base_ptr));
        return *(_base_ptr + _offset(1, Fargs...));
    }

private:
    // Single index: the offset is the index itself.
    template <typename... Targs>
    inline size_t _offset(size_t const dimension, size_t element) const {
        return element;
    }

    // Last step: fold the accumulated offset `theta` with the final index.
    template <typename... Targs>
    inline size_t _offset(
            size_t const dimension, size_t theta, size_t element) const {
        return element + (_dims[dimension] * theta);
    }

    // Intermediate step: fold `theta` with the next index, then continue
    // with the following dimension and the remaining indices.
    template <typename... Targs>
    inline size_t _offset(size_t const dimension, size_t theta, size_t element,
            Targs... Fargs) const {
        size_t t_prime = element + (_dims[dimension] * theta);
        return _offset(dimension + 1, t_prime, Fargs...);
    }

    Telem *_base_ptr;
    const int _dims[Tdims];
};
482 | |
/* Casts `base` to `derived_type` with static_cast. When assertions are
 * enabled, a dynamic_cast additionally verifies that the object really has
 * the requested type. */
template <typename derived_type, typename base_type>
inline derived_type downcast(base_type *base) {
    derived_type result = static_cast<derived_type>(base);
    assert(dynamic_cast<derived_type>(base) == base);
    return result;
}
488 | |
/* Prepares one argument for a printf-style call.
 * Anything that is not a std::string is forwarded unchanged. */
template <typename T,
        typename std::enable_if<!std::is_same<typename std::decay<T>::type,
                std::string>::value>::type * = nullptr>
T &&format_cvt_impl(T &&t) {
    return static_cast<T &&>(t);
}

/* A std::string is replaced by its C string. The pointer is valid only as
 * long as the string object passed in is alive and unmodified. */
template <typename T,
        typename std::enable_if<std::is_same<typename std::decay<T>::type,
                std::string>::value>::type * = nullptr>
const char *format_cvt_impl(T &&t) {
    return t.c_str();
}
502 | |
/* printf-style formatting into a std::string.
 *
 * snprintf is called twice: first with a null buffer to learn the required
 * length, then to write into a buffer of exactly that size.
 * Returns an empty string when snprintf reports an error (negative result)
 * instead of turning the negative value into a huge allocation size. */
template <typename... Args>
std::string format_impl(const char *fmt, Args... args) {
    // Length of the formatted text, excluding the terminating zero.
    const int len = std::snprintf(nullptr, 0, fmt, args...);
    if (len <= 0) return std::string();

    const size_t sz = static_cast<size_t>(len);
    // One extra character for the terminating zero written by snprintf.
    std::string buf(sz + 1, '\0');
    std::snprintf(&buf[0], sz + 1, fmt, args...);
    buf.resize(sz);
    return buf;
}
511 | |
/* printf-style formatting into a std::string. Every argument goes through
 * format_cvt_impl first, so std::string arguments can be passed directly for
 * `%s` conversions. */
template <typename... Args>
std::string format(const char *fmt, Args &&... args) {
    return format_impl(fmt, format_cvt_impl(std::forward<Args>(args))...);
}
516 | |
517 | // transforms @param l(ogical)_offset into a @param dims_pos based on input |
518 | // dimensions @param dims and @param ndims. |
519 | inline void l_dims_by_l_offset( |
520 | dims_t dims_pos, dim_t l_offset, const dims_t dims, int ndims) { |
521 | for (int rd = 0; rd < ndims; ++rd) { |
522 | const int d = ndims - 1 - rd; |
523 | /* switch to faster 32-bit division when possible. */ |
524 | if (l_offset <= INT32_MAX && dims[d] <= INT32_MAX) { |
525 | dims_pos[d] = (int32_t)l_offset % (int32_t)dims[d]; |
526 | l_offset = (int32_t)l_offset / (int32_t)dims[d]; |
527 | } else { |
528 | dims_pos[d] = l_offset % dims[d]; |
529 | l_offset /= dims[d]; |
530 | } |
531 | } |
532 | } |
533 | |
534 | inline int get_dims_mask(const dims_t dims1, const dims_t dims2, int ndims, |
535 | bool skip_dim_of_one = false) { |
536 | int mask = 0; |
537 | for (int d = 0; d < ndims; ++d) { |
538 | // Disable mask_bit for dimensions of `1` by request. |
539 | int mask_bit = skip_dim_of_one && dims1[d] == 1 ? 0 : (1 << d); |
540 | mask += dims1[d] == dims2[d] ? mask_bit : 0; |
541 | } |
542 | return mask; |
543 | }; |
544 | |
545 | inline void copy_dims_with_mask( |
546 | dims_t ddims, const dims_t sdims, int ndims, int mask) { |
547 | for (int d = 0; d < ndims; ++d) { |
548 | ddims[d] = (mask & (1 << d)) ? sdims[d] : 0; |
549 | } |
550 | } |
551 | |
// In-place form of copy_dims_with_mask: `dims` is passed as both destination
// and source, so dimensions whose bit is not set in `mask` become 0.
inline void apply_mask_on_dims(dims_t dims, int ndims, int mask) {
    copy_dims_with_mask(dims, dims, ndims, mask);
}
555 | |
556 | inline void dim_iterator(const dims_t dims, dims_t indices, int ndims) { |
557 | while (--ndims >= 0 && ++indices[ndims] >= dims[ndims]) { |
558 | indices[ndims] = 0; |
559 | } |
560 | } |
561 | |
562 | } // namespace utils |
563 | |
// Declared here, defined elsewhere.
// NOTE(review): presumably adds `val` to `*dst` atomically and returns the
// previous value -- confirm against the definition.
int32_t fetch_and_add(int32_t *dst, int32_t val);
// Intentionally empty in this header.
inline void yield_thread() {}
566 | |
567 | // Reads an environment variable 'name' and stores its string value in the |
568 | // 'buffer' of 'buffer_size' bytes (including the terminating zero) on |
569 | // success. |
570 | // |
571 | // - Returns the length of the environment variable string value (excluding |
572 | // the terminating 0) if it is set and its contents (including the terminating |
573 | // 0) can be stored in the 'buffer' without truncation. |
574 | // |
// - Returns negated length of environment variable string value and writes
// "\0" to the buffer (if it is not NULL) if the 'buffer_size' is too small to
// store the value (including the terminating 0) without truncation.
578 | // |
579 | // - Returns 0 and writes "\0" to the buffer (if not NULL) if the environment |
580 | // variable is not set. |
581 | // |
582 | // - Returns INT_MIN if the 'name' is NULL. |
583 | // |
584 | // - Returns INT_MIN if the 'buffer_size' is negative. |
585 | // |
586 | // - Returns INT_MIN if the 'buffer' is NULL and 'buffer_size' is greater than |
587 | // zero. Passing NULL 'buffer' with 'buffer_size' set to 0 can be used to |
588 | // retrieve the length of the environment variable value string. |
589 | // |
590 | int getenv(const char *name, char *buffer, int buffer_size); |
591 | // Reads an integer from the environment. For internal needs. |
592 | int getenv_int(const char *name, int default_value = 0); |
593 | // Reads an integer from user environment. Takes a var name without |
594 | // prefix and checks both supported variants - with "ONEDNN_" (primary) and |
595 | // "DNNL_" (secondary) prefixes. |
596 | int getenv_int_user(const char *name, int default_value = 0); |
597 | // Reads a string literal from user environment. Takes a var name without |
598 | // prefix and checks both supported variants - with "ONEDNN_" (primary) and |
599 | // "DNNL_" (secondary) prefixes. |
600 | std::string getenv_string_user(const char *name); |
601 | |
602 | // Various getter for profiling info |
603 | bool get_jit_dump(); |
604 | unsigned get_jit_profiling_flags(); |
605 | std::string get_jit_profiling_jitdumpdir(); |
606 | FILE *fopen(const char *filename, const char *mode); |
607 | int getpagesize(); |
608 | |
609 | // return current library fpmath_mode |
610 | fpmath_mode_t get_fpmath_mode(); |
611 | // checks if an fpmath_mode is valid |
612 | status_t check_fpmath_mode(fpmath_mode_t mode); |
// Returns true if values represented by type sub_dt can all be
// represented in dt. Returns false otherwise.
615 | bool is_fpsubtype(data_type_t sub_dt, data_type_t dt); |
616 | |
// Compile-time flag mirroring the MSAN_ENABLED macro.
constexpr int msan_enabled = MSAN_ENABLED;
// Marks `size` bytes starting at `ptr` as initialized for the memory
// sanitizer. Does nothing when MSAN_ENABLED is 0.
inline void msan_unpoison(void *ptr, size_t size) {
#if MSAN_ENABLED
    __msan_unpoison(ptr, size);
#endif
}
623 | |
624 | // std::optional? std::maybe? std::whatever |
625 | template <typename T> |
626 | struct setting_t { |
627 | private: |
628 | T value_; |
629 | bool initialized_; |
630 | |
631 | public: |
632 | constexpr setting_t() : value_ {}, initialized_ {false} {} |
633 | constexpr setting_t(const T init) : value_ {init}, initialized_ {false} {} |
634 | bool initialized() { return initialized_; } |
635 | T get() { return value_; } |
636 | void set(T new_value) { |
637 | value_ = new_value; |
638 | initialized_ = true; |
639 | } |
640 | DNNL_DISALLOW_COPY_AND_ASSIGN(setting_t); |
641 | }; |
642 | |
// The following code is derived from Boost C++ library
// Copyright 2005-2014 Daniel James.
// Distributed under the Boost Software License, Version 1.0. (See accompanying
// file LICENSE or copy at http://www.boost.org/LICENSE_1_0.txt)
//
// Mixes std::hash<T> of `v` into `seed` and returns the new seed.
template <typename T>
static size_t hash_combine(size_t seed, const T &v) {
    const size_t mixed
            = std::hash<T> {}(v) + 0x9e3779b9 + (seed << 6) + (seed >> 2);
    seed ^= mixed;
    return seed;
}
651 | |
// Returns utils::bit_cast<int>(x).
// NOTE(review): utils::bit_cast is not defined in the visible part of this
// header; presumably it reinterprets the bits of the float as an int without
// numeric conversion -- confirm where it is declared.
inline int float2int(float x) {
    return utils::bit_cast<int>(x);
}
655 | |
656 | // XXX: Currently SYCL doesn't provide an API to get device UUID but |
657 | // we need to be able to distinguish OpenCL device from Level0 device. |
658 | // As a temporary solution the compound ID will be used for that. |
// Below is a table explaining what the numbers are for different backends:
660 | // |
661 | // ------------------------------------------------------------- |
662 | // Backend | Compound ID |
663 | // ------------------------------------------------------------- |
664 | // Host | <backend_t::host, 0, 0> |
665 | // OpenCL | <backend_t::opencl, cl_device, 0> |
666 | // NVIDIA | <backend_t::nvidia, cuDevice, 0> |
667 | // Level0 | <backend_t::level0, uuid[0-63], uuid[64-127]> |
668 | // Pure CPU | <0, 0, 0> |
669 | // Pure GPU | <0, cl_device, 0> |
670 | using device_id_t = std::tuple<int, uint64_t, uint64_t>; |
671 | |
672 | struct device_id_hash_t { |
673 | size_t operator()(const device_id_t &id) const { |
674 | size_t result = 0; |
675 | result = hash_combine(result, std::get<0>(id)); |
676 | result = hash_combine(result, std::get<1>(id)); |
677 | result = hash_combine(result, std::get<2>(id)); |
678 | return result; |
679 | } |
680 | }; |
681 | |
// A setting (basically a value) that can be set() only before it becomes
// locked. The set() method is expected to be as expensive as a busy-waiting
// spinlock. The get() method is expected to be asymptotically as expensive as
// a single lock-prefixed memory read. The get() method also has a 'soft' mode
// when the setting is not locked for re-setting. This is used for testing
// purposes.
//
// State transitions implemented below:
// - set() moves idle -> busy_setting -> locked and returns true; it returns
//   false without touching the value once the state is locked.
// - get() with soft == false moves idle -> locked before reading the value.
// - get() with soft == true reads the value and leaves the state alone.
// NOTE(review): because a successful set() stores `locked`, a second set()
// returns false even if get() was never called -- confirm this is intended.
template <typename T>
struct set_once_before_first_get_setting_t {
private:
    T value_;
    std::atomic<unsigned> state_;
    enum : unsigned { idle = 0, busy_setting = 1, locked = 2 };

public:
    set_once_before_first_get_setting_t(T init)
        : value_ {init}, state_ {idle} {}

    bool set(T new_value) {
        // Fast path: already locked, nothing can be changed anymore.
        if (state_.load() == locked) return false;

        // Spin until this thread moves the state from idle to busy_setting.
        // A failed compare-exchange stores the observed state in `expected`.
        while (true) {
            unsigned expected = idle;
            if (state_.compare_exchange_weak(expected, busy_setting)) break;
            if (expected == locked) return false;
        }

        value_ = new_value;
        state_.store(locked);
        return true;
    }

    T get(bool soft = false) {
        if (!soft && state_.load() != locked) {
            // Spin until the state is locked, either by this thread or by
            // another one (e.g. a concurrent set() that is finishing).
            while (true) {
                unsigned expected = idle;
                if (state_.compare_exchange_weak(expected, locked)) break;
                if (expected == locked) break;
            }
        }
        return value_;
    }
};
724 | |
// Returns true when `kind` is one of the seq, omp, tbb or threadpool runtime
// kinds.
inline bool is_native_runtime(runtime_kind_t kind) {
    return utils::one_of(kind, runtime_kind::seq, runtime_kind::omp,
            runtime_kind::tbb, runtime_kind::threadpool);
}
729 | |
730 | // Convenience wrapper to choose at compile-time between std::unique_ptr's |
731 | // default deleter and a no-op one. |
732 | // |
733 | // This is useful for static pointers to objects with non-trivial destructors. |
734 | // In some environments (e.g. tests where not all threads are joined at exit |
735 | // time) these destructors can result in sanitizer failures (e.g. races in |
736 | // thread sanitizer) when destructing unique_ptr's, but not with raw pointers. |
737 | // Of course in a shared library environment using raw pointers (that are |
738 | // therefore never freed) would result in memory leaks; this is why |
739 | // DNNL_MAYBE_UNIQUE_PTR_IS_UNIQUE defaults to 1. |
// Can be predefined by the build to 0 to select the no-op deleter below.
#ifndef DNNL_MAYBE_UNIQUE_PTR_IS_UNIQUE
#define DNNL_MAYBE_UNIQUE_PTR_IS_UNIQUE 1
#endif

#if DNNL_MAYBE_UNIQUE_PTR_IS_UNIQUE
// Plain std::unique_ptr with the default deleter.
template <typename T>
using maybe_unique_ptr = std::unique_ptr<T>;
#else
// Deleter that accepts any argument and does nothing with it.
struct nop_deleter_t {
    template <typename T>
    void operator()(T const &) const noexcept {}
};
// std::unique_ptr that never frees the object it points to.
template <typename T>
using maybe_unique_ptr = std::unique_ptr<T, nop_deleter_t>;
#endif // DNNL_MAYBE_UNIQUE_PTR_IS_UNIQUE
755 | |
756 | } // namespace impl |
757 | } // namespace dnnl |
758 | |
759 | #endif |
760 | |
761 | // vim: et ts=4 sw=4 cindent cino+=l0,\:4,N-s |
762 | |