1 | #ifndef C10_MACROS_MACROS_H_ |
2 | #define C10_MACROS_MACROS_H_ |
3 | #include <cassert> |
4 | |
5 | /* Main entry for c10/macros. |
6 | * |
7 | * In your code, include c10/macros/Macros.h directly, instead of individual |
8 | * files in this folder. |
9 | */ |
10 | |
11 | // For build systems that do not directly depend on CMake and directly build |
12 | // from the source directory (such as Buck), one may not have a cmake_macros.h |
13 | // file at all. In this case, the build system is responsible for providing |
14 | // correct macro definitions corresponding to the cmake_macros.h.in file. |
15 | // |
16 | // In such scenarios, one should define the macro |
17 | // C10_USING_CUSTOM_GENERATED_MACROS |
18 | // to inform this header that it does not need to include the cmake_macros.h |
19 | // file. |
20 | |
21 | #ifndef C10_USING_CUSTOM_GENERATED_MACROS |
22 | #include <c10/macros/cmake_macros.h> |
23 | #endif // C10_USING_CUSTOM_GENERATED_MACROS |
24 | |
25 | #include <c10/macros/Export.h> |
26 | |
// Function annotations that switch off individual UndefinedBehaviorSanitizer
// checks. Only clang receives the no_sanitize attribute; for every other
// compiler the macros expand to nothing.
#if !defined(__clang__)
#define __ubsan_ignore_float_divide_by_zero__
#define __ubsan_ignore_undefined__
#define __ubsan_ignore_signed_int_overflow__
#define __ubsan_ignore_function__
#else
#define __ubsan_ignore_float_divide_by_zero__ \
  __attribute__((no_sanitize("float-divide-by-zero")))
#define __ubsan_ignore_undefined__ __attribute__((no_sanitize("undefined")))
#define __ubsan_ignore_signed_int_overflow__ \
  __attribute__((no_sanitize("signed-integer-overflow")))
#define __ubsan_ignore_function__ __attribute__((no_sanitize("function")))
#endif
40 | |
// Normalize AddressSanitizer detection into C10_ASAN_ENABLED, which is always
// defined to 0 or 1 after this point (some stuff doesn't work under ASan).
#undef C10_ASAN_ENABLED

// clang: ASan is reported through __has_feature. The query is nested so that
// it is only evaluated when __has_feature itself exists.
#if defined(__has_feature)
#if __has_feature(address_sanitizer)
#define C10_ASAN_ENABLED 1
#endif
#endif

// gcc: ASan is reported through the __SANITIZE_ADDRESS__ macro.
#if !defined(C10_ASAN_ENABLED) && defined(__SANITIZE_ADDRESS__)
#if __SANITIZE_ADDRESS__
#define C10_ASAN_ENABLED 1
#endif
#endif

// Neither probe fired: ASan is off.
#ifndef C10_ASAN_ENABLED
#define C10_ASAN_ENABLED 0
#endif
63 | |
// Deletes the copy constructor and the copy-assignment operator of class `T`.
// Use it inside the class body and terminate it with a semicolon (the
// expansion deliberately leaves the last declaration unterminated). Note that
// this will disable the usage of the class in std containers.
#define C10_DISABLE_COPY_AND_ASSIGN(T) \
  T(const T&) = delete;                \
  T& operator=(const T&) = delete
69 | |
// Token pasting. C10_CONCATENATE goes through C10_CONCATENATE_IMPL so that
// its arguments are macro-expanded before `##` glues them together;
// C10_CONCATENATE_IMPL pastes its arguments verbatim.
#define C10_CONCATENATE_IMPL(lhs, rhs) lhs##rhs
#define C10_CONCATENATE(lhs, rhs) C10_CONCATENATE_IMPL(lhs, rhs)
72 | |
// Identity macro: expands to its (single) argument unchanged, forcing one
// extra round of macro expansion on it.
#define C10_MACRO_EXPAND(tokens) tokens
74 | |
// Stringification. C10_STRINGIZE macro-expands its argument first and then
// turns the result into a string literal; C10_STRINGIZE_IMPL stringizes its
// argument exactly as written.
#define C10_STRINGIZE_IMPL(token) #token
#define C10_STRINGIZE(token) C10_STRINGIZE_IMPL(token)
77 | |
/**
 * C10_UID expands to an integer used to build unique identifiers:
 * __COUNTER__ when the compiler provides it, otherwise __LINE__.
 *
 * C10_ANONYMOUS_VARIABLE(str) introduces an identifier starting with
 * str and ending with that number. With __COUNTER__ every expansion gets a
 * fresh number; with the __LINE__ fallback the number only varies with the
 * line, so two uses on one line would collide.
 */
#if defined(__COUNTER__)
#define C10_UID __COUNTER__
#else
#define C10_UID __LINE__
#endif
// C10_CONCATENATE expands C10_UID to its number before pasting.
#define C10_ANONYMOUS_VARIABLE(str) C10_CONCATENATE(str, C10_UID)
89 | |
// C10_HAS_CPP_ATTRIBUTE(attr) forwards to __has_cpp_attribute when the
// compiler provides that operator and evaluates to (0) otherwise, so it is
// always safe to use inside #if.
#if defined(__has_cpp_attribute)
#define C10_HAS_CPP_ATTRIBUTE(attr) __has_cpp_attribute(attr)
#else
#define C10_HAS_CPP_ATTRIBUTE(attr) (0)
#endif
95 | |
96 | /// C10_NODISCARD - Warn if a type or return value is discarded. |
97 | |
98 | // Technically, we should check if __cplusplus > 201402L here, because |
99 | // [[nodiscard]] is only defined in C++17. However, some compilers |
100 | // we care about don't advertise being C++17 (e.g., clang), but |
101 | // support the attribute anyway. In fact, this is not just a good idea, |
102 | // it's the law: clang::warn_unused_result doesn't work on nvcc + clang |
103 | // and the best workaround for this case is to use [[nodiscard]] |
104 | // instead; see https://github.com/pytorch/pytorch/issues/13118 |
105 | // |
106 | // Note to future editors: if you have noticed that a compiler is |
107 | // misbehaving (e.g., it advertises support, but the support doesn't |
108 | // actually work, or it is emitting warnings). Some compilers which |
109 | // are strict about the matter include MSVC, which will complain: |
110 | // |
111 | // error C2429: attribute 'nodiscard' requires compiler flag '/std:c++latest' |
112 | // |
113 | // Exhibits: |
114 | // - MSVC 19.14: https://godbolt.org/z/Dzd7gn (requires /std:c++latest) |
115 | // - Clang 8.0.0: https://godbolt.org/z/3PYL4Z (always advertises support) |
116 | // - gcc 8.3: https://godbolt.org/z/4tLMQS (always advertises support) |
117 | #if C10_HAS_CPP_ATTRIBUTE(nodiscard) |
118 | #define C10_NODISCARD [[nodiscard]] |
119 | // Workaround for llvm.org/PR23435, since clang 3.6 and below emit a spurious |
120 | // error when __has_cpp_attribute is given a scoped attribute in C mode. |
121 | #elif __cplusplus && C10_HAS_CPP_ATTRIBUTE(clang::warn_unused_result) |
122 | // TODO: It's possible this is still triggering |
123 | // https://github.com/pytorch/pytorch/issues/13118 on Windows; if it is, better |
124 | // fix it. |
125 | #define C10_NODISCARD [[clang::warn_unused_result]] |
126 | #else |
127 | #define C10_NODISCARD |
128 | #endif |
129 | |
// C10_UNUSED - suppress an unused-variable diagnostic for the declaration it
// annotates. Real MSVC (not clang in MSVC mode) uses a warning-suppression
// pragma for C4100/C4101; everything else uses the GNU unused attribute.
#if !defined(_MSC_VER) || defined(__clang__)
#define C10_UNUSED __attribute__((__unused__))
#else
#define C10_UNUSED __pragma(warning(suppress : 4100 4101))
#endif // !_MSC_VER || __clang__
136 | |
// Compilers without the __has_attribute operator would fail to parse the
// `#if __has_attribute(...)` checks in this header. Give them a fallback that
// reports every attribute as unsupported.
#if !defined(__has_attribute)
#define __has_attribute(x) 0
#endif

// Direct port of LLVM_ATTRIBUTE_USED.
// C10_USED expands to the GNU `used` attribute when the compiler supports it
// and to nothing otherwise.
#if __has_attribute(used)
#define C10_USED __attribute__((__used__))
#else
#define C10_USED
#endif
143 | |
// C10_RESTRICT - expands to the `__restrict` extension keyword, the qualifier
// used to mark a pointer as non-aliasing.
#define C10_RESTRICT __restrict
145 | |
// Declare the c10, c10::cuda and c10::hip namespaces up front, so a dependent
// library can refer to them without pulling in any nontrivial c10 header.
namespace c10 {
namespace cuda {}
namespace hip {}
} // namespace c10
155 | |
156 | // Since C10 is the core library for caffe2 (and aten), we will simply reroute |
157 | // all abstractions defined in c10 to be available in caffe2 as well. |
158 | // This is only for backwards compatibility. Please use the symbols from the |
159 | // c10 namespace where possible. |
160 | namespace caffe2 { |
161 | using namespace c10; |
162 | } |
163 | namespace at { |
164 | using namespace c10; |
165 | } |
166 | namespace at { |
167 | namespace cuda { |
168 | using namespace c10::cuda; |
169 | } |
170 | } // namespace at |
171 | |
172 | // WARNING!!! THIS IS A GIANT HACK!!! |
173 | // This line means you cannot simultaneously include c10/hip |
174 | // and c10/cuda and then use them from the at::cuda namespace. |
175 | // This is true in practice, because HIPIFY works inplace on |
176 | // files in ATen/cuda, so it assumes that c10::hip is available |
177 | // from at::cuda. This namespace makes that happen. When |
178 | // HIPIFY is no longer out-of-place, we can switch the cuda |
179 | // here to hip and everyone is happy. |
180 | namespace at { |
181 | namespace cuda { |
182 | using namespace c10::hip; |
183 | } |
184 | } // namespace at |
185 | |
// C10_LIKELY/C10_UNLIKELY
//
// Branch-prediction hints. The macros supply their own parentheses, so they
// can stand directly after `if`:
//
//    if C10_LIKELY(some_expr) {
//      ...
//    }
//
// NB: the static_cast to bool is mandatory in C++, because __builtin_expect
// takes a long argument, which means you may trigger the wrong conversion
// without it.
//
#if !(defined(__GNUC__) || defined(__ICL) || defined(__clang__))
// No __builtin_expect on this compiler: just evaluate the condition.
#define C10_LIKELY(condition) (condition)
#define C10_UNLIKELY(condition) (condition)
#else
#define C10_LIKELY(condition) \
  (__builtin_expect(static_cast<bool>(condition), 1))
#define C10_UNLIKELY(condition) \
  (__builtin_expect(static_cast<bool>(condition), 0))
#endif
205 | |
/// C10_NOINLINE - Functions whose declaration is annotated with this will not
/// be inlined. GNU-compatible compilers get the noinline attribute, MSVC gets
/// __declspec(noinline), and any other compiler gets nothing.
#if defined(__GNUC__)
#define C10_NOINLINE __attribute__((noinline))
#elif defined(_MSC_VER)
#define C10_NOINLINE __declspec(noinline)
#else
#define C10_NOINLINE
#endif
215 | |
// The #elif below queries __has_attribute. On a non-MSVC compiler that lacks
// the operator the directive would not even parse, so provide a fallback that
// reports every attribute as unsupported.
#if !defined(__has_attribute)
#define __has_attribute(x) 0
#endif

/// C10_ALWAYS_INLINE - Ask the compiler to inline the annotated function
/// regardless of its own heuristics. MSVC uses __forceinline; compilers that
/// support the always_inline attribute (or define __GNUC__) use it together
/// with `inline`; everything else falls back to plain `inline`.
#if defined(_MSC_VER)
#define C10_ALWAYS_INLINE __forceinline
#elif __has_attribute(always_inline) || defined(__GNUC__)
#define C10_ALWAYS_INLINE __attribute__((__always_inline__)) inline
#else
#define C10_ALWAYS_INLINE inline
#endif
223 | |
// C10_ATTR_VISIBILITY_HIDDEN - hidden symbol visibility on GNU-compatible
// compilers; empty on MSVC and on any compiler that does not define __GNUC__.
#if defined(__GNUC__) && !defined(_MSC_VER)
#define C10_ATTR_VISIBILITY_HIDDEN __attribute__((__visibility__("hidden")))
#else
#define C10_ATTR_VISIBILITY_HIDDEN
#endif

// C10_ERASE - shorthand combining forced inlining with hidden visibility.
#define C10_ERASE C10_ALWAYS_INLINE C10_ATTR_VISIBILITY_HIDDEN
233 | |
234 | // C10_FALLTHROUGH - Annotate fallthrough to the next case in a switch. |
235 | #if C10_HAS_CPP_ATTRIBUTE(fallthrough) |
236 | #define C10_FALLTHROUGH [[fallthrough]] |
237 | #else |
238 | #define C10_FALLTHROUGH |
239 | #endif |
240 | |
241 | #include <cstdint> |
242 | |
243 | #ifdef __HIPCC__ |
244 | // Unlike CUDA, HIP requires a HIP header to be included for __host__ to work. |
245 | // We do this #include here so that C10_HOST_DEVICE and friends will Just Work. |
246 | // See https://github.com/ROCm-Developer-Tools/HIP/issues/441 |
247 | #include <hip/hip_runtime.h> |
248 | #endif |
249 | |
#if !defined(__CUDACC__) && !defined(__HIPCC__)
// Not a CUDA/HIP compilation: the execution-space qualifiers expand to
// nothing.
#define C10_HOST_DEVICE
#define C10_HOST
#define C10_DEVICE
#else
// Designates functions callable from the host (CPU) and the device (GPU)
#define C10_HOST_DEVICE __host__ __device__
#define C10_DEVICE __device__
#define C10_HOST __host__

// Limits taken from
// (https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#features-and-technical-specifications)
// The maximum number of threads per multiprocessor is 1024 for Turing
// architecture (7.5), 1536 for the architectures selected below (8.6, 8.7 and
// 8.9), and 2048 for all other architectures. You'll get warnings if you
// exceed these constants, so the macros further down adjust the values
// supplied by the user to stay within them.
#if __CUDA_ARCH__ == 750
constexpr uint32_t CUDA_MAX_THREADS_PER_SM = 1024;
#elif __CUDA_ARCH__ == 860 || __CUDA_ARCH__ == 870 || __CUDA_ARCH__ == 890
constexpr uint32_t CUDA_MAX_THREADS_PER_SM = 1536;
#else
constexpr uint32_t CUDA_MAX_THREADS_PER_SM = 2048;
#endif

// CUDA_MAX_THREADS_PER_BLOCK is same for all architectures currently
constexpr uint32_t CUDA_MAX_THREADS_PER_BLOCK = 1024;

// CUDA_THREADS_PER_BLOCK_FALLBACK is the "canonical fallback" choice of block
// size. 256 is a good number for this fallback and should give good occupancy
// and versatility across all architectures.
constexpr uint32_t CUDA_THREADS_PER_BLOCK_FALLBACK = 256;

// NOTE: if you are thinking of constexpr-ify the inputs to launch bounds, it
//       turns out that although __launch_bounds__ can take constexpr, it
//       can't take a constexpr that has anything to do with templates.
//       Currently we use launch_bounds that depend on template arguments in
//       Loops.cuh, Reduce.cuh and LossCTC.cuh. Hence, C10_MAX_THREADS_PER_BLOCK
//       and C10_MIN_BLOCKS_PER_SM are kept as macros.
// Suppose you were planning to write __launch_bounds__(a, b), based on your
// performance tuning on a modern GPU. Instead, you should write
// __launch_bounds__(C10_MAX_THREADS_PER_BLOCK(a), C10_MIN_BLOCKS_PER_SM(a, b)),
// which will also properly respect limits on old architectures.

// Yields `threads` when it fits within CUDA_MAX_THREADS_PER_BLOCK, otherwise
// the fallback block size.
#define C10_MAX_THREADS_PER_BLOCK(threads)               \
  (((threads) <= CUDA_MAX_THREADS_PER_BLOCK) ? (threads) \
                                             : CUDA_THREADS_PER_BLOCK_FALLBACK)

// Yields `blocks` when threads * blocks fits within CUDA_MAX_THREADS_PER_SM,
// otherwise CUDA_MAX_THREADS_PER_SM / threads rounded up.
// NOTE(review): because of the round-up, threads * result can still exceed
// CUDA_MAX_THREADS_PER_SM when `threads` does not divide it - confirm that
// this is intended.
#define C10_MIN_BLOCKS_PER_SM(threads, blocks)                \
  ((((threads) * (blocks) <= CUDA_MAX_THREADS_PER_SM)         \
        ? (blocks)                                            \
        : ((CUDA_MAX_THREADS_PER_SM + (threads)-1) / (threads))))

// C10_LAUNCH_BOUNDS is analogous to __launch_bounds__
// The zero-argument form uses default launch bounds that should give good
// occupancy and versatility across all architectures.
#define C10_LAUNCH_BOUNDS_0 __launch_bounds__(256, 4)
#define C10_LAUNCH_BOUNDS_1(max_threads_per_block) \
  __launch_bounds__((C10_MAX_THREADS_PER_BLOCK((max_threads_per_block))))
#define C10_LAUNCH_BOUNDS_2(max_threads_per_block, min_blocks_per_sm)  \
  __launch_bounds__(                                                   \
      (C10_MAX_THREADS_PER_BLOCK((max_threads_per_block))),            \
      (C10_MIN_BLOCKS_PER_SM((max_threads_per_block), (min_blocks_per_sm))))
#endif
309 | |
// ROCm-dependent definitions:
//  - C10_HIP_HOST_DEVICE is `__host__ __device__` on ROCm builds and empty
//    otherwise.
//  - C10_WARP_SIZE is the `warpSize` symbol on ROCm builds and the constant 32
//    otherwise.
#if defined(USE_ROCM)
#define C10_HIP_HOST_DEVICE __host__ __device__
#define C10_WARP_SIZE warpSize // = 64 or 32 (Defined in hip_runtime.h)
#else
#define C10_HIP_HOST_DEVICE
#define C10_WARP_SIZE 32
#endif
321 | |
// For MSVC with _MSC_VER <= 1900, map __func__ onto MSVC's __FUNCTION__.
#if defined(_MSC_VER)
#if _MSC_VER <= 1900
#define __func__ __FUNCTION__
#endif
#endif
325 | |
// CUDA_KERNEL_ASSERT / SYCL_KERNEL_ASSERT check the assertion
// even when NDEBUG is defined. This is useful for important assertions in
// CUDA code that would otherwise be suppressed when building Release.
//
// Both macros expand to a bare `if (...) { ... }` statement (or to nothing on
// platforms without assert support), so take care when using them as the body
// of an unbraced if/else.
#if defined(__ANDROID__) || defined(__APPLE__) || \
    (defined(USE_ROCM) && ROCM_VERSION < 40100)
// Those platforms do not support assert()
#define CUDA_KERNEL_ASSERT(condition)
#define SYCL_KERNEL_ASSERT(condition)
#elif defined(_MSC_VER)
// MSVC: failures are reported through _wassert. The declaration is only
// supplied here for NDEBUG builds.
#ifdef NDEBUG
extern "C" {
C10_IMPORT
#ifdef __SYCL_DEVICE_ONLY__
extern SYCL_EXTERNAL void _wassert(
    const wchar_t* wexpr,
    const wchar_t* wfile,
    unsigned line);
#else
#ifdef __CUDA_ARCH__
__host__ __device__
#endif // __CUDA_ARCH__
    void
    _wassert(wchar_t const* _Message, wchar_t const* _File, unsigned _Line);
#endif // __SYCL_DEVICE_ONLY__
}
#endif // NDEBUG
#define CUDA_KERNEL_ASSERT(condition)                       \
  if (C10_UNLIKELY(!(condition))) {                         \
    (void)(_wassert(                                        \
               _CRT_WIDE(#condition),                       \
               _CRT_WIDE(__FILE__),                         \
               static_cast<unsigned>(__LINE__)),            \
           0);                                              \
  }
#define SYCL_KERNEL_ASSERT(condition)                       \
  if (C10_UNLIKELY(!(condition))) {                         \
    (void)(_wassert(                                        \
               _CRT_WIDE(#condition),                       \
               _CRT_WIDE(__FILE__),                         \
               static_cast<unsigned>(__LINE__)),            \
           0);                                              \
  }
#else // not Android/Apple/old ROCm, not MSVC
// Everything else: failures are reported through __assert_fail. The
// declarations are only supplied here for NDEBUG builds.
#ifdef NDEBUG
extern "C" {
#ifdef __SYCL_DEVICE_ONLY__
extern SYCL_EXTERNAL void __assert_fail(
    const char* expr,
    const char* file,
    unsigned int line,
    const char* func);
#else // __SYCL_DEVICE_ONLY__
#if (                                                                       \
    defined(__CUDA_ARCH__) && !(defined(__clang__) && defined(__CUDA__)) && \
    !defined(TORCH_DISABLE_GPU_ASSERTS))
// CUDA supports __assert_fail function which are common for both device
// and host side code.
__host__ __device__
#endif

    // This forward declaration matches the declaration of __assert_fail
    // exactly how it is in glibc, in case parts of the program are compiled
    // with different NDEBUG settings. Otherwise we might get 'ambiguous
    // declaration' error. Note: On ROCm - this declaration serves for host
    // side compilation.
    void
    __assert_fail(
        const char* assertion,
        const char* file,
        unsigned int line,
        const char* function) noexcept __attribute__((__noreturn__));

#if (defined(__HIP_ARCH__) || defined(__HIP__)) && \
    !defined(TORCH_DISABLE_GPU_ASSERTS)
// ROCm supports __assert_fail only as a device side function.
__device__ __attribute__((noinline)) __attribute__((weak)) void __assert_fail(
    const char* assertion,
    const char* file,
    unsigned int line,
    const char* function);
#endif // defined(__HIP_ARCH__) || defined(__HIP__)
#endif // __SYCL_DEVICE_ONLY__
}
#endif // NDEBUG
#define CUDA_KERNEL_ASSERT(condition)                                         \
  if (C10_UNLIKELY(!(condition))) {                                           \
    __assert_fail(                                                            \
        #condition, __FILE__, static_cast<unsigned int>(__LINE__), __func__); \
  }
#define SYCL_KERNEL_ASSERT(condition)                                         \
  if (C10_UNLIKELY(!(condition))) {                                           \
    __assert_fail(                                                            \
        #condition, __FILE__, static_cast<unsigned int>(__LINE__), __func__); \
  }
#endif // platform selection for the kernel asserts
412 | |
413 | #ifdef __APPLE__ |
414 | #include <TargetConditionals.h> |
415 | #endif |
416 | |
// Mobile platform flags. Android defines C10_ANDROID and C10_MOBILE; Apple
// targets whose TargetConditionals report an iPhone/simulator OS define
// C10_IOS and C10_MOBILE. On any other platform none of them is defined.
#if defined(__ANDROID__)
#define C10_ANDROID 1
#define C10_MOBILE 1
#elif defined(__APPLE__)
#if TARGET_IPHONE_SIMULATOR || TARGET_OS_SIMULATOR || TARGET_OS_IPHONE
#define C10_IOS 1
#define C10_MOBILE 1
#endif
#endif // ANDROID / IOS
426 | |
// C10_ALWAYS_INLINE_UNLESS_MOBILE - C10_ALWAYS_INLINE everywhere except on
// mobile builds (C10_MOBILE defined and nonzero), where it is plain `inline`.
#if !defined(C10_MOBILE) || !C10_MOBILE
#define C10_ALWAYS_INLINE_UNLESS_MOBILE C10_ALWAYS_INLINE
#else
#define C10_ALWAYS_INLINE_UNLESS_MOBILE inline
#endif
432 | |
// Portable determination of whether type T is trivially copyable.
// The standard trait is used everywhere except g++ older than version 5,
// which falls back to the __has_trivial_copy intrinsic.
// Warning: __has_trivial_copy for GCC may not always detect the non-POD
// correctly. For example, T = std::unique_ptr may evaluate to true and be
// treated as POD. This can cause unexpected behavior.
#if !defined(__GNUG__) || __GNUC__ >= 5 || defined(__clang__)
#define C10_IS_TRIVIALLY_COPYABLE(Type) \
  std::is_trivially_copyable<Type>::value
#else
#define C10_IS_TRIVIALLY_COPYABLE(Type) __has_trivial_copy(Type)
#endif
442 | |
// Note [static constexpr char* members for windows NVCC]
// The Windows NVCC compiler doesn't handle static constexpr class members,
// although it's fixed in a later version.
// (see
// https://developercommunity.visualstudio.com/t/intellisense-error-c11-static-constexpr-member-ini/245425)
//
// If we want to ensure that our field is static under all builds, then we need
// to work around it specifically for windows NVCC by making it (a) const, (b)
// defined outside of the class definition. We need to define it outside of the
// class definition because of the C++ standard; char* is not an integral type
// (see
// https://stackoverflow.com/questions/24278473/intellisense-a-member-of-type-const-char-const-cannot-have-an-in-class-in)
//
// So instead of this:
//   struct Foo {
//     static constexpr const char* name = "foo";
//   }
// In Windows NVCC, we end up with this:
//   struct Foo {
//     static const char* name;
//   }
//   const char* Foo::name = "foo";
//
// This gives us a small perf hit for any code that wants to access these field
// members, but right now it isn't used in any perf-critical code paths.

// CONSTEXPR_EXCEPT_WIN_CUDA and the two STATIC_* helpers depend only on
// whether this is Windows NVCC (MSVC host compiler + __CUDACC__).
#if defined(_MSC_VER) && defined(__CUDACC__)
#define CONSTEXPR_EXCEPT_WIN_CUDA const
#define STATIC_CONSTEXPR_STR_INL_EXCEPT_WIN_CUDA(field, val) \
  static const char* field;
#define STATIC_CONST_STR_OUT_OF_LINE_FOR_WIN_CUDA(cls, field, val) \
  const char* cls::field = val;
#else
#define CONSTEXPR_EXCEPT_WIN_CUDA constexpr
#define STATIC_CONSTEXPR_STR_INL_EXCEPT_WIN_CUDA(field, val) \
  static constexpr const char* field = val;
#define STATIC_CONST_STR_OUT_OF_LINE_FOR_WIN_CUDA(cls, field, val)
#endif

// C10_HOST_CONSTEXPR_EXCEPT_WIN_CUDA: `__host__` whenever __CUDA_ARCH__ is
// defined; otherwise empty on Windows NVCC and `constexpr` everywhere else.
#if defined(__CUDA_ARCH__)
#define C10_HOST_CONSTEXPR_EXCEPT_WIN_CUDA __host__
#elif defined(_MSC_VER) && defined(__CUDACC__)
#define C10_HOST_CONSTEXPR_EXCEPT_WIN_CUDA
#else
#define C10_HOST_CONSTEXPR_EXCEPT_WIN_CUDA constexpr
#endif
503 | |
// HAS_DEMANGLE - unless the build already defined it, set to 0 on Android,
// Windows, Emscripten and Apple iPhone/simulator targets, and to 1 elsewhere.
// (Presumably gates symbol-name demangling support - confirm at the use
// sites.)
#if !defined(HAS_DEMANGLE)
#if defined(__ANDROID__) || defined(_WIN32) || defined(__EMSCRIPTEN__) || \
    (defined(__APPLE__) &&                                                \
     (TARGET_IPHONE_SIMULATOR || TARGET_OS_SIMULATOR || TARGET_OS_IPHONE))
#define HAS_DEMANGLE 0
#else
#define HAS_DEMANGLE 1
#endif
#endif // HAS_DEMANGLE
514 | |
// _C10_PRAGMA_(directive) emits `_Pragma("directive")`. The extra level of
// indirection lets macro arguments inside `directive` expand before the text
// is stringized.
#define _C10_PRAGMA__(directive) _Pragma(#directive)
#define _C10_PRAGMA_(directive) _C10_PRAGMA__(directive)

// clang-only diagnostic controls. With any other compiler the push/pop/ignore
// macros expand to nothing and C10_CLANG_HAS_WARNING(flag) is 0.
#if !defined(__clang__)
#define C10_CLANG_DIAGNOSTIC_PUSH()
#define C10_CLANG_DIAGNOSTIC_POP()
#define C10_CLANG_DIAGNOSTIC_IGNORE(flag)
#define C10_CLANG_HAS_WARNING(flag) 0
#else
#define C10_CLANG_DIAGNOSTIC_PUSH() _Pragma("clang diagnostic push")
#define C10_CLANG_DIAGNOSTIC_POP() _Pragma("clang diagnostic pop")
#define C10_CLANG_DIAGNOSTIC_IGNORE(flag) \
  _C10_PRAGMA_(clang diagnostic ignored flag)
#define C10_CLANG_HAS_WARNING(flag) __has_warning(flag)
#endif
530 | |
// C10_DIAGNOSTIC_PUSH_AND_IGNORED_IF_DEFINED(warning) pushes the diagnostic
// state and ignores `warning` (a string literal such as "-Wfoo"). It first
// ignores the compiler's own diagnostic about unrecognized warning names
// ("-Wunknown-warning-option" for clang, "-Wpragmas" for gcc).
// C10_DIAGNOSTIC_POP() restores the pushed state. Both macros expand to
// nothing on compilers that are neither clang nor GNU-compatible.
#if defined(__clang__)

#define C10_DIAGNOSTIC_PUSH_AND_IGNORED_IF_DEFINED(warning)         \
  _C10_PRAGMA_(clang diagnostic push)                               \
  _C10_PRAGMA_(clang diagnostic ignored "-Wunknown-warning-option") \
  _C10_PRAGMA_(clang diagnostic ignored warning)

#define C10_DIAGNOSTIC_POP() _C10_PRAGMA_(clang diagnostic pop)

#elif defined(__GNUC__)

#define C10_DIAGNOSTIC_PUSH_AND_IGNORED_IF_DEFINED(warning) \
  _C10_PRAGMA_(GCC diagnostic push)                         \
  _C10_PRAGMA_(GCC diagnostic ignored "-Wpragmas")          \
  _C10_PRAGMA_(GCC diagnostic ignored warning)

#define C10_DIAGNOSTIC_POP() _C10_PRAGMA_(GCC diagnostic pop)

#else

#define C10_DIAGNOSTIC_PUSH_AND_IGNORED_IF_DEFINED(warning)
#define C10_DIAGNOSTIC_POP()

#endif
555 | |
556 | #endif // C10_MACROS_MACROS_H_ |
557 | |