#pragma once

#include <c10/cuda/CUDADeviceAssertionHost.h>
#include <c10/cuda/CUDAMacros.h>
#include <c10/cuda/CUDAMiscFunctions.h>
#include <c10/macros/Macros.h>
#include <c10/util/Exception.h>
#include <c10/util/irange.h>
#include <cuda.h>

// Note [CHECK macro]
// ~~~~~~~~~~~~~~~~~~
// This is a macro so that AT_ERROR can get accurate __LINE__
// and __FILE__ information. We could split this into a short
// macro and a function implementation if we pass along __LINE__
// and __FILE__, but no one has found this worth doing.

// Used to denote errors from the CUDA framework.
// This needs to be declared here instead of in util/Exception.h for proper
// conversion during hipify.
namespace c10 {
class C10_CUDA_API CUDAError : public c10::Error {
  using Error::Error;
};
} // namespace c10

#define C10_CUDA_CHECK(EXPR)                                        \
  do {                                                              \
    const cudaError_t __err = EXPR;                                 \
    c10::cuda::c10_cuda_check_implementation(                       \
        static_cast<int32_t>(__err),                                \
        __FILE__,                                                   \
        __func__, /* Line number data type not well-defined between \
                     compilers, so we perform an explicit cast */   \
        static_cast<uint32_t>(__LINE__),                            \
        true);                                                      \
  } while (0)

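// Example (illustrative sketch, not part of this header; `dst`, `src`,
// `nbytes`, and `stream` are hypothetical caller-side variables). The macro
// raises a c10 error carrying the failing file, function, and line if the
// wrapped runtime call does not return cudaSuccess:
//
//   C10_CUDA_CHECK(cudaMemcpyAsync(
//       dst, src, nbytes, cudaMemcpyDeviceToDevice, stream));
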
#define C10_CUDA_CHECK_WARN(EXPR)                              \
  do {                                                         \
    const cudaError_t __err = EXPR;                            \
    if (C10_UNLIKELY(__err != cudaSuccess)) {                  \
      auto error_unused C10_UNUSED = cudaGetLastError();       \
      (void)error_unused;                                      \
      TORCH_WARN("CUDA warning: ", cudaGetErrorString(__err)); \
    }                                                          \
  } while (0)

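// Example (illustrative sketch; `event` is a hypothetical cudaEvent_t).
// Useful where throwing is not an option, e.g. in a destructor: the error is
// consumed and reported as a warning instead:
//
//   C10_CUDA_CHECK_WARN(cudaEventDestroy(event));
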
// Indicates that a CUDA error is handled in a non-standard way
#define C10_CUDA_ERROR_HANDLED(EXPR) EXPR

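// Example (illustrative sketch; `stream` is a hypothetical cudaStream_t). The
// macro expands to the expression unchanged; it merely marks call sites where
// the caller inspects the returned error code itself:
//
//   const cudaError_t err = C10_CUDA_ERROR_HANDLED(cudaStreamQuery(stream));
//   if (err == cudaErrorNotReady) {
//     // Pending work is not a failure; clear the per-thread last-error
//     // state so it does not surface in a later check.
//     (void)cudaGetLastError();
//   } else {
//     C10_CUDA_CHECK(err);
//   }
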
// Intentionally ignore a CUDA error
#define C10_CUDA_IGNORE_ERROR(EXPR)                             \
  do {                                                          \
    const cudaError_t __err = EXPR;                             \
    if (C10_UNLIKELY(__err != cudaSuccess)) {                   \
      cudaError_t error_unused C10_UNUSED = cudaGetLastError(); \
      (void)error_unused;                                       \
    }                                                           \
  } while (0)

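// Example (illustrative sketch; `ptr` is a hypothetical device allocation
// being released during process shutdown, when the CUDA context may already
// have been torn down and the failure is expected and harmless):
//
//   C10_CUDA_IGNORE_ERROR(cudaFree(ptr));
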
// Clear the last CUDA error
#define C10_CUDA_CLEAR_ERROR()                                \
  do {                                                        \
    cudaError_t error_unused C10_UNUSED = cudaGetLastError(); \
    (void)error_unused;                                       \
  } while (0)

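// Example (illustrative sketch): after an error has been consumed by some
// other means, reset the per-thread last-error state so that a subsequent
// C10_CUDA_KERNEL_LAUNCH_CHECK() does not report a stale error:
//
//   C10_CUDA_CLEAR_ERROR();
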
// This should be used directly after every kernel launch to ensure
// the launch happened correctly and provide an early, close-to-source
// diagnostic if it didn't.
#define C10_CUDA_KERNEL_LAUNCH_CHECK() C10_CUDA_CHECK(cudaGetLastError())

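// Example (illustrative sketch; `my_kernel`, `grid`, `block`, `stream`,
// `out`, `in`, and `n` are hypothetical caller-side names):
//
//   my_kernel<<<grid, block, 0, stream>>>(out, in, n);
//   C10_CUDA_KERNEL_LAUNCH_CHECK();
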
/// Launches a CUDA kernel appending to it all the information needed to
/// handle device-side assertion failures. Checks that the launch was
/// successful.
#define TORCH_DSA_KERNEL_LAUNCH(                                      \
    kernel, blocks, threads, shared_mem, stream, ...)                 \
  do {                                                                \
    auto& launch_registry =                                           \
        c10::cuda::CUDAKernelLaunchRegistry::get_singleton_ref();     \
    kernel<<<blocks, threads, shared_mem, stream>>>(                  \
        __VA_ARGS__,                                                  \
        launch_registry.get_uvm_assertions_ptr_for_current_device(),  \
        launch_registry.insert(                                       \
            __FILE__, __FUNCTION__, __LINE__, #kernel, stream.id())); \
    C10_CUDA_KERNEL_LAUNCH_CHECK();                                   \
  } while (0)

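// Example (illustrative sketch; `dsa_kernel`, `grid`, `block`, `in`, and `n`
// are hypothetical). Note that `stream` must expose an `id()` method, e.g. a
// c10::cuda::CUDAStream, since the macro records `stream.id()`. The kernel
// must accept, after its own parameters, the two trailing arguments the macro
// appends: the assertion buffer pointer for the current device and the launch
// id returned by the registry's insert():
//
//   __global__ void dsa_kernel(
//       const float* in,
//       int n,
//       c10::cuda::DeviceAssertionsData* assertions_data,
//       uint32_t assertion_caller_id);
//
//   TORCH_DSA_KERNEL_LAUNCH(dsa_kernel, grid, block, 0, stream, in, n);
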
namespace c10 {
namespace cuda {

/// In the event of a CUDA failure, formats a nice error message about that
/// failure and also checks for device-side assertion failures
C10_CUDA_API void c10_cuda_check_implementation(
    const int32_t err,
    const char* filename,
    const char* function_name,
    const int line_number,
    const bool include_device_assertions);

} // namespace cuda
} // namespace c10