1 | #pragma once |
2 | |
3 | #include <c10/core/Device.h> |
4 | |
5 | namespace c10 { |
6 | |
7 | using CopyBytesFunction = void (*)( |
8 | size_t nbytes, |
9 | const void* src, |
10 | Device src_device, |
11 | void* dst, |
12 | Device dst_device); |
13 | |
14 | struct C10_API _CopyBytesFunctionRegisterer { |
15 | _CopyBytesFunctionRegisterer( |
16 | DeviceType from, |
17 | DeviceType to, |
18 | CopyBytesFunction func_sync, |
19 | CopyBytesFunction func_async = nullptr); |
20 | }; |
21 | |
// Defines a file-local _CopyBytesFunctionRegisterer object (inside an unnamed
// namespace, with a unique name from C10_ANONYMOUS_VARIABLE) constructed with
// (from, to, <remaining arguments>). The remaining arguments are forwarded
// verbatim to the constructor: the synchronous copy function and, optionally,
// the asynchronous one.
//
// The registerer type is spelled with its full ::c10:: qualification so the
// macro expands correctly no matter which namespace it is invoked from,
// rather than relying on unqualified lookup reaching namespace c10.
#define REGISTER_COPY_BYTES_FUNCTION(from, to, ...)                  \
  namespace {                                                        \
  static ::c10::_CopyBytesFunctionRegisterer C10_ANONYMOUS_VARIABLE( \
      g_copy_function)(from, to, __VA_ARGS__);                       \
  }
27 | |
28 | /* |
29 | * WARNING: Implementations for this function are currently registered from |
30 | * ATen and caffe2, not yet from c10. Don't use this if not either ATen |
31 | * or caffe2 is present as well. |
32 | * We can't move them yet, because the CUDA implementations aren't unified yet |
33 | * between ATen and caffe2. |
34 | * We're planning to move the implementations into c10/backend/xxx |
35 | * to make c10 self contained again. |
36 | */ |
37 | C10_API void CopyBytes( |
38 | size_t nbytes, |
39 | const void* src, |
40 | Device src_device, |
41 | void* dst, |
42 | Device dst_device, |
43 | bool async); |
44 | } // namespace c10 |
45 | |